X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fpage_cgroup.c;h=3dd88539a0e6b47876ec87a91ee093533e1392c9;hb=024914477e15ef8b17f271ec47f1bb8a589f0806;hp=0b3cbf090a674a1424cb1d7cc9e4cd4f3bfa4c02;hpb=dc19f9db38295f811d9041bd89b113beccbd763a;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 0b3cbf0..3dd8853 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -8,6 +8,7 @@ #include #include #include +#include static void __meminit __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) @@ -15,6 +16,7 @@ __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) pc->flags = 0; pc->mem_cgroup = NULL; pc->page = pfn_to_page(pfn); + INIT_LIST_HEAD(&pc->lru); } static unsigned long total_usage; @@ -49,6 +51,9 @@ static int __init alloc_node_page_cgroup(int nid) start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; + if (!nr_pages) + return 0; + table_size = sizeof(struct page_cgroup) * nr_pages; base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), @@ -64,12 +69,12 @@ static int __init alloc_node_page_cgroup(int nid) return 0; } -void __init page_cgroup_init(void) +void __init page_cgroup_init_flatmem(void) { int nid, fail; - if (mem_cgroup_subsys.disabled) + if (mem_cgroup_disabled()) return; for_each_online_node(nid) { @@ -78,12 +83,12 @@ void __init page_cgroup_init(void) goto fail; } printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage); - printk(KERN_INFO "please try cgroup_disable=memory option if you" - " don't want\n"); + printk(KERN_INFO "please try 'cgroup_disable=memory' option if you" + " don't want memory cgroups\n"); return; fail: - printk(KERN_CRIT "allocation of page_cgroup was failed.\n"); - printk(KERN_CRIT "please try cgroup_disable=memory boot option\n"); + printk(KERN_CRIT "allocation of page_cgroup failed.\n"); + printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n"); panic("Out of memory"); } @@ -94,30 +99,32 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); + if (!section->page_cgroup) + return NULL; return section->page_cgroup + pfn; } /* __alloc_bootmem...() is protected by !slab_available() */ -int __init_refok init_section_page_cgroup(unsigned long pfn) +static int __init_refok init_section_page_cgroup(unsigned long pfn) { - struct mem_section *section; + struct mem_section *section = __pfn_to_section(pfn); struct page_cgroup *base, *pc; unsigned long table_size; int nid, index; - section = __pfn_to_section(pfn); - if (!section->page_cgroup) { nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - if (slab_is_available()) { - base = kmalloc_node(table_size, GFP_KERNEL, nid); + VM_BUG_ON(!slab_is_available()); + if (node_state(nid, N_HIGH_MEMORY)) { + base = kmalloc_node(table_size, + GFP_KERNEL | __GFP_NOWARN, nid); if (!base) base = vmalloc_node(table_size, nid); } else { - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), - table_size, - PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); + if (!base) + base = vmalloc(table_size); } } else { /* @@ -142,7 +149,6 @@ int __init_refok init_section_page_cgroup(unsigned long pfn) __init_page_cgroup(pc, pfn + index); } - section = __pfn_to_section(pfn); section->page_cgroup = base - pfn; total_usage += table_size; return 0; @@ -245,7 +251,7 @@ void __init page_cgroup_init(void) unsigned long pfn; int fail = 0; - if (mem_cgroup_subsys.disabled) + if (mem_cgroup_disabled()) return; for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { @@ -254,14 +260,14 @@ void __init page_cgroup_init(void) fail = init_section_page_cgroup(pfn); } if (fail) { - printk(KERN_CRIT "try cgroup_disable=memory boot option\n"); + printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n"); panic("Out of memory"); } else { hotplug_memory_notifier(page_cgroup_callback, 0); } printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage); - printk(KERN_INFO "please try cgroup_disable=memory option if you don't" - " want\n"); + printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't" + " want memory cgroups\n"); } void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) @@ -270,3 +276,210 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) } #endif + + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP + +static DEFINE_MUTEX(swap_cgroup_mutex); +struct swap_cgroup_ctrl { + struct page **map; + unsigned long length; +}; + +struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; + +struct swap_cgroup { + unsigned short id; +}; +#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) +#define SC_POS_MASK (SC_PER_PAGE - 1) + +/* + * SwapCgroup implements "lookup" and "exchange" operations. + * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge + * against SwapCache. At swap_free(), this is accessed directly from swap. + * + * This means, + * - we have no race in "exchange" when we're accessed via SwapCache because + * SwapCache(and its swp_entry) is under lock. + * - When called via swap_free(), there is no user of this entry and no race. + * Then, we don't need lock around "exchange". + * + * TODO: we can push these buffers out to HIGHMEM. + */ + +/* + * allocate buffer for swap_cgroup. + */ +static int swap_cgroup_prepare(int type) +{ + struct page *page; + struct swap_cgroup_ctrl *ctrl; + unsigned long idx, max; + + ctrl = &swap_cgroup_ctrl[type]; + + for (idx = 0; idx < ctrl->length; idx++) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto not_enough_page; + ctrl->map[idx] = page; + } + return 0; +not_enough_page: + max = idx; + for (idx = 0; idx < max; idx++) + __free_page(ctrl->map[idx]); + + return -ENOMEM; +} + +/** + * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry. + * @end: swap entry to be cmpxchged + * @old: old id + * @new: new id + * + * Returns old id at success, 0 at failure. + * (There is no mem_cgroup useing 0 as its id) + */ +unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, + unsigned short old, unsigned short new) +{ + int type = swp_type(ent); + unsigned long offset = swp_offset(ent); + unsigned long idx = offset / SC_PER_PAGE; + unsigned long pos = offset & SC_POS_MASK; + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + struct swap_cgroup *sc; + + ctrl = &swap_cgroup_ctrl[type]; + + mappage = ctrl->map[idx]; + sc = page_address(mappage); + sc += pos; + if (cmpxchg(&sc->id, old, new) == old) + return old; + else + return 0; +} + +/** + * swap_cgroup_record - record mem_cgroup for this swp_entry. + * @ent: swap entry to be recorded into + * @mem: mem_cgroup to be recorded + * + * Returns old value at success, 0 at failure. + * (Of course, old value can be 0.) + */ +unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) +{ + int type = swp_type(ent); + unsigned long offset = swp_offset(ent); + unsigned long idx = offset / SC_PER_PAGE; + unsigned long pos = offset & SC_POS_MASK; + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + struct swap_cgroup *sc; + unsigned short old; + + ctrl = &swap_cgroup_ctrl[type]; + + mappage = ctrl->map[idx]; + sc = page_address(mappage); + sc += pos; + old = xchg(&sc->id, id); + + return old; +} + +/** + * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry + * @ent: swap entry to be looked up. + * + * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) + */ +unsigned short lookup_swap_cgroup(swp_entry_t ent) +{ + int type = swp_type(ent); + unsigned long offset = swp_offset(ent); + unsigned long idx = offset / SC_PER_PAGE; + unsigned long pos = offset & SC_POS_MASK; + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + struct swap_cgroup *sc; + unsigned short ret; + + ctrl = &swap_cgroup_ctrl[type]; + mappage = ctrl->map[idx]; + sc = page_address(mappage); + sc += pos; + ret = sc->id; + return ret; +} + +int swap_cgroup_swapon(int type, unsigned long max_pages) +{ + void *array; + unsigned long array_size; + unsigned long length; + struct swap_cgroup_ctrl *ctrl; + + if (!do_swap_account) + return 0; + + length = ((max_pages/SC_PER_PAGE) + 1); + array_size = length * sizeof(void *); + + array = vmalloc(array_size); + if (!array) + goto nomem; + + memset(array, 0, array_size); + ctrl = &swap_cgroup_ctrl[type]; + mutex_lock(&swap_cgroup_mutex); + ctrl->length = length; + ctrl->map = array; + if (swap_cgroup_prepare(type)) { + /* memory shortage */ + ctrl->map = NULL; + ctrl->length = 0; + vfree(array); + mutex_unlock(&swap_cgroup_mutex); + goto nomem; + } + mutex_unlock(&swap_cgroup_mutex); + + return 0; +nomem: + printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); + printk(KERN_INFO + "swap_cgroup can be disabled by noswapaccount boot option\n"); + return -ENOMEM; +} + +void swap_cgroup_swapoff(int type) +{ + int i; + struct swap_cgroup_ctrl *ctrl; + + if (!do_swap_account) + return; + + mutex_lock(&swap_cgroup_mutex); + ctrl = &swap_cgroup_ctrl[type]; + if (ctrl->map) { + for (i = 0; i < ctrl->length; i++) { + struct page *page = ctrl->map[i]; + if (page) + __free_page(page); + } + vfree(ctrl->map); + ctrl->map = NULL; + ctrl->length = 0; + } + mutex_unlock(&swap_cgroup_mutex); +} + +#endif