#define do_swap_account (0)
#endif
-#define SOFTLIMIT_EVENTS_THRESH (1000)
-#define THRESHOLDS_EVENTS_THRESH (100)
+/*
+ * Per memcg event counter is incremented at every pagein/pageout. This counter
+ * is used for trigger some periodic events. This is straightforward and better
+ * than using jiffies etc. to handle periodic memcg event.
+ *
+ * These values will be used as !((event) & ((1 <<(thresh)) - 1))
+ */
+#define THRESHOLDS_EVENTS_THRESH (7) /* once in 128 */
+#define SOFTLIMIT_EVENTS_THRESH (10) /* once in 1024 */
/*
* Statistics for memory cgroup.
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
- MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out.
- used by soft limit implementation */
- MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out.
- used by threshold implementation */
+ MEM_CGROUP_EVENTS, /* incremented at every pagein/pageout */
MEM_CGROUP_STAT_NSTATS,
};
struct mem_cgroup_stat_cpu {
s64 count[MEM_CGROUP_STAT_NSTATS];
-} ____cacheline_aligned_in_smp;
-
-struct mem_cgroup_stat {
- struct mem_cgroup_stat_cpu cpustat[0];
};
-static inline void
-__mem_cgroup_stat_set_safe(struct mem_cgroup_stat_cpu *stat,
- enum mem_cgroup_stat_index idx, s64 val)
-{
- stat->count[idx] = val;
-}
-
-static inline s64
-__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
- enum mem_cgroup_stat_index idx)
-{
- return stat->count[idx];
-}
-
-/*
- * For accounting under irq disable, no need for increment preempt count.
- */
-static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
- enum mem_cgroup_stat_index idx, int val)
-{
- stat->count[idx] += val;
-}
-
-static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
- enum mem_cgroup_stat_index idx)
-{
- int cpu;
- s64 ret = 0;
- for_each_possible_cpu(cpu)
- ret += stat->cpustat[cpu].count[idx];
- return ret;
-}
-
-static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat)
-{
- s64 ret;
-
- ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE);
- ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS);
- return ret;
-}
-
/*
* per-zone information in memory controller.
*/
struct mem_cgroup_threshold entries[0];
};
-static bool mem_cgroup_threshold_check(struct mem_cgroup *mem);
static void mem_cgroup_threshold(struct mem_cgroup *mem);
/*
* Should the accounting and control be hierarchical, per subtree?
*/
bool use_hierarchy;
- unsigned long last_oom_jiffies;
+ atomic_t oom_lock;
atomic_t refcnt;
unsigned int swappiness;
unsigned long move_charge_at_immigrate;
/*
- * statistics. This must be placed at the end of memcg.
+ * percpu counter.
*/
- struct mem_cgroup_stat stat;
+ struct mem_cgroup_stat_cpu *stat;
};
/* Stuffs for move charges at task migration. */
spin_unlock(&mctz->lock);
}
-static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
-{
- bool ret = false;
- int cpu;
- s64 val;
- struct mem_cgroup_stat_cpu *cpustat;
-
- cpu = get_cpu();
- cpustat = &mem->stat.cpustat[cpu];
- val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_SOFTLIMIT);
- if (unlikely(val < 0)) {
- __mem_cgroup_stat_set_safe(cpustat, MEM_CGROUP_STAT_SOFTLIMIT,
- SOFTLIMIT_EVENTS_THRESH);
- ret = true;
- }
- put_cpu();
- return ret;
-}
static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
{
return mz;
}
+static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx)
+{
+ int cpu;
+ s64 val = 0;
+
+ for_each_possible_cpu(cpu)
+ val += per_cpu(mem->stat->count[idx], cpu);
+ return val;
+}
+
+static s64 mem_cgroup_local_usage(struct mem_cgroup *mem)
+{
+ s64 ret;
+
+ ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
+ ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
+ return ret;
+}
+
static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
bool charge)
{
int val = (charge) ? 1 : -1;
- struct mem_cgroup_stat *stat = &mem->stat;
- struct mem_cgroup_stat_cpu *cpustat;
- int cpu = get_cpu();
-
- cpustat = &stat->cpustat[cpu];
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
- put_cpu();
+ this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
}
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
bool charge)
{
int val = (charge) ? 1 : -1;
- struct mem_cgroup_stat *stat = &mem->stat;
- struct mem_cgroup_stat_cpu *cpustat;
- int cpu = get_cpu();
- cpustat = &stat->cpustat[cpu];
+ preempt_disable();
+
if (PageCgroupCache(pc))
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
+ __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
else
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
+ __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
if (charge)
- __mem_cgroup_stat_add_safe(cpustat,
- MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
+ __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
else
- __mem_cgroup_stat_add_safe(cpustat,
- MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SOFTLIMIT, -1);
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_THRESHOLDS, -1);
+ __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
+ __this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
- put_cpu();
+ preempt_enable();
}
static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
return total;
}
+static bool __memcg_event_check(struct mem_cgroup *mem, int event_mask_shift)
+{
+ s64 val;
+
+ val = this_cpu_read(mem->stat->count[MEM_CGROUP_EVENTS]);
+
+ return !(val & ((1 << event_mask_shift) - 1));
+}
+
+/*
+ * Check events in order.
+ *
+ */
+static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
+{
+ /* threshold event is triggered in finer grain than soft limit */
+ if (unlikely(__memcg_event_check(mem, THRESHOLDS_EVENTS_THRESH))) {
+ mem_cgroup_threshold(mem);
+ if (unlikely(__memcg_event_check(mem, SOFTLIMIT_EVENTS_THRESH)))
+ mem_cgroup_update_tree(mem, page);
+ }
+}
+
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
return container_of(cgroup_subsys_state(cont,
}
/**
- * mem_cgroup_print_mem_info: Called from OOM with tasklist_lock held in read mode.
+ * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
* @memcg: The memory cgroup that went over limit
* @p: Task that is going to be killed
*
}
}
}
- if (!mem_cgroup_local_usage(&victim->stat)) {
+ if (!mem_cgroup_local_usage(victim)) {
/* this cgroup's local usage == 0 */
css_put(&victim->css);
continue;
return total;
}
-bool mem_cgroup_oom_called(struct task_struct *task)
+static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
{
- bool ret = false;
- struct mem_cgroup *mem;
- struct mm_struct *mm;
+ int *val = (int *)data;
+ int x;
+ /*
+ * Logically, we can stop scanning immediately when we find
+ * a memcg is already locked. But condidering unlock ops and
+ * creation/removal of memcg, scan-all is simple operation.
+ */
+ x = atomic_inc_return(&mem->oom_lock);
+ *val = max(x, *val);
+ return 0;
+}
+/*
+ * Check OOM-Killer is already running under our hierarchy.
+ * If someone is running, return false.
+ */
+static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
+{
+ int lock_count = 0;
- rcu_read_lock();
- mm = task->mm;
- if (!mm)
- mm = &init_mm;
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
- ret = true;
- rcu_read_unlock();
- return ret;
+ mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb);
+
+ if (lock_count == 1)
+ return true;
+ return false;
}
-static int record_last_oom_cb(struct mem_cgroup *mem, void *data)
+static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data)
{
- mem->last_oom_jiffies = jiffies;
+ /*
+ * When a new child is created while the hierarchy is under oom,
+ * mem_cgroup_oom_lock() may not be called. We have to use
+ * atomic_add_unless() here.
+ */
+ atomic_add_unless(&mem->oom_lock, -1, 0);
return 0;
}
-static void record_last_oom(struct mem_cgroup *mem)
+static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
+{
+ mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb);
+}
+
+static DEFINE_MUTEX(memcg_oom_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
+
+/*
+ * try to call OOM killer. returns false if we should exit memory-reclaim loop.
+ */
+bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
{
- mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
+ DEFINE_WAIT(wait);
+ bool locked;
+
+ /* At first, try to OOM lock hierarchy under mem.*/
+ mutex_lock(&memcg_oom_mutex);
+ locked = mem_cgroup_oom_lock(mem);
+ /*
+ * Even if signal_pending(), we can't quit charge() loop without
+ * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
+ * under OOM is always welcomed, use TASK_KILLABLE here.
+ */
+ if (!locked)
+ prepare_to_wait(&memcg_oom_waitq, &wait, TASK_KILLABLE);
+ mutex_unlock(&memcg_oom_mutex);
+
+ if (locked)
+ mem_cgroup_out_of_memory(mem, mask);
+ else {
+ schedule();
+ finish_wait(&memcg_oom_waitq, &wait);
+ }
+ mutex_lock(&memcg_oom_mutex);
+ mem_cgroup_oom_unlock(mem);
+ /*
+ * Here, we use global waitq .....more fine grained waitq ?
+ * Assume following hierarchy.
+ * A/
+ * 01
+ * 02
+ * assume OOM happens both in A and 01 at the same time. Tthey are
+ * mutually exclusive by lock. (kill in 01 helps A.)
+ * When we use per memcg waitq, we have to wake up waiters on A and 02
+ * in addtion to waiters on 01. We use global waitq for avoiding mess.
+ * It will not be a big problem.
+ * (And a task may be moved to other groups while it's waiting for OOM.)
+ */
+ wake_up_all(&memcg_oom_waitq);
+ mutex_unlock(&memcg_oom_mutex);
+
+ if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
+ return false;
+ /* Give chance to dying process */
+ schedule_timeout(1);
+ return true;
}
/*
void mem_cgroup_update_file_mapped(struct page *page, int val)
{
struct mem_cgroup *mem;
- struct mem_cgroup_stat *stat;
- struct mem_cgroup_stat_cpu *cpustat;
- int cpu;
struct page_cgroup *pc;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
mem = pc->mem_cgroup;
- if (!mem)
- goto done;
-
- if (!PageCgroupUsed(pc))
+ if (!mem || !PageCgroupUsed(pc))
goto done;
/*
- * Preemption is already disabled, we don't need get_cpu()
+ * Preemption is already disabled. We can use __this_cpu_xxx
*/
- cpu = smp_processor_id();
- stat = &mem->stat;
- cpustat = &stat->cpustat[cpu];
+ if (val > 0) {
+ __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+ SetPageCgroupFileMapped(pc);
+ } else {
+ __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+ ClearPageCgroupFileMapped(pc);
+ }
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val);
done:
unlock_page_cgroup(pc);
}
/*
* Cache charges(val) which is from res_counter, to local per_cpu area.
- * This will be consumed by consumt_stock() function, later.
+ * This will be consumed by consume_stock() function, later.
*/
static void refill_stock(struct mem_cgroup *mem, int val)
{
* oom-killer can be invoked.
*/
static int __mem_cgroup_try_charge(struct mm_struct *mm,
- gfp_t gfp_mask, struct mem_cgroup **memcg,
- bool oom, struct page *page)
+ gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom)
{
struct mem_cgroup *mem, *mem_over_limit;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct res_counter *fail_res;
int csize = CHARGE_SIZE;
- if (unlikely(test_thread_flag(TIF_MEMDIE))) {
- /* Don't account this! */
- *memcg = NULL;
- return 0;
- }
+ /*
+ * Unlike gloval-vm's OOM-kill, we're not in memory shortage
+ * in system level. So, allow to go ahead dying process in addition to
+ * MEMDIE process.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)
+ || fatal_signal_pending(current)))
+ goto bypass;
/*
* We always charge the cgroup the mm_struct belongs to.
unsigned long flags = 0;
if (consume_stock(mem))
- goto charged;
+ goto done;
ret = res_counter_charge(&mem->res, csize, &fail_res);
if (likely(!ret)) {
* There is a small race that "from" or "to" can be
* freed by rmdir, so we use css_tryget().
*/
- rcu_read_lock();
from = mc.from;
to = mc.to;
if (from && css_tryget(&from->css)) {
do_continue = (to == mem_over_limit);
css_put(&to->css);
}
- rcu_read_unlock();
if (do_continue) {
DEFINE_WAIT(wait);
prepare_to_wait(&mc.waitq, &wait,
}
if (!nr_retries--) {
- if (oom) {
- mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
- record_last_oom(mem_over_limit);
+ if (!oom)
+ goto nomem;
+ if (mem_cgroup_handle_oom(mem_over_limit, gfp_mask)) {
+ nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+ continue;
}
- goto nomem;
+ /* When we reach here, current task is dying .*/
+ css_put(&mem->css);
+ goto bypass;
}
}
if (csize > PAGE_SIZE)
refill_stock(mem, csize - PAGE_SIZE);
-charged:
- /*
- * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
- * if they exceeds softlimit.
- */
- if (page && mem_cgroup_soft_limit_check(mem))
- mem_cgroup_update_tree(mem, page);
done:
- if (mem_cgroup_threshold_check(mem))
- mem_cgroup_threshold(mem);
return 0;
nomem:
css_put(&mem->css);
return -ENOMEM;
+bypass:
+ *memcg = NULL;
+ return 0;
}
/*
mem_cgroup_charge_statistics(mem, pc, true);
unlock_page_cgroup(pc);
+ /*
+ * "charge_statistics" updated event counter. Then, check it.
+ * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+ * if they exceeds softlimit.
+ */
+ memcg_check_events(mem, pc->page);
}
/**
static void __mem_cgroup_move_account(struct page_cgroup *pc,
struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
{
- struct page *page;
- int cpu;
- struct mem_cgroup_stat *stat;
- struct mem_cgroup_stat_cpu *cpustat;
-
VM_BUG_ON(from == to);
VM_BUG_ON(PageLRU(pc->page));
VM_BUG_ON(!PageCgroupLocked(pc));
VM_BUG_ON(!PageCgroupUsed(pc));
VM_BUG_ON(pc->mem_cgroup != from);
- page = pc->page;
- if (page_mapped(page) && !PageAnon(page)) {
- cpu = smp_processor_id();
- /* Update mapped_file data for mem_cgroup "from" */
- stat = &from->stat;
- cpustat = &stat->cpustat[cpu];
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
- -1);
-
- /* Update mapped_file data for mem_cgroup "to" */
- stat = &to->stat;
- cpustat = &stat->cpustat[cpu];
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
- 1);
+ if (PageCgroupFileMapped(pc)) {
+ /* Update mapped_file data for mem_cgroup */
+ preempt_disable();
+ __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+ __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+ preempt_enable();
}
mem_cgroup_charge_statistics(from, pc, false);
if (uncharge)
ret = 0;
}
unlock_page_cgroup(pc);
+ /*
+ * check events
+ */
+ memcg_check_events(to, pc->page);
+ memcg_check_events(from, pc->page);
return ret;
}
goto put;
parent = mem_cgroup_from_cont(pcg);
- ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
+ ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
if (ret || !parent)
goto put_back;
prefetchw(pc);
mem = memcg;
- ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page);
+ ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
if (ret || !mem)
return ret;
if (!mem)
goto charge_cur_mm;
*ptr = mem;
- ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page);
+ ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
/* drop extra refcnt from tryget */
css_put(&mem->css);
return ret;
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
- return __mem_cgroup_try_charge(mm, mask, ptr, true, page);
+ return __mem_cgroup_try_charge(mm, mask, ptr, true);
}
static void
mz = page_cgroup_zoneinfo(pc);
unlock_page_cgroup(pc);
- if (mem_cgroup_soft_limit_check(mem))
- mem_cgroup_update_tree(mem, page);
- if (mem_cgroup_threshold_check(mem))
- mem_cgroup_threshold(mem);
+ memcg_check_events(mem, page);
/* at swapout, this memcg will be accessed to record to swap */
if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
css_put(&mem->css);
}
unlock_page_cgroup(pc);
+ *ptr = mem;
if (mem) {
- ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
- page);
+ ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
css_put(&mem->css);
}
- *ptr = mem;
return ret;
}
mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
{
struct mem_cgroup_idx_data *d = data;
- d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+ d->val += mem_cgroup_read_stat(mem, d->idx);
return 0;
}
s64 val;
/* per cpu stat */
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
s->stat[MCS_CACHE] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
s->stat[MCS_RSS] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED);
s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGIN_COUNT);
s->stat[MCS_PGPGIN] += val;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGOUT_COUNT);
s->stat[MCS_PGPGOUT] += val;
if (do_swap_account) {
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
s->stat[MCS_SWAP] += val * PAGE_SIZE;
}
return 0;
}
-static bool mem_cgroup_threshold_check(struct mem_cgroup *mem)
-{
- bool ret = false;
- int cpu;
- s64 val;
- struct mem_cgroup_stat_cpu *cpustat;
-
- cpu = get_cpu();
- cpustat = &mem->stat.cpustat[cpu];
- val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_THRESHOLDS);
- if (unlikely(val < 0)) {
- __mem_cgroup_stat_set_safe(cpustat, MEM_CGROUP_STAT_THRESHOLDS,
- THRESHOLDS_EVENTS_THRESH);
- ret = true;
- }
- put_cpu();
- return ret;
-}
-
static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
{
struct mem_cgroup_threshold_ary *t;
}
}
- /*
- * We need to increment refcnt to be sure that all thresholds
- * will be unregistered before calling __mem_cgroup_free()
- */
- mem_cgroup_get(memcg);
-
if (type == _MEM)
rcu_assign_pointer(memcg->thresholds, thresholds_new);
else
/* To be sure that nobody uses thresholds before freeing it */
synchronize_rcu();
- for (i = 0; i < thresholds->size - size; i++)
- mem_cgroup_put(memcg);
-
kfree(thresholds);
unlock:
mutex_unlock(&memcg->thresholds_lock);
kfree(mem->info.nodeinfo[node]);
}
-static int mem_cgroup_size(void)
-{
- int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);
- return sizeof(struct mem_cgroup) + cpustat_size;
-}
-
static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *mem;
- int size = mem_cgroup_size();
+ int size = sizeof(struct mem_cgroup);
+ /* Can be very big if MAX_NUMNODES is very big */
if (size < PAGE_SIZE)
mem = kmalloc(size, GFP_KERNEL);
else
mem = vmalloc(size);
- if (mem)
- memset(mem, 0, size);
+ if (!mem)
+ return NULL;
+
+ memset(mem, 0, size);
+ mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+ if (!mem->stat) {
+ if (size < PAGE_SIZE)
+ kfree(mem);
+ else
+ vfree(mem);
+ mem = NULL;
+ }
return mem;
}
for_each_node_state(node, N_POSSIBLE)
free_mem_cgroup_per_zone_info(mem, node);
- if (mem_cgroup_size() < PAGE_SIZE)
+ free_percpu(mem->stat);
+ if (sizeof(struct mem_cgroup) < PAGE_SIZE)
kfree(mem);
else
vfree(mem);
batch_count = PRECHARGE_COUNT_AT_ONCE;
cond_resched();
}
- ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem,
- false, NULL);
+ ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
if (ret || !mem)
/* mem_cgroup_clear_mc() will do uncharge later */
return -ENOMEM;
}
return ret;
}
-#else /* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
- struct cgroup *cgroup,
- struct task_struct *p,
- bool threadgroup)
-{
- return 0;
-}
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
- struct cgroup *cgroup,
- struct task_struct *p,
- bool threadgroup)
-{
-}
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
- struct cgroup *cont,
- struct cgroup *old_cont,
- struct task_struct *p,
- bool threadgroup)
-{
-}
-#endif
/**
* is_target_pte_for_mc - check a pte whether it is valid for move charge
}
mem_cgroup_clear_mc();
}
+#else /* !CONFIG_MMU */
+static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
+ struct cgroup *cgroup,
+ struct task_struct *p,
+ bool threadgroup)
+{
+ return 0;
+}
+static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
+ struct cgroup *cgroup,
+ struct task_struct *p,
+ bool threadgroup)
+{
+}
+static void mem_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p,
+ bool threadgroup)
+{
+}
+#endif
struct cgroup_subsys mem_cgroup_subsys = {
.name = "memory",