+ rcu_read_lock();
+ ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX);
+ if (ret < 0) {
+ rcu_read_unlock();
+ goto done;
+ }
+ rcu_read_unlock();
+
+ /*
+ * Continues from above, so we don't need a KERN_ level
+ */
+ printk(KERN_CONT " as a result of limit of %s\n", memcg_name);
+done:
+
+ printk(KERN_INFO "memory: usage %llukB, limit %llukB, failcnt %llu\n",
+ res_counter_read_u64(&memcg->res, RES_USAGE) >> 10,
+ res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10,
+ res_counter_read_u64(&memcg->res, RES_FAILCNT));
+ printk(KERN_INFO "memory+swap: usage %llukB, limit %llukB, "
+ "failcnt %llu\n",
+ res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10,
+ res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10,
+ res_counter_read_u64(&memcg->memsw, RES_FAILCNT));
+}
+
+/*
+ * Count the memcgs in the hierarchy tree rooted at @mem. The root itself is
+ * counted, so the result is 1 when @mem has no children.
+ */
+static int mem_cgroup_count_children(struct mem_cgroup *mem)
+{
+	int nr_groups = 0;
+
+	/*
+	 * The counter is handed to the walk as its data pointer; the callback
+	 * is expected to bump it for each group visited.
+	 */
+	mem_cgroup_walk_tree(mem, &nr_groups, mem_cgroup_count_children_cb);
+
+	return nr_groups;
+}
+
+/*
+ * Pick the next memcg under @root_mem to reclaim free pages from. The pick
+ * need not be the first child in cgroup-list order: the scan resumes right
+ * after root_mem->last_scanned_child, which is updated on every attempt.
+ *
+ * The returned memcg has a css reference held (css_get() on the root, or a
+ * successful css_tryget() on a scanned group); the caller drops it.
+ */
+static struct mem_cgroup *
+mem_cgroup_select_victim(struct mem_cgroup *root_mem)
+{
+	struct cgroup_subsys_state *next_css;
+	struct mem_cgroup *victim = NULL;
+	int start_id, found_id;
+
+	/* Without hierarchy the root itself is the only candidate. */
+	if (!root_mem->use_hierarchy) {
+		css_get(&root_mem->css);
+		return root_mem;
+	}
+
+	do {
+		rcu_read_lock();
+		start_id = root_mem->last_scanned_child + 1;
+		next_css = css_get_next(&mem_cgroup_subsys, start_id,
+					&root_mem->css, &found_id);
+		/* A group whose css_tryget() fails is skipped; we scan on. */
+		if (next_css && css_tryget(next_css))
+			victim = container_of(next_css, struct mem_cgroup, css);
+		rcu_read_unlock();
+
+		/*
+		 * Update the scanning cursor. When nothing was found the
+		 * cursor goes back to 0 so the next pass starts from ID:1.
+		 */
+		spin_lock(&root_mem->reclaim_param_lock);
+		root_mem->last_scanned_child = next_css ? found_id : 0;
+		spin_unlock(&root_mem->reclaim_param_lock);
+	} while (!victim);
+
+	return victim;
+}
+
+/*
+ * Reclaim memory from the hierarchy below @root_mem, one victim at a time.
+ * The last scanned child is remembered (see mem_cgroup_select_victim()), so
+ * no single child is penalized just because of its position in the
+ * children list.
+ *
+ * root_mem is the original ancestor that we have been reclaiming from.
+ *
+ * We give up and return to the caller once root_mem has been visited twice.
+ * (Other groups can be removed while we're walking....)
+ *
+ * If shrink == true, this returns immediately after the first reclaim
+ * attempt, to avoid freeing too much.
+ *
+ * Returns the result of that single attempt when @shrink is set; otherwise
+ * 1 + the accumulated reclaim results as soon as root_mem is under its
+ * limit, or just the accumulated results when the walk gives up.
+ */
+static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
+					   gfp_t gfp_mask, bool noswap,
+					   bool shrink)
+{
+	struct mem_cgroup *victim;
+	int root_visits = 0;
+	int reclaimed = 0;
+	int nr;
+
+	while (root_visits < 2) {
+		victim = mem_cgroup_select_victim(root_mem);
+		if (victim == root_mem)
+			root_visits++;
+
+		if (mem_cgroup_local_usage(&victim->stat)) {
+			/* we use swappiness of local cgroup */
+			nr = try_to_free_mem_cgroup_pages(victim, gfp_mask,
+							  noswap,
+							  get_swappiness(victim));
+			css_put(&victim->css);
+			/*
+			 * When shrinking usage we cannot tell here whether to
+			 * stop or reclaim more; that is up to the caller.
+			 * last_scanned_child keeps things fair under the tree.
+			 */
+			if (shrink)
+				return nr;
+			reclaimed += nr;
+			if (mem_cgroup_check_under_limit(root_mem))
+				return 1 + reclaimed;
+		} else {
+			/* this cgroup's local usage == 0: nothing to reclaim */
+			css_put(&victim->css);
+		}
+	}
+
+	return reclaimed;
+}
+
+/*
+ * Report whether the memcg that @task's mm owner belongs to has had
+ * last_oom_jiffies stamped within the last HZ/10 jiffies. A task without
+ * an mm is looked up through init_mm instead.
+ */
+bool mem_cgroup_oom_called(struct task_struct *task)
+{
+	struct mm_struct *mm;
+	struct mem_cgroup *memcg;
+	bool recent_oom;
+
+	rcu_read_lock();
+	mm = task->mm;
+	if (!mm)
+		mm = &init_mm;
+	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	recent_oom = memcg &&
+		     time_before(jiffies, memcg->last_oom_jiffies + HZ/10);
+	rcu_read_unlock();
+
+	return recent_oom;
+}
+
+/*
+ * Tree-walk callback: stamp @mem with the current jiffies as the time of
+ * its last OOM. @data is unused. Always returns 0.
+ */
+static int record_last_oom_cb(struct mem_cgroup *mem, void *data)
+{
+	mem->last_oom_jiffies = jiffies;
+
+	return 0;
+}
+
+/*
+ * Run record_last_oom_cb() over the tree walked from @mem; no callback
+ * data is needed, so NULL is passed.
+ */
+static void record_last_oom(struct mem_cgroup *mem)
+{
+	mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
+}
+
+
+/*
+ * Unlike the exported interface, this takes an "oom" parameter. If oom==true,
+ * the oom-killer can be invoked.
+ *
+ * Charge PAGE_SIZE against a memcg: first its "res" counter and then, when
+ * do_swap_account is set, its "memsw" counter. When a counter refuses the
+ * charge, reclaim from the group that owns the failing counter (derived
+ * from fail_res) and retry.
+ *
+ * @mm:       handed to try_get_mem_cgroup_from_mm() to find the group when
+ *            *@memcg is NULL on entry.
+ * @gfp_mask: without __GFP_WAIT a refused charge fails at once instead of
+ *            reclaiming; otherwise it is passed on to reclaim and to
+ *            mem_cgroup_out_of_memory().
+ * @memcg:    in/out. A non-NULL *@memcg is the group to charge and gets an
+ *            extra css_get(). A NULL *@memcg is replaced by the group found
+ *            from @mm. It is set to NULL when the current thread has
+ *            TIF_MEMDIE.
+ * @oom:      when true, running out of retries calls
+ *            mem_cgroup_out_of_memory() and record_last_oom() on the
+ *            over-limit group before failing.
+ *
+ * Returns 0 when the charge succeeded, and also when nothing was charged
+ * because the thread has TIF_MEMDIE or no group was found (*@memcg is NULL
+ * in both of those cases). Returns -ENOMEM on failure, after css_put() on
+ * the group. The success path returns without css_put(), so the reference
+ * taken here (css_get(), or presumably by try_get_mem_cgroup_from_mm())
+ * stays with the caller.
+ */
+static int __mem_cgroup_try_charge(struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcg,
+ bool oom)
+{
+ struct mem_cgroup *mem, *mem_over_limit;
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+ struct res_counter *fail_res;
+
+ if (unlikely(test_thread_flag(TIF_MEMDIE))) {
+ /* Don't account this! */
+ *memcg = NULL;
+ return 0;
+ }
+
+ /*
+ * We always charge the cgroup the mm_struct belongs to.
+ * The mm_struct's mem_cgroup changes on task migration if the
+ * thread group leader migrates. It's possible that mm is not
+ * set, if so charge the init_mm (happens for pagecache usage).
+ */
+ mem = *memcg;
+ if (likely(!mem)) {
+ mem = try_get_mem_cgroup_from_mm(mm);
+ *memcg = mem;
+ } else {
+ css_get(&mem->css);
+ }
+ if (unlikely(!mem))
+ return 0;
+
+ VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+
+ while (1) {
+ int ret;
+ bool noswap = false;
+
+ ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
+ if (likely(!ret)) {
+ if (!do_swap_account)
+ break;
+ ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
+ &fail_res);
+ if (likely(!ret))
+ break;
+ /*
+ * mem+swap counter fails: undo the res charge made just
+ * above and ask reclaim not to use swap.
+ */
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
+ noswap = true;
+ mem_over_limit = mem_cgroup_from_res_counter(fail_res,
+ memsw);
+ } else
+ /* mem counter fails */
+ mem_over_limit = mem_cgroup_from_res_counter(fail_res,
+ res);
+
+ if (!(gfp_mask & __GFP_WAIT))
+ goto nomem;
+
+ ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
+ noswap, false);
+ if (ret)
+ continue; /* reclaim reported progress: retry the charge */
+
+ /*
+ * try_to_free_mem_cgroup_pages() might not give us a full
+ * picture of reclaim. Some pages are reclaimed and might be
+ * moved to swap cache or just unmapped from the cgroup.
+ * Check the limit again to see if the reclaim reduced the
+ * current usage of the cgroup before giving up
+ *
+ * Neither this retry nor the one above consumes nr_retries;
+ * only a pass that is still over the limit does.
+ */
+ if (mem_cgroup_check_under_limit(mem_over_limit))
+ continue;
+
+ if (!nr_retries--) {
+ if (oom) {
+ mutex_lock(&memcg_tasklist);
+ mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
+ mutex_unlock(&memcg_tasklist);
+ record_last_oom(mem_over_limit);
+ }
+ goto nomem;
+ }
+ }
+ return 0;
+nomem:
+ css_put(&mem->css);
+ return -ENOMEM;
+}
+
+
+/*
+ * Helper that maps a css ID to its mem_cgroup. Must be called under
+ * rcu_read_lock(). No reference is taken here, and the caller must check
+ * css_is_removed() (or similar) if that matters to it: dropping a refcnt
+ * from swap can be called against an already removed memcg.
+ *
+ * Returns NULL for ID 0 and for an ID that css_lookup() does not find.
+ */
+static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
+{
+	struct cgroup_subsys_state *css = NULL;
+
+	/* ID 0 is the unused ID, so it is never looked up. */
+	if (id)
+		css = css_lookup(&mem_cgroup_subsys, id);
+
+	return css ? container_of(css, struct mem_cgroup, css) : NULL;
+}
+
+/*
+ * Find the memcg a locked swap-cache page is accounted to and take a css
+ * reference on it with css_tryget().
+ *
+ * A page_cgroup that is already USED names the group directly; otherwise
+ * the group is looked up by the ID recorded for the page's swap entry.
+ *
+ * Returns NULL when @page is not in the swap cache, when no group is found,
+ * or when css_tryget() fails.
+ */
+static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
+{
+	struct page_cgroup *pc;
+	struct mem_cgroup *memcg;
+	swp_entry_t entry;
+	unsigned short swap_id;
+
+	VM_BUG_ON(!PageLocked(page));
+
+	if (!PageSwapCache(page))
+		return NULL;
+
+	pc = lookup_page_cgroup(page);
+
+	/* The Used bit of a swapcache page is stable under the page lock. */
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		if (memcg && !css_tryget(&memcg->css))
+			memcg = NULL;
+		return memcg;
+	}
+
+	entry.val = page_private(page);
+	swap_id = lookup_swap_cgroup(entry);
+
+	/* mem_cgroup_lookup() must run under rcu_read_lock(). */
+	rcu_read_lock();
+	memcg = mem_cgroup_lookup(swap_id);
+	if (memcg && !css_tryget(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	return memcg;
+}
+
+/*
+ * Commit a charge obtained by __mem_cgroup_try_charge() and put the
+ * page_cgroup into the USED state. If it is already USED, uncharge and return.
+ *
+ * @mem:   group the charge was taken against; NULL makes this a no-op.
+ * @pc:    page_cgroup to update; it is held with lock_page_cgroup() across
+ *         the USED check and the update.
+ * @ctype: index into pcg_default_flags[] selecting the value stored in
+ *         pc->flags.
+ *
+ * In the already-USED case PAGE_SIZE is given back to mem->res (and to
+ * mem->memsw when do_swap_account is set) and css_put() is called on @mem.
+ */
+
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+ struct page_cgroup *pc,
+ enum charge_type ctype)
+{
+ /* try_charge() can return NULL to *memcg, taking care of it. */
+ if (!mem)
+ return;
+
+ lock_page_cgroup(pc);
+ if (unlikely(PageCgroupUsed(pc))) {
+ unlock_page_cgroup(pc);
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
+ if (do_swap_account)
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ css_put(&mem->css);
+ return;
+ }
+ pc->mem_cgroup = mem;
+ smp_wmb(); /* order the mem_cgroup store before the flags store below */
+ pc->flags = pcg_default_flags[ctype];
+
+ mem_cgroup_charge_statistics(mem, pc, true);
+
+ unlock_page_cgroup(pc);
+}
+
+/**
+ * mem_cgroup_move_account - move account of the page
+ * @pc: page_cgroup of the page.
+ * @from: mem_cgroup which the page is moved from.
+ * @to: mem_cgroup which the page is moved to. @from != @to.
+ *
+ * The caller must confirm following.
+ * - page is not on LRU (isolate_page() is useful.)
+ *
+ * returns 0 at success,
+ * returns -EBUSY when lock is busy or "pc" is unstable.
+ *
+ * This function does "uncharge" from old cgroup but doesn't do "charge" to
+ * new cgroup. It should be done by a caller.
+ *
+ * "pc is unstable" here means that, once the lock is held, @pc is not USED
+ * or pc->mem_cgroup is no longer @from. On success PAGE_SIZE has been
+ * uncharged from from->res (and from->memsw when do_swap_account is set),
+ * a css reference has been dropped on @from and taken on @to, and
+ * pc->mem_cgroup points at @to.
+ *
+ * NOTE(review): from_mz and to_mz are assigned but not used anywhere in
+ * this function body; confirm whether they are still needed.
+ */
+
+static int mem_cgroup_move_account(struct page_cgroup *pc,
+ struct mem_cgroup *from, struct mem_cgroup *to)
+{
+ struct mem_cgroup_per_zone *from_mz, *to_mz;
+ int nid, zid;
+ int ret = -EBUSY;
+
+ VM_BUG_ON(from == to);
+ VM_BUG_ON(PageLRU(pc->page));
+
+ nid = page_cgroup_nid(pc);
+ zid = page_cgroup_zid(pc);
+ from_mz = mem_cgroup_zoneinfo(from, nid, zid);
+ to_mz = mem_cgroup_zoneinfo(to, nid, zid);
+
+ if (!trylock_page_cgroup(pc))
+ return ret; /* lock is busy: -EBUSY, nothing touched */
+
+ if (!PageCgroupUsed(pc))
+ goto out;
+
+ if (pc->mem_cgroup != from)
+ goto out;
+
+ res_counter_uncharge(&from->res, PAGE_SIZE);
+ mem_cgroup_charge_statistics(from, pc, false);
+ if (do_swap_account)
+ res_counter_uncharge(&from->memsw, PAGE_SIZE);
+ css_put(&from->css); /* drop a reference on the old group ... */
+
+ css_get(&to->css); /* ... and take one on the new group */
+ pc->mem_cgroup = to;
+ mem_cgroup_charge_statistics(to, pc, true);
+ ret = 0;
+out:
+ unlock_page_cgroup(pc);
+ return ret;
+}
+
+/*
+ * move charges to its parent.
+ */
+
+static int mem_cgroup_move_parent(struct page_cgroup *pc,
+ struct mem_cgroup *child,
+ gfp_t gfp_mask)
+{
+ struct page *page = pc->page;
+ struct cgroup *cg = child->css.cgroup;
+ struct cgroup *pcg = cg->parent;
+ struct mem_cgroup *parent;
+ int ret;
+
+ /* Is ROOT ? */
+ if (!pcg)
+ return -EINVAL;
+
+
+ parent = mem_cgroup_from_cont(pcg);
+
+
+ ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+ if (ret || !parent)
+ return ret;