X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Foom_kill.c;h=92bcf1db16b24ea2c9b4d9f6c93043118c021615;hb=184101bf143ac96d62b3dcc17e7b3550f98d3350;hp=4194b9db0104d54dea5ea871e0e111695fa49cba;hpb=fef1bdd68c81b71882ccb6f47c70980a03182063;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4194b9d..92bcf1d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -26,11 +26,12 @@ #include #include #include +#include int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks; -static DEFINE_SPINLOCK(zone_scan_mutex); +static DEFINE_SPINLOCK(zone_scan_lock); /* #define DEBUG */ /** @@ -52,10 +53,9 @@ static DEFINE_SPINLOCK(zone_scan_mutex); * of least surprise ... (be careful when you change it) */ -unsigned long badness(struct task_struct *p, unsigned long uptime, - struct mem_cgroup *mem) +unsigned long badness(struct task_struct *p, unsigned long uptime) { - unsigned long points, cpu_time, run_time, s; + unsigned long points, cpu_time, run_time; struct mm_struct *mm; struct task_struct *child; @@ -110,12 +110,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime, else run_time = 0; - s = int_sqrt(cpu_time); - if (s) - points /= s; - s = int_sqrt(int_sqrt(run_time)); - if (s) - points /= s; + if (cpu_time) + points /= int_sqrt(cpu_time); + if (run_time) + points /= int_sqrt(int_sqrt(run_time)); /* * Niced processes are most likely less important, so double @@ -128,7 +126,8 @@ unsigned long badness(struct task_struct *p, unsigned long uptime, * Superuser processes are usually more important, so we make it * less likely that we kill those. */ - if (__capable(p, CAP_SYS_ADMIN) || __capable(p, CAP_SYS_RESOURCE)) + if (has_capability_noaudit(p, CAP_SYS_ADMIN) || + has_capability_noaudit(p, CAP_SYS_RESOURCE)) points /= 4; /* @@ -137,7 +136,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime, * tend to only have this flag set on applications they think * of as important. */ - if (__capable(p, CAP_SYS_RAWIO)) + if (has_capability_noaudit(p, CAP_SYS_RAWIO)) points /= 4; /* @@ -174,12 +173,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) { #ifdef CONFIG_NUMA - struct zone **z; + struct zone *zone; + struct zoneref *z; + enum zone_type high_zoneidx = gfp_zone(gfp_mask); nodemask_t nodes = node_states[N_HIGH_MEMORY]; - for (z = zonelist->zones; *z; z++) - if (cpuset_zone_allowed_softwall(*z, gfp_mask)) - node_clear(zone_to_nid(*z), nodes); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) + if (cpuset_zone_allowed_softwall(zone, gfp_mask)) + node_clear(zone_to_nid(zone), nodes); else return CONSTRAINT_CPUSET; @@ -253,7 +254,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, if (p->oomkilladj == OOM_DISABLE) continue; - points = badness(p, uptime.tv_sec, mem); + points = badness(p, uptime.tv_sec); if (points > *ppoints || !chosen) { chosen = p; *ppoints = points; @@ -264,6 +265,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, } /** + * dump_tasks - dump current memory state of all system tasks + * @mem: target memory controller + * * Dumps the current memory state of all system tasks, excluding kernel threads. * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj * score, and name. @@ -288,17 +292,19 @@ static void dump_tasks(const struct mem_cgroup *mem) continue; if (mem && !task_in_mem_cgroup(p, mem)) continue; + if (!thread_group_leader(p)) + continue; task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->uid, p->tgid, p->mm->total_vm, - get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, - p->comm); + p->pid, __task_cred(p)->uid, p->tgid, + p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), + p->oomkilladj, p->comm); task_unlock(p); } while_each_thread(g, p); } -/** +/* * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO * set. @@ -384,7 +390,11 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, printk(KERN_WARNING "%s invoked oom-killer: " "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", current->comm, gfp_mask, order, current->oomkilladj); + task_lock(current); + cpuset_print_task_mems_allowed(current); + task_unlock(current); dump_stack(); + mem_cgroup_print_oom_info(mem, current); show_mem(); if (sysctl_oom_dump_tasks) dump_tasks(mem); @@ -412,14 +422,13 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, return oom_kill_task(p); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) { unsigned long points = 0; struct task_struct *p; - cgroup_lock(); - rcu_read_lock(); + read_lock(&tasklist_lock); retry: p = select_bad_process(&points, mem); if (PTR_ERR(p) == -1UL) @@ -432,8 +441,7 @@ retry: "Memory cgroup out of memory")) goto retry; out: - rcu_read_unlock(); - cgroup_unlock(); + read_unlock(&tasklist_lock); } #endif @@ -456,31 +464,31 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); * if a parallel OOM killing is already taking place that includes a zone in * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. */ -int try_set_zone_oom(struct zonelist *zonelist) +int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask) { - struct zone **z; + struct zoneref *z; + struct zone *zone; int ret = 1; - z = zonelist->zones; - - spin_lock(&zone_scan_mutex); - do { - if (zone_is_oom_locked(*z)) { + spin_lock(&zone_scan_lock); + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + if (zone_is_oom_locked(zone)) { ret = 0; goto out; } - } while (*(++z) != NULL); + } + + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + /* + * Lock each zone in the zonelist under zone_scan_lock so a + * parallel invocation of try_set_zone_oom() doesn't succeed + * when it shouldn't. + */ + zone_set_flag(zone, ZONE_OOM_LOCKED); + } - /* - * Lock each zone in the zonelist under zone_scan_mutex so a parallel - * invocation of try_set_zone_oom() doesn't succeed when it shouldn't. - */ - z = zonelist->zones; - do { - zone_set_flag(*z, ZONE_OOM_LOCKED); - } while (*(++z) != NULL); out: - spin_unlock(&zone_scan_mutex); + spin_unlock(&zone_scan_lock); return ret; } @@ -489,21 +497,92 @@ out: * allocation attempts with zonelists containing them may now recall the OOM * killer, if necessary. */ -void clear_zonelist_oom(struct zonelist *zonelist) +void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) +{ + struct zoneref *z; + struct zone *zone; + + spin_lock(&zone_scan_lock); + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + zone_clear_flag(zone, ZONE_OOM_LOCKED); + } + spin_unlock(&zone_scan_lock); +} + +/* + * Must be called with tasklist_lock held for read. + */ +static void __out_of_memory(gfp_t gfp_mask, int order) +{ + struct task_struct *p; + unsigned long points; + + if (sysctl_oom_kill_allocating_task) + if (!oom_kill_process(current, gfp_mask, order, 0, NULL, + "Out of memory (oom_kill_allocating_task)")) + return; +retry: + /* + * Rambo mode: Shoot down a process and hope it solves whatever + * issues we may have. + */ + p = select_bad_process(&points, NULL); + + if (PTR_ERR(p) == -1UL) + return; + + /* Found nothing?!?! Either we hang forever, or we panic. */ + if (!p) { + read_unlock(&tasklist_lock); + panic("Out of memory and no killable processes...\n"); + } + + if (oom_kill_process(p, gfp_mask, order, points, NULL, + "Out of memory")) + goto retry; +} + +/* + * pagefault handler calls into here because it is out of memory but + * doesn't know exactly how or why. + */ +void pagefault_out_of_memory(void) { - struct zone **z; + unsigned long freed = 0; - z = zonelist->zones; + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory back in the last second. */ + return; - spin_lock(&zone_scan_mutex); - do { - zone_clear_flag(*z, ZONE_OOM_LOCKED); - } while (*(++z) != NULL); - spin_unlock(&zone_scan_mutex); + /* + * If this is from memcg, oom-killer is already invoked. + * and not worth to go system-wide-oom. + */ + if (mem_cgroup_oom_called(current)) + goto rest_and_return; + + if (sysctl_panic_on_oom) + panic("out of memory from page fault. panic_on_oom is selected.\n"); + + read_lock(&tasklist_lock); + __out_of_memory(0, 0); /* unknown gfp_mask and order */ + read_unlock(&tasklist_lock); + + /* + * Give "p" a good chance of killing itself before we + * retry to allocate memory. + */ +rest_and_return: + if (!test_thread_flag(TIF_MEMDIE)) + schedule_timeout_uninterruptible(1); } /** * out_of_memory - kill the "best" process when we run out of memory + * @zonelist: zonelist pointer + * @gfp_mask: memory allocation flags + * @order: amount of memory being requested as a power of 2 * * If we run out of memory, we have the choice between either * killing a random task (bad), letting the system crash (worse) @@ -512,8 +591,6 @@ void clear_zonelist_oom(struct zonelist *zonelist) */ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { - struct task_struct *p; - unsigned long points = 0; unsigned long freed = 0; enum oom_constraint constraint; @@ -534,7 +611,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) switch (constraint) { case CONSTRAINT_MEMORY_POLICY: - oom_kill_process(current, gfp_mask, order, points, NULL, + oom_kill_process(current, gfp_mask, order, 0, NULL, "No available memory (MPOL_BIND)"); break; @@ -543,35 +620,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) panic("out of memory. panic_on_oom is selected\n"); /* Fall-through */ case CONSTRAINT_CPUSET: - if (sysctl_oom_kill_allocating_task) { - oom_kill_process(current, gfp_mask, order, points, NULL, - "Out of memory (oom_kill_allocating_task)"); - break; - } -retry: - /* - * Rambo mode: Shoot down a process and hope it solves whatever - * issues we may have. - */ - p = select_bad_process(&points, NULL); - - if (PTR_ERR(p) == -1UL) - goto out; - - /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!p) { - read_unlock(&tasklist_lock); - panic("Out of memory and no killable processes...\n"); - } - - if (oom_kill_process(p, gfp_mask, order, points, NULL, - "Out of memory")) - goto retry; - + __out_of_memory(gfp_mask, order); break; } -out: read_unlock(&tasklist_lock); /*