X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=f76db9dcaa05e592f3138f5389ae4c22827b58d9;hb=2d29c6a075787f2c1bc49b86a084d2b878f72fc4;hp=eab7bd6628e0ad48ef5f1d4fcea2af05f173bae5;hpb=d294eb83d8d39a29f01dad391f15fc3a29aa04f9;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/cpuset.c b/kernel/cpuset.c index eab7bd6..f76db9d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,14 @@ #include /* + * Workqueue for cpuset related tasks. + * + * Using kevent workqueue may cause deadlock when memory_migrate + * is set. So we create a separate workqueue thread for cpuset. + */ +static struct workqueue_struct *cpuset_wq; + +/* * Tracks how many cpusets are currently defined in system. * When there is only one cpuset (the root cpuset) we can * short circuit some hooks. @@ -83,7 +92,7 @@ struct cpuset { struct cgroup_subsys_state css; unsigned long flags; /* "unsigned long" so bitops work */ - cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ + cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ struct cpuset *parent; /* my parent */ @@ -194,8 +203,6 @@ static int cpuset_mems_generation; static struct cpuset top_cpuset = { .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), - .cpus_allowed = CPU_MASK_ALL, - .mems_allowed = NODE_MASK_ALL, }; /* @@ -239,6 +246,17 @@ static struct cpuset top_cpuset = { static DEFINE_MUTEX(callback_mutex); /* + * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist + * buffers. They are statically allocated to prevent using excess stack + * when calling cpuset_print_task_mems_allowed(). + */ +#define CPUSET_NAME_LEN (128) +#define CPUSET_NODELIST_LEN (256) +static char cpuset_name[CPUSET_NAME_LEN]; +static char cpuset_nodelist[CPUSET_NODELIST_LEN]; +static DEFINE_SPINLOCK(cpuset_buffer_lock); + +/* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead @@ -266,7 +284,7 @@ static struct file_system_type cpuset_fs_type = { }; /* - * Return in *pmask the portion of a cpusets's cpus_allowed that + * Return in pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy * until we find one that does have some online cpus. If we get * all the way to the top and still haven't found any online cpus, @@ -279,15 +297,16 @@ static struct file_system_type cpuset_fs_type = { * Call with callback_mutex held. */ -static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) +static void guarantee_online_cpus(const struct cpuset *cs, + struct cpumask *pmask) { - while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map)) + while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) cs = cs->parent; if (cs) - cpus_and(*pmask, cs->cpus_allowed, cpu_online_map); + cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); else - *pmask = cpu_online_map; - BUG_ON(!cpus_intersects(*pmask, cpu_online_map)); + cpumask_copy(pmask, cpu_online_mask); + BUG_ON(!cpumask_intersects(pmask, cpu_online_mask)); } /* @@ -363,14 +382,9 @@ void cpuset_update_task_memory_state(void) struct task_struct *tsk = current; struct cpuset *cs; - if (task_cs(tsk) == &top_cpuset) { - /* Don't need rcu for top_cpuset. It's never freed. */ - my_cpusets_mem_gen = top_cpuset.mems_generation; - } else { - rcu_read_lock(); - my_cpusets_mem_gen = task_cs(tsk)->mems_generation; - rcu_read_unlock(); - } + rcu_read_lock(); + my_cpusets_mem_gen = task_cs(tsk)->mems_generation; + rcu_read_unlock(); if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { mutex_lock(&callback_mutex); @@ -402,12 +416,43 @@ void cpuset_update_task_memory_state(void) static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { - return cpus_subset(p->cpus_allowed, q->cpus_allowed) && + return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q); } +/** + * alloc_trial_cpuset - allocate a trial cpuset + * @cs: the cpuset that the trial cpuset duplicates + */ +static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs) +{ + struct cpuset *trial; + + trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); + if (!trial) + return NULL; + + if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) { + kfree(trial); + return NULL; + } + cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); + + return trial; +} + +/** + * free_trial_cpuset - free the trial cpuset + * @trial: the trial cpuset to be freed + */ +static void free_trial_cpuset(struct cpuset *trial) +{ + free_cpumask_var(trial->cpus_allowed); + kfree(trial); +} + /* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. @@ -457,7 +502,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) c = cgroup_cs(cont); if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && - cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) + cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) return -EINVAL; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && @@ -467,7 +512,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ if (cgroup_task_count(cur->css.cgroup)) { - if (cpus_empty(trial->cpus_allowed) || + if (cpumask_empty(trial->cpus_allowed) || nodes_empty(trial->mems_allowed)) { return -ENOSPC; } @@ -482,7 +527,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) */ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) { - return cpus_intersects(a->cpus_allowed, b->cpus_allowed); + return cpumask_intersects(a->cpus_allowed, b->cpus_allowed); } static void @@ -507,7 +552,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) cp = list_first_entry(&q, struct cpuset, stack_list); list_del(q.next); - if (cpus_empty(cp->cpus_allowed)) + if (cpumask_empty(cp->cpus_allowed)) continue; if (is_sched_load_balance(cp)) @@ -531,7 +576,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this @@ -574,7 +619,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * element of the partition (one sched domain) to be passed to * partition_sched_domains(). */ -static int generate_sched_domains(cpumask_t **domains, +/* FIXME: see the FIXME in partition_sched_domains() */ +static int generate_sched_domains(struct cpumask **domains, struct sched_domain_attr **attributes) { LIST_HEAD(q); /* queue of cpusets to be scanned */ @@ -582,19 +628,18 @@ static int generate_sched_domains(cpumask_t **domains, struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ - cpumask_t *doms; /* resulting partition; i.e. sched domains */ + struct cpumask *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ - int ndoms; /* number of sched domains in result */ - int nslot; /* next empty doms[] cpumask_t slot */ + int ndoms = 0; /* number of sched domains in result */ + int nslot; /* next empty doms[] struct cpumask slot */ - ndoms = 0; doms = NULL; dattr = NULL; csa = NULL; /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { - doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + doms = kmalloc(cpumask_size(), GFP_KERNEL); if (!doms) goto done; @@ -603,7 +648,7 @@ static int generate_sched_domains(cpumask_t **domains, *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } - *doms = top_cpuset.cpus_allowed; + cpumask_copy(doms, top_cpuset.cpus_allowed); ndoms = 1; goto done; @@ -622,7 +667,7 @@ static int generate_sched_domains(cpumask_t **domains, cp = list_first_entry(&q, struct cpuset, stack_list); list_del(q.next); - if (cpus_empty(cp->cpus_allowed)) + if (cpumask_empty(cp->cpus_allowed)) continue; /* @@ -673,11 +718,9 @@ restart: * Now we know how many domains to create. * Convert to and populate cpu masks. */ - doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); - if (!doms) { - ndoms = 0; + doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); + if (!doms) goto done; - } /* * The rest of the code, including the scheduler, can deal with @@ -687,7 +730,7 @@ restart: for (nslot = 0, i = 0; i < csn; i++) { struct cpuset *a = csa[i]; - cpumask_t *dp; + struct cpumask *dp; int apn = a->pn; if (apn < 0) { @@ -710,14 +753,14 @@ restart: continue; } - cpus_clear(*dp); + cpumask_clear(dp); if (dattr) *(dattr + nslot) = SD_ATTR_INIT; for (j = i; j < csn; j++) { struct cpuset *b = csa[j]; if (apn == b->pn) { - cpus_or(*dp, *dp, b->cpus_allowed); + cpumask_or(dp, dp, b->cpus_allowed); if (dattr) update_domain_attr_tree(dattr + nslot, b); @@ -732,6 +775,13 @@ restart: done: kfree(csa); + /* + * Fallback to the default domain if kmalloc() failed. + * See comments in partition_sched_domains(). + */ + if (doms == NULL) + ndoms = 1; + *domains = doms; *attributes = dattr; return ndoms; @@ -750,7 +800,7 @@ done: static void do_rebuild_sched_domains(struct work_struct *unused) { struct sched_domain_attr *attr; - cpumask_t *doms; + struct cpumask *doms; int ndoms; get_online_cpus(); @@ -789,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains); */ static void async_rebuild_sched_domains(void) { - schedule_work(&rebuild_sched_domains_work); + queue_work(cpuset_wq, &rebuild_sched_domains_work); } /* @@ -819,7 +869,7 @@ void rebuild_sched_domains(void) static int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) { - return !cpus_equal(tsk->cpus_allowed, + return !cpumask_equal(&tsk->cpus_allowed, (cgroup_cs(scan->cg))->cpus_allowed); } @@ -837,7 +887,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk, static void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) { - set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); + set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed)); } /** @@ -869,10 +919,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) * @cs: the cpuset to consider * @buf: buffer of cpu numbers written to this cpuset */ -static int update_cpumask(struct cpuset *cs, const char *buf) +static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, + const char *buf) { struct ptr_heap heap; - struct cpuset trialcs; int retval; int is_load_balanced; @@ -880,8 +930,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf) if (cs == &top_cpuset) return -EACCES; - trialcs = *cs; - /* * An empty cpus_allowed is ok only if the cpuset has no tasks. * Since cpulist_parse() fails on an empty mask, we special case @@ -889,31 +937,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf) * with tasks have cpus. */ if (!*buf) { - cpus_clear(trialcs.cpus_allowed); + cpumask_clear(trialcs->cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs.cpus_allowed); + retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) return retval; - if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) + if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) return -EINVAL; } - retval = validate_change(cs, &trialcs); + retval = validate_change(cs, trialcs); if (retval < 0) return retval; /* Nothing to do if the cpus didn't change */ - if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) + if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) return 0; retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); if (retval) return retval; - is_load_balanced = is_sched_load_balance(&trialcs); + is_load_balanced = is_sched_load_balance(trialcs); mutex_lock(&callback_mutex); - cs->cpus_allowed = trialcs.cpus_allowed; + cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); /* @@ -1001,7 +1049,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem) cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ fudge = 10; /* spare mmarray[] slots */ - fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ + fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */ retval = -ENOMEM; /* @@ -1088,9 +1136,9 @@ done: * lock each such tasks mm->mmap_sem, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. */ -static int update_nodemask(struct cpuset *cs, const char *buf) +static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, + const char *buf) { - struct cpuset trialcs; nodemask_t oldmem; int retval; @@ -1101,8 +1149,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf) if (cs == &top_cpuset) return -EACCES; - trialcs = *cs; - /* * An empty mems_allowed is ok iff there are no tasks in the cpuset. * Since nodelist_parse() fails on an empty mask, we special case @@ -1110,27 +1156,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf) * with tasks have memory. */ if (!*buf) { - nodes_clear(trialcs.mems_allowed); + nodes_clear(trialcs->mems_allowed); } else { - retval = nodelist_parse(buf, trialcs.mems_allowed); + retval = nodelist_parse(buf, trialcs->mems_allowed); if (retval < 0) goto done; - if (!nodes_subset(trialcs.mems_allowed, + if (!nodes_subset(trialcs->mems_allowed, node_states[N_HIGH_MEMORY])) return -EINVAL; } oldmem = cs->mems_allowed; - if (nodes_equal(oldmem, trialcs.mems_allowed)) { + if (nodes_equal(oldmem, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } - retval = validate_change(cs, &trialcs); + retval = validate_change(cs, trialcs); if (retval < 0) goto done; mutex_lock(&callback_mutex); - cs->mems_allowed = trialcs.mems_allowed; + cs->mems_allowed = trialcs->mems_allowed; cs->mems_generation = cpuset_mems_generation++; mutex_unlock(&callback_mutex); @@ -1151,7 +1197,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) if (val != cs->relax_domain_level) { cs->relax_domain_level = val; - if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) + if (!cpumask_empty(cs->cpus_allowed) && + is_sched_load_balance(cs)) async_rebuild_sched_domains(); } @@ -1170,32 +1217,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { - struct cpuset trialcs; + struct cpuset *trialcs; int err; - int cpus_nonempty, balance_flag_changed; + int balance_flag_changed; + + trialcs = alloc_trial_cpuset(cs); + if (!trialcs) + return -ENOMEM; - trialcs = *cs; if (turning_on) - set_bit(bit, &trialcs.flags); + set_bit(bit, &trialcs->flags); else - clear_bit(bit, &trialcs.flags); + clear_bit(bit, &trialcs->flags); - err = validate_change(cs, &trialcs); + err = validate_change(cs, trialcs); if (err < 0) - return err; + goto out; - cpus_nonempty = !cpus_empty(trialcs.cpus_allowed); balance_flag_changed = (is_sched_load_balance(cs) != - is_sched_load_balance(&trialcs)); + is_sched_load_balance(trialcs)); mutex_lock(&callback_mutex); - cs->flags = trialcs.flags; + cs->flags = trialcs->flags; mutex_unlock(&callback_mutex); - if (cpus_nonempty && balance_flag_changed) + if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) async_rebuild_sched_domains(); - return 0; +out: + free_trial_cpuset(trialcs); + return err; } /* @@ -1296,42 +1347,47 @@ static int fmeter_getrate(struct fmeter *fmp) return val; } +/* Protected by cgroup_lock */ +static cpumask_var_t cpus_attach; + /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, struct task_struct *tsk) { struct cpuset *cs = cgroup_cs(cont); + int ret = 0; - if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) + if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) return -ENOSPC; - if (tsk->flags & PF_THREAD_BOUND) { - cpumask_t mask; + if (tsk->flags & PF_THREAD_BOUND) { mutex_lock(&callback_mutex); - mask = cs->cpus_allowed; + if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed)) + ret = -EINVAL; mutex_unlock(&callback_mutex); - if (!cpus_equal(tsk->cpus_allowed, mask)) - return -EINVAL; } - return security_task_setscheduler(tsk, 0, NULL); + return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL); } static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *oldcont, struct task_struct *tsk) { - cpumask_t cpus; nodemask_t from, to; struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); int err; - mutex_lock(&callback_mutex); - guarantee_online_cpus(cs, &cpus); - err = set_cpus_allowed_ptr(tsk, &cpus); - mutex_unlock(&callback_mutex); + if (cs == &top_cpuset) { + cpumask_copy(cpus_attach, cpu_possible_mask); + } else { + mutex_lock(&callback_mutex); + guarantee_online_cpus(cs, cpus_attach); + mutex_unlock(&callback_mutex); + } + err = set_cpus_allowed_ptr(tsk, cpus_attach); if (err) return; @@ -1344,7 +1400,6 @@ static void cpuset_attach(struct cgroup_subsys *ss, cpuset_migrate_mm(mm, &from, &to); mmput(mm); } - } /* The various types of files and directories in a cpuset file system */ @@ -1439,21 +1494,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, const char *buf) { int retval = 0; + struct cpuset *cs = cgroup_cs(cgrp); + struct cpuset *trialcs; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; + trialcs = alloc_trial_cpuset(cs); + if (!trialcs) + return -ENOMEM; + switch (cft->private) { case FILE_CPULIST: - retval = update_cpumask(cgroup_cs(cgrp), buf); + retval = update_cpumask(cs, trialcs, buf); break; case FILE_MEMLIST: - retval = update_nodemask(cgroup_cs(cgrp), buf); + retval = update_nodemask(cs, trialcs, buf); break; default: retval = -EINVAL; break; } + + free_trial_cpuset(trialcs); cgroup_unlock(); return retval; } @@ -1472,13 +1535,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) { - cpumask_t mask; + int ret; mutex_lock(&callback_mutex); - mask = cs->cpus_allowed; + ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); mutex_unlock(&callback_mutex); - return cpulist_scnprintf(page, PAGE_SIZE, mask); + return ret; } static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) @@ -1714,7 +1777,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, parent_cs = cgroup_cs(parent); cs->mems_allowed = parent_cs->mems_allowed; - cs->cpus_allowed = parent_cs->cpus_allowed; + cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); return; } @@ -1740,6 +1803,10 @@ static struct cgroup_subsys_state *cpuset_create( cs = kmalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); + if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { + kfree(cs); + return ERR_PTR(-ENOMEM); + } cpuset_update_task_memory_state(); cs->flags = 0; @@ -1748,7 +1815,7 @@ static struct cgroup_subsys_state *cpuset_create( if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); - cpus_clear(cs->cpus_allowed); + cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); cs->mems_generation = cpuset_mems_generation++; fmeter_init(&cs->fmeter); @@ -1775,6 +1842,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); number_of_cpusets--; + free_cpumask_var(cs->cpus_allowed); kfree(cs); } @@ -1798,6 +1866,8 @@ struct cgroup_subsys cpuset_subsys = { int __init cpuset_init_early(void) { + alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); + top_cpuset.mems_generation = cpuset_mems_generation++; return 0; } @@ -1813,7 +1883,7 @@ int __init cpuset_init(void) { int err = 0; - cpus_setall(top_cpuset.cpus_allowed); + cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); fmeter_init(&top_cpuset.fmeter); @@ -1825,6 +1895,9 @@ int __init cpuset_init(void) if (err < 0) return err; + if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) + BUG(); + number_of_cpusets = 1; return 0; } @@ -1899,7 +1972,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) * has online cpus, so can't be empty). */ parent = cs->parent; - while (cpus_empty(parent->cpus_allowed) || + while (cpumask_empty(parent->cpus_allowed) || nodes_empty(parent->mems_allowed)) parent = parent->parent; @@ -1940,7 +2013,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) } /* Continue past cpusets with all cpus, mems online */ - if (cpus_subset(cp->cpus_allowed, cpu_online_map) && + if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) continue; @@ -1948,13 +2021,14 @@ static void scan_for_empty_cpusets(struct cpuset *root) /* Remove offline cpus and mems from this cpuset. */ mutex_lock(&callback_mutex); - cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); + cpumask_and(cp->cpus_allowed, cp->cpus_allowed, + cpu_online_mask); nodes_and(cp->mems_allowed, cp->mems_allowed, node_states[N_HIGH_MEMORY]); mutex_unlock(&callback_mutex); /* Move tasks from the empty cpuset to a parent */ - if (cpus_empty(cp->cpus_allowed) || + if (cpumask_empty(cp->cpus_allowed) || nodes_empty(cp->mems_allowed)) remove_tasks_in_empty_cpuset(cp); else { @@ -1980,7 +2054,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { struct sched_domain_attr *attr; - cpumask_t *doms; + struct cpumask *doms; int ndoms; switch (phase) { @@ -1995,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, } cgroup_lock(); - top_cpuset.cpus_allowed = cpu_online_map; + cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); scan_for_empty_cpusets(&top_cpuset); ndoms = generate_sched_domains(&doms, &attr); cgroup_unlock(); @@ -2012,12 +2086,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. * See also the previous routine cpuset_track_online_cpus(). */ -void cpuset_track_online_nodes(void) +static int cpuset_track_online_nodes(struct notifier_block *self, + unsigned long action, void *arg) { cgroup_lock(); - top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; - scan_for_empty_cpusets(&top_cpuset); + switch (action) { + case MEM_ONLINE: + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + break; + case MEM_OFFLINE: + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + scan_for_empty_cpusets(&top_cpuset); + break; + default: + break; + } cgroup_unlock(); + return NOTIFY_OK; } #endif @@ -2029,24 +2114,28 @@ void cpuset_track_online_nodes(void) void __init cpuset_init_smp(void) { - top_cpuset.cpus_allowed = cpu_online_map; + cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; hotcpu_notifier(cpuset_track_online_cpus, 0); + hotplug_memory_notifier(cpuset_track_online_nodes, 10); + + cpuset_wq = create_singlethread_workqueue("cpuset"); + BUG_ON(!cpuset_wq); } /** * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. - * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. + * @pmask: pointer to struct cpumask variable to receive cpus_allowed set. * - * Description: Returns the cpumask_t cpus_allowed of the cpuset + * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of cpu_online_map, even if this means going outside the * tasks cpuset. **/ -void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) +void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { mutex_lock(&callback_mutex); cpuset_cpus_allowed_locked(tsk, pmask); @@ -2057,7 +2146,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. * Must be called with callback_mutex held. **/ -void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) +void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask) { task_lock(tsk); guarantee_online_cpus(task_cs(tsk), pmask); @@ -2340,6 +2429,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); } +/** + * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed + * @task: pointer to task_struct of some task. + * + * Description: Prints @task's name, cpuset name, and cached copy of its + * mems_allowed to the kernel log. Must hold task_lock(task) to allow + * dereferencing task_cs(task). + */ +void cpuset_print_task_mems_allowed(struct task_struct *tsk) +{ + struct dentry *dentry; + + dentry = task_cs(tsk)->css.cgroup->dentry; + spin_lock(&cpuset_buffer_lock); + snprintf(cpuset_name, CPUSET_NAME_LEN, + dentry ? (const char *)dentry->d_name.name : "/"); + nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, + tsk->mems_allowed); + printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", + tsk->comm, cpuset_name, cpuset_nodelist); + spin_unlock(&cpuset_buffer_lock); +} + /* * Collection of memory_pressure is suppressed unless * this flag is enabled by writing "1" to the special @@ -2437,19 +2549,15 @@ const struct file_operations proc_cpuset_operations = { void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t"); - m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, - task->cpus_allowed); + seq_cpumask(m, &task->cpus_allowed); seq_printf(m, "\n"); seq_printf(m, "Cpus_allowed_list:\t"); - m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count, - task->cpus_allowed); + seq_cpumask_list(m, &task->cpus_allowed); seq_printf(m, "\n"); seq_printf(m, "Mems_allowed:\t"); - m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, - task->mems_allowed); + seq_nodemask(m, &task->mems_allowed); seq_printf(m, "\n"); seq_printf(m, "Mems_allowed_list:\t"); - m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count, - task->mems_allowed); + seq_nodemask_list(m, &task->mems_allowed); seq_printf(m, "\n"); }