cgroups: consolidate cgroup documents

[safe/jmp/linux-2.6] / kernel / cpuset.c
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 827cd9a..a856788 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -36,6 +36,7 @@
  #include <linux/list.h>
  #include <linux/mempolicy.h>
  #include <linux/mm.h>
+#include <linux/memory.h>
  #include <linux/module.h>
  #include <linux/mount.h>
  #include <linux/namei.h>
@@ -83,7 +84,7 @@ struct cpuset {
         struct cgroup_subsys_state css;
  
         unsigned long flags;            /* "unsigned long" so bitops work */
-       cpumask_t cpus_allowed;         /* CPUs allowed to tasks in cpuset */
+       cpumask_var_t cpus_allowed;     /* CPUs allowed to tasks in cpuset */
         nodemask_t mems_allowed;        /* Memory Nodes allowed to tasks */
  
         struct cpuset *parent;          /* my parent */
@@ -194,8 +195,6 @@ static int cpuset_mems_generation;
  
  static struct cpuset top_cpuset = {
         .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
-       .cpus_allowed = CPU_MASK_ALL,
-       .mems_allowed = NODE_MASK_ALL,
  };
  
  /*
@@ -239,6 +238,17 @@ static struct cpuset top_cpuset = {
  static DEFINE_MUTEX(callback_mutex);
  
  /*
+ * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
+ * buffers.  They are statically allocated to prevent using excess stack
+ * when calling cpuset_print_task_mems_allowed().
+ */
+#define CPUSET_NAME_LEN                (128)
+#define        CPUSET_NODELIST_LEN     (256)
+static char cpuset_name[CPUSET_NAME_LEN];
+static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+static DEFINE_SPINLOCK(cpuset_buffer_lock);
+
+/*
   * This is ugly, but preserves the userspace API for existing cpuset
   * users. If someone tries to mount the "cpuset" filesystem, we
   * silently switch it to mount "cgroup" instead
@@ -266,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
  };
  
  /*
- * Return in *pmask the portion of a cpusets's cpus_allowed that
+ * Return in pmask the portion of a cpuset's cpus_allowed that
   * are online.  If none are online, walk up the cpuset hierarchy
   * until we find one that does have some online cpus.  If we get
   * all the way to the top and still haven't found any online cpus,
@@ -279,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
   * Call with callback_mutex held.
   */
  
-static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
+static void guarantee_online_cpus(const struct cpuset *cs,
+                                 struct cpumask *pmask)
  {
-       while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
+       while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
                 cs = cs->parent;
         if (cs)
-               cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
+               cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
         else
-               *pmask = cpu_online_map;
-       BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
+               cpumask_copy(pmask, cpu_online_mask);
+       BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
  }
  
  /*
@@ -363,14 +374,9 @@ void cpuset_update_task_memory_state(void)
         struct task_struct *tsk = current;
         struct cpuset *cs;
  
-       if (task_cs(tsk) == &top_cpuset) {
-               /* Don't need rcu for top_cpuset.  It's never freed. */
-               my_cpusets_mem_gen = top_cpuset.mems_generation;
-       } else {
-               rcu_read_lock();
-               my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
-               rcu_read_unlock();
-       }
+       rcu_read_lock();
+       my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
+       rcu_read_unlock();
  
         if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
                 mutex_lock(&callback_mutex);
@@ -402,12 +408,43 @@ void cpuset_update_task_memory_state(void)
  
  static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
  {
-       return  cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
+       return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
                 nodes_subset(p->mems_allowed, q->mems_allowed) &&
                 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                 is_mem_exclusive(p) <= is_mem_exclusive(q);
  }
  
+/**
+ * alloc_trial_cpuset - allocate a trial cpuset, or return NULL on failure
+ * @cs: the cpuset that the trial cpuset duplicates
+ */
+static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
+{
+       struct cpuset *trial;
+
+       trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);   /* copy of *cs */
+       if (!trial)
+               return NULL;
+
+       if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
+               kfree(trial);           /* undo the kmemdup() above */
+               return NULL;
+       }
+       cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+
+       return trial;           /* release with free_trial_cpuset() */
+}
+
+/**
+ * free_trial_cpuset - free a cpuset obtained from alloc_trial_cpuset()
+ * @trial: the trial cpuset to be freed; dereferenced, so must not be NULL
+ */
+static void free_trial_cpuset(struct cpuset *trial)
+{
+       free_cpumask_var(trial->cpus_allowed);  /* the mask it was given */
+       kfree(trial);
+}
+
  /*
   * validate_change() - Used to validate that any proposed cpuset change
   *                    follows the structural rules for cpusets.
@@ -457,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
                 c = cgroup_cs(cont);
                 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                     c != cur &&
-                   cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
+                   cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
                         return -EINVAL;
                 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                     c != cur &&
@@ -467,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  
         /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
         if (cgroup_task_count(cur->css.cgroup)) {
-               if (cpus_empty(trial->cpus_allowed) ||
+               if (cpumask_empty(trial->cpus_allowed) ||
                     nodes_empty(trial->mems_allowed)) {
                         return -ENOSPC;
                 }
@@ -482,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
   */
  static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
  {
-       return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+       return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
  }
  
  static void
@@ -507,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
                 cp = list_first_entry(&q, struct cpuset, stack_list);
                 list_del(q.next);
  
-               if (cpus_empty(cp->cpus_allowed))
+               if (cpumask_empty(cp->cpus_allowed))
                         continue;
  
                 if (is_sched_load_balance(cp))
@@ -531,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
   * load balancing domains (sched domains) as specified by that partial
   * partition.
   *
- * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
   * for a background explanation of this.
   *
   * Does not return errors, on the theory that the callers of this
@@ -574,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
   *     element of the partition (one sched domain) to be passed to
   *     partition_sched_domains().
   */
-static int generate_sched_domains(cpumask_t **domains,
+/* FIXME: see the FIXME in partition_sched_domains() */
+static int generate_sched_domains(struct cpumask **domains,
                         struct sched_domain_attr **attributes)
  {
         LIST_HEAD(q);           /* queue of cpusets to be scanned */
@@ -582,19 +620,18 @@ static int generate_sched_domains(cpumask_t **domains,
         struct cpuset **csa;    /* array of all cpuset ptrs */
         int csn;                /* how many cpuset ptrs in csa so far */
         int i, j, k;            /* indices for partition finding loops */
-       cpumask_t *doms;        /* resulting partition; i.e. sched domains */
+       struct cpumask *doms;   /* resulting partition; i.e. sched domains */
         struct sched_domain_attr *dattr;  /* attributes for custom domains */
-       int ndoms;              /* number of sched domains in result */
-       int nslot;              /* next empty doms[] cpumask_t slot */
+       int ndoms = 0;          /* number of sched domains in result */
+       int nslot;              /* next empty doms[] struct cpumask slot */
  
-       ndoms = 0;
         doms = NULL;
         dattr = NULL;
         csa = NULL;
  
         /* Special case for the 99% of systems with one, full, sched domain */
         if (is_sched_load_balance(&top_cpuset)) {
-               doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+               doms = kmalloc(cpumask_size(), GFP_KERNEL);
                 if (!doms)
                         goto done;
  
@@ -603,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
                         *dattr = SD_ATTR_INIT;
                         update_domain_attr_tree(dattr, &top_cpuset);
                 }
-               *doms = top_cpuset.cpus_allowed;
+               cpumask_copy(doms, top_cpuset.cpus_allowed);
  
                 ndoms = 1;
                 goto done;
@@ -622,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
                 cp = list_first_entry(&q, struct cpuset, stack_list);
                 list_del(q.next);
  
-               if (cpus_empty(cp->cpus_allowed))
+               if (cpumask_empty(cp->cpus_allowed))
                         continue;
  
                 /*
@@ -673,11 +710,9 @@ restart:
          * Now we know how many domains to create.
          * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
          */
-       doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
-       if (!doms) {
-               ndoms = 0;
+       doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
+       if (!doms)
                 goto done;
-       }
  
         /*
          * The rest of the code, including the scheduler, can deal with
@@ -687,7 +722,7 @@ restart:
  
         for (nslot = 0, i = 0; i < csn; i++) {
                 struct cpuset *a = csa[i];
-               cpumask_t *dp;
+               struct cpumask *dp;
                 int apn = a->pn;
  
                 if (apn < 0) {
@@ -710,14 +745,14 @@ restart:
                         continue;
                 }
  
-               cpus_clear(*dp);
+               cpumask_clear(dp);
                 if (dattr)
                         *(dattr + nslot) = SD_ATTR_INIT;
                 for (j = i; j < csn; j++) {
                         struct cpuset *b = csa[j];
  
                         if (apn == b->pn) {
-                               cpus_or(*dp, *dp, b->cpus_allowed);
+                               cpumask_or(dp, dp, b->cpus_allowed);
                                 if (dattr)
                                         update_domain_attr_tree(dattr + nslot, b);
  
@@ -732,6 +767,13 @@ restart:
  done:
         kfree(csa);
  
+       /*
+        * Fallback to the default domain if kmalloc() failed.
+        * See comments in partition_sched_domains().
+        */
+       if (doms == NULL)
+               ndoms = 1;
+
         *domains    = doms;
         *attributes = dattr;
         return ndoms;
@@ -750,7 +792,7 @@ done:
  static void do_rebuild_sched_domains(struct work_struct *unused)
  {
         struct sched_domain_attr *attr;
-       cpumask_t *doms;
+       struct cpumask *doms;
         int ndoms;
  
         get_online_cpus();
@@ -819,7 +861,7 @@ void rebuild_sched_domains(void)
  static int cpuset_test_cpumask(struct task_struct *tsk,
                                struct cgroup_scanner *scan)
  {
-       return !cpus_equal(tsk->cpus_allowed,
+       return !cpumask_equal(&tsk->cpus_allowed,
                         (cgroup_cs(scan->cg))->cpus_allowed);
  }
  
@@ -837,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
  static void cpuset_change_cpumask(struct task_struct *tsk,
                                   struct cgroup_scanner *scan)
  {
-       set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
+       set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
  }
  
  /**
@@ -869,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
   * @cs: the cpuset to consider
   * @buf: buffer of cpu numbers written to this cpuset
   */
-static int update_cpumask(struct cpuset *cs, const char *buf)
+static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+                         const char *buf)
  {
         struct ptr_heap heap;
-       struct cpuset trialcs;
         int retval;
         int is_load_balanced;
  
@@ -880,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
         if (cs == &top_cpuset)
                 return -EACCES;
  
-       trialcs = *cs;
-
         /*
          * An empty cpus_allowed is ok only if the cpuset has no tasks.
          * Since cpulist_parse() fails on an empty mask, we special case
@@ -889,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
          * with tasks have cpus.
          */
         if (!*buf) {
-               cpus_clear(trialcs.cpus_allowed);
+               cpumask_clear(trialcs->cpus_allowed);
         } else {
-               retval = cpulist_parse(buf, trialcs.cpus_allowed);
+               retval = cpulist_parse(buf, trialcs->cpus_allowed);
                 if (retval < 0)
                         return retval;
  
-               if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map))
+               if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
                         return -EINVAL;
         }
-       retval = validate_change(cs, &trialcs);
+       retval = validate_change(cs, trialcs);
         if (retval < 0)
                 return retval;
  
         /* Nothing to do if the cpus didn't change */
-       if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+       if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
                 return 0;
  
         retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
         if (retval)
                 return retval;
  
-       is_load_balanced = is_sched_load_balance(&trialcs);
+       is_load_balanced = is_sched_load_balance(trialcs);
  
         mutex_lock(&callback_mutex);
-       cs->cpus_allowed = trialcs.cpus_allowed;
+       cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
         mutex_unlock(&callback_mutex);
  
         /*
@@ -1001,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
         cpuset_being_rebound = cs;              /* causes mpol_dup() rebind */
  
         fudge = 10;                             /* spare mmarray[] slots */
-       fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+       fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
         retval = -ENOMEM;
  
         /*
@@ -1088,9 +1128,9 @@ done:
   * lock each such tasks mm->mmap_sem, scan its vma's and rebind
   * their mempolicies to the cpusets new mems_allowed.
   */
-static int update_nodemask(struct cpuset *cs, const char *buf)
+static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+                          const char *buf)
  {
-       struct cpuset trialcs;
         nodemask_t oldmem;
         int retval;
  
@@ -1101,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
         if (cs == &top_cpuset)
                 return -EACCES;
  
-       trialcs = *cs;
-
         /*
          * An empty mems_allowed is ok iff there are no tasks in the cpuset.
          * Since nodelist_parse() fails on an empty mask, we special case
@@ -1110,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
          * with tasks have memory.
          */
         if (!*buf) {
-               nodes_clear(trialcs.mems_allowed);
+               nodes_clear(trialcs->mems_allowed);
         } else {
-               retval = nodelist_parse(buf, trialcs.mems_allowed);
+               retval = nodelist_parse(buf, trialcs->mems_allowed);
                 if (retval < 0)
                         goto done;
  
-               if (!nodes_subset(trialcs.mems_allowed,
+               if (!nodes_subset(trialcs->mems_allowed,
                                 node_states[N_HIGH_MEMORY]))
                         return -EINVAL;
         }
         oldmem = cs->mems_allowed;
-       if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+       if (nodes_equal(oldmem, trialcs->mems_allowed)) {
                 retval = 0;             /* Too easy - nothing to do */
                 goto done;
         }
-       retval = validate_change(cs, &trialcs);
+       retval = validate_change(cs, trialcs);
         if (retval < 0)
                 goto done;
  
         mutex_lock(&callback_mutex);
-       cs->mems_allowed = trialcs.mems_allowed;
+       cs->mems_allowed = trialcs->mems_allowed;
         cs->mems_generation = cpuset_mems_generation++;
         mutex_unlock(&callback_mutex);
  
@@ -1151,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
  
         if (val != cs->relax_domain_level) {
                 cs->relax_domain_level = val;
-               if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+               if (!cpumask_empty(cs->cpus_allowed) &&
+                   is_sched_load_balance(cs))
                         async_rebuild_sched_domains();
         }
  
@@ -1170,32 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
  static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
                        int turning_on)
  {
-       struct cpuset trialcs;
+       struct cpuset *trialcs;
         int err;
-       int cpus_nonempty, balance_flag_changed;
+       int balance_flag_changed;
+
+       trialcs = alloc_trial_cpuset(cs);
+       if (!trialcs)
+               return -ENOMEM;
  
-       trialcs = *cs;
         if (turning_on)
-               set_bit(bit, &trialcs.flags);
+               set_bit(bit, &trialcs->flags);
         else
-               clear_bit(bit, &trialcs.flags);
+               clear_bit(bit, &trialcs->flags);
  
-       err = validate_change(cs, &trialcs);
+       err = validate_change(cs, trialcs);
         if (err < 0)
-               return err;
+               goto out;
  
-       cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
         balance_flag_changed = (is_sched_load_balance(cs) !=
-                                       is_sched_load_balance(&trialcs));
+                               is_sched_load_balance(trialcs));
  
         mutex_lock(&callback_mutex);
-       cs->flags = trialcs.flags;
+       cs->flags = trialcs->flags;
         mutex_unlock(&callback_mutex);
  
-       if (cpus_nonempty && balance_flag_changed)
+       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
                 async_rebuild_sched_domains();
  
-       return 0;
+out:
+       free_trial_cpuset(trialcs);
+       return err;
  }
  
  /*
@@ -1296,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
         return val;
  }
  
+/* Protected by cgroup_lock */
+static cpumask_var_t cpus_attach;
+
  /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
  static int cpuset_can_attach(struct cgroup_subsys *ss,
                              struct cgroup *cont, struct task_struct *tsk)
  {
         struct cpuset *cs = cgroup_cs(cont);
+       int ret = 0;
  
-       if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+       if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
                 return -ENOSPC;
-       if (tsk->flags & PF_THREAD_BOUND) {
-               cpumask_t mask;
  
+       if (tsk->flags & PF_THREAD_BOUND) {
                 mutex_lock(&callback_mutex);
-               mask = cs->cpus_allowed;
+               if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
+                       ret = -EINVAL;
                 mutex_unlock(&callback_mutex);
-               if (!cpus_equal(tsk->cpus_allowed, mask))
-                       return -EINVAL;
         }
  
-       return security_task_setscheduler(tsk, 0, NULL);
+       return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
  }
  
  static void cpuset_attach(struct cgroup_subsys *ss,
                           struct cgroup *cont, struct cgroup *oldcont,
                           struct task_struct *tsk)
  {
-       cpumask_t cpus;
         nodemask_t from, to;
         struct mm_struct *mm;
         struct cpuset *cs = cgroup_cs(cont);
         struct cpuset *oldcs = cgroup_cs(oldcont);
         int err;
  
-       mutex_lock(&callback_mutex);
-       guarantee_online_cpus(cs, &cpus);
-       err = set_cpus_allowed_ptr(tsk, &cpus);
-       mutex_unlock(&callback_mutex);
+       if (cs == &top_cpuset) {
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       } else {
+               mutex_lock(&callback_mutex);
+               guarantee_online_cpus(cs, cpus_attach);
+               mutex_unlock(&callback_mutex);
+       }
+       err = set_cpus_allowed_ptr(tsk, cpus_attach);
         if (err)
                 return;
  
@@ -1344,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
                         cpuset_migrate_mm(mm, &from, &to);
                 mmput(mm);
         }
-
  }
  
  /* The various types of files and directories in a cpuset file system */
@@ -1439,21 +1486,32 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
                                 const char *buf)
  {
         int retval = 0;
+       struct cpuset *cs = cgroup_cs(cgrp);
+       struct cpuset *trialcs;
  
         if (!cgroup_lock_live_group(cgrp))
                 return -ENODEV;
  
+       trialcs = alloc_trial_cpuset(cs);
+       if (!trialcs) {
+               retval = -ENOMEM;
+               goto out;       /* the cgroup lock is held: must unlock */
+       }
+
         switch (cft->private) {
         case FILE_CPULIST:
-               retval = update_cpumask(cgroup_cs(cgrp), buf);
+               retval = update_cpumask(cs, trialcs, buf);
                 break;
         case FILE_MEMLIST:
-               retval = update_nodemask(cgroup_cs(cgrp), buf);
+               retval = update_nodemask(cs, trialcs, buf);
                 break;
         default:
                 retval = -EINVAL;
                 break;
         }
+
+       free_trial_cpuset(trialcs);
+out:
         cgroup_unlock();
         return retval;
  }
@@ -1472,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
  
  static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
  {
-       cpumask_t mask;
+       int ret;
  
         mutex_lock(&callback_mutex);
-       mask = cs->cpus_allowed;
+       ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
         mutex_unlock(&callback_mutex);
  
-       return cpulist_scnprintf(page, PAGE_SIZE, mask);
+       return ret;
  }
  
  static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1714,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
         parent_cs = cgroup_cs(parent);
  
         cs->mems_allowed = parent_cs->mems_allowed;
-       cs->cpus_allowed = parent_cs->cpus_allowed;
+       cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
         return;
  }
  
@@ -1740,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
         cs = kmalloc(sizeof(*cs), GFP_KERNEL);
         if (!cs)
                 return ERR_PTR(-ENOMEM);
+       if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
+               kfree(cs);
+               return ERR_PTR(-ENOMEM);
+       }
  
         cpuset_update_task_memory_state();
         cs->flags = 0;
@@ -1748,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
         if (is_spread_slab(parent))
                 set_bit(CS_SPREAD_SLAB, &cs->flags);
         set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
-       cpus_clear(cs->cpus_allowed);
+       cpumask_clear(cs->cpus_allowed);
         nodes_clear(cs->mems_allowed);
         cs->mems_generation = cpuset_mems_generation++;
         fmeter_init(&cs->fmeter);
@@ -1775,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
                 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
  
         number_of_cpusets--;
+       free_cpumask_var(cs->cpus_allowed);
         kfree(cs);
  }
  
@@ -1798,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
  
  int __init cpuset_init_early(void)
  {
+       alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+
         top_cpuset.mems_generation = cpuset_mems_generation++;
         return 0;
  }
@@ -1813,7 +1875,7 @@ int __init cpuset_init(void)
  {
         int err = 0;
  
-       cpus_setall(top_cpuset.cpus_allowed);
+       cpumask_setall(top_cpuset.cpus_allowed);
         nodes_setall(top_cpuset.mems_allowed);
  
         fmeter_init(&top_cpuset.fmeter);
@@ -1825,6 +1887,9 @@ int __init cpuset_init(void)
         if (err < 0)
                 return err;
  
+       if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
+               BUG();
+
         number_of_cpusets = 1;
         return 0;
  }
@@ -1899,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
          * has online cpus, so can't be empty).
          */
         parent = cs->parent;
-       while (cpus_empty(parent->cpus_allowed) ||
+       while (cpumask_empty(parent->cpus_allowed) ||
                         nodes_empty(parent->mems_allowed))
                 parent = parent->parent;
  
@@ -1921,7 +1986,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
   * that has tasks along with an empty 'mems'.  But if we did see such
   * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
   */
-static void scan_for_empty_cpusets(const struct cpuset *root)
+static void scan_for_empty_cpusets(struct cpuset *root)
  {
         LIST_HEAD(queue);
         struct cpuset *cp;      /* scans cpusets being updated */
@@ -1940,7 +2005,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
                 }
  
                 /* Continue past cpusets with all cpus, mems online */
-               if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
+               if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
                     nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
                         continue;
  
@@ -1948,13 +2013,14 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
  
                 /* Remove offline cpus and mems from this cpuset. */
                 mutex_lock(&callback_mutex);
-               cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
+               cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+                           cpu_online_mask);
                 nodes_and(cp->mems_allowed, cp->mems_allowed,
                                                 node_states[N_HIGH_MEMORY]);
                 mutex_unlock(&callback_mutex);
  
                 /* Move tasks from the empty cpuset to a parent */
-               if (cpus_empty(cp->cpus_allowed) ||
+               if (cpumask_empty(cp->cpus_allowed) ||
                      nodes_empty(cp->mems_allowed))
                         remove_tasks_in_empty_cpuset(cp);
                 else {
@@ -1980,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
                                 unsigned long phase, void *unused_cpu)
  {
         struct sched_domain_attr *attr;
-       cpumask_t *doms;
+       struct cpumask *doms;
         int ndoms;
  
         switch (phase) {
@@ -1995,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
         }
  
         cgroup_lock();
-       top_cpuset.cpus_allowed = cpu_online_map;
+       cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
         scan_for_empty_cpusets(&top_cpuset);
         ndoms = generate_sched_domains(&doms, &attr);
         cgroup_unlock();
@@ -2012,12 +2078,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
   * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
   * See also the previous routine cpuset_track_online_cpus().
   */
-void cpuset_track_online_nodes(void)
+static int cpuset_track_online_nodes(struct notifier_block *self,
+                               unsigned long action, void *arg)
  {
         cgroup_lock();
-       top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
-       scan_for_empty_cpusets(&top_cpuset);
+       switch (action) {
+       case MEM_ONLINE:
+               top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+               break;
+       case MEM_OFFLINE:
+               top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+               scan_for_empty_cpusets(&top_cpuset);
+               break;
+       default:
+               break;
+       }
         cgroup_unlock();
+       return NOTIFY_OK;
  }
  #endif
  
@@ -2029,24 +2106,25 @@ void cpuset_track_online_nodes(void)
  
  void __init cpuset_init_smp(void)
  {
-       top_cpuset.cpus_allowed = cpu_online_map;
+       cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
         top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
  
         hotcpu_notifier(cpuset_track_online_cpus, 0);
+       hotplug_memory_notifier(cpuset_track_online_nodes, 10);
  }
  
  /**
   * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
   * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
- * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
   *
- * Description: Returns the cpumask_t cpus_allowed of the cpuset
+ * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
   * attached to the specified @tsk.  Guaranteed to return some non-empty
   * subset of cpu_online_map, even if this means going outside the
   * tasks cpuset.
   **/
  
-void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
  {
         mutex_lock(&callback_mutex);
         cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2057,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
   * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
   * Must be called with callback_mutex held.
   **/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
  {
         task_lock(tsk);
         guarantee_online_cpus(task_cs(tsk), pmask);
@@ -2340,6 +2418,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
         return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
  }
  
+/**
+ * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
+ * @tsk: pointer to task_struct of some task.
+ *
+ * Description: Prints @tsk's name, cpuset name, and cached copy of its
+ * mems_allowed to the kernel log.  Must hold task_lock(tsk) to allow
+ * dereferencing task_cs(tsk).
+ */
+void cpuset_print_task_mems_allowed(struct task_struct *tsk)
+{
+       struct dentry *dentry;
+
+       dentry = task_cs(tsk)->css.cgroup->dentry;
+       spin_lock(&cpuset_buffer_lock);
+       snprintf(cpuset_name, CPUSET_NAME_LEN, "%s",    /* name is data */
+                dentry ? (const char *)dentry->d_name.name : "/");
+       nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
+                          tsk->mems_allowed);
+       printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
+              tsk->comm, cpuset_name, cpuset_nodelist);
+       spin_unlock(&cpuset_buffer_lock);
+}
+
  /*
   * Collection of memory_pressure is suppressed unless
   * this flag is enabled by writing "1" to the special
@@ -2437,19 +2538,15 @@ const struct file_operations proc_cpuset_operations = {
  void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
  {
         seq_printf(m, "Cpus_allowed:\t");
-       m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->cpus_allowed);
+       seq_cpumask(m, &task->cpus_allowed);
         seq_printf(m, "\n");
         seq_printf(m, "Cpus_allowed_list:\t");
-       m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->cpus_allowed);
+       seq_cpumask_list(m, &task->cpus_allowed);
         seq_printf(m, "\n");
         seq_printf(m, "Mems_allowed:\t");
-       m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->mems_allowed);
+       seq_nodemask(m, &task->mems_allowed);
         seq_printf(m, "\n");
         seq_printf(m, "Mems_allowed_list:\t");
-       m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->mems_allowed);
+       seq_nodemask_list(m, &task->mems_allowed);
         seq_printf(m, "\n");
  }