+ struct task_struct *tsk = current;
+
+ cpuset_update_task_memory_state();
+
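+ /*
+ * Temporarily point the task's mems_allowed at the migration
+ * target, so the page migration code can allocate pages on
+ * the destination nodes while moving this mm's memory.
+ */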
+ mutex_lock(&callback_mutex);
+ tsk->mems_allowed = *to;
+ mutex_unlock(&callback_mutex);
+
+ do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+
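+ /*
+ * Restore mems_allowed from the task's cpuset, clipped by
+ * guarantee_online_mems() to nodes that are still online.
+ */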
+ mutex_lock(&callback_mutex);
+ guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
+ mutex_unlock(&callback_mutex);
+}
+
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset. Needs to validate the request, update the
+ * cpuset's mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the task's
+ * pages to the new memory.
+ *
+ * Call with cgroup_mutex held. May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such task's mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpuset's new mems_allowed.
+ */
+
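+/*
+ * Non-NULL while update_nodemask() is rebinding mempolicies;
+ * serialized by cgroup_mutex (see the comment above the rebind
+ * loop below).
+ */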
+static void *cpuset_being_rebound;
+
+static int update_nodemask(struct cpuset *cs, char *buf)
+{
+ struct cpuset trialcs;
+ nodemask_t oldmem;
+ struct task_struct *p;
+ struct mm_struct **mmarray;
+ int i, n, ntasks;
+ int migrate;
+ int fudge;
+ int retval;
+ struct cgroup_iter it;
+
+ /*
+ * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
+ * it's read-only
+ */
+ if (cs == &top_cpuset)
+ return -EACCES;
+
+ trialcs = *cs;
+
+ /*
+ * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+ * Since nodelist_parse() fails on an empty mask, we special case
+ * that parsing. The validate_change() call ensures that cpusets
+ * with tasks have memory.
+ */
+ buf = strstrip(buf);
+ if (!*buf) {
+ nodes_clear(trialcs.mems_allowed);
+ } else {
+ retval = nodelist_parse(buf, trialcs.mems_allowed);
+ if (retval < 0)
+ goto done;
+ }
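+ /* Clip the request to nodes that actually have memory. */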
+ nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
+ node_states[N_HIGH_MEMORY]);
+ oldmem = cs->mems_allowed;
+ if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+ retval = 0; /* Too easy - nothing to do */
+ goto done;
+ }
+ retval = validate_change(cs, &trialcs);
+ if (retval < 0)
+ goto done;
+
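+ /*
+ * Commit the validated change. Bumping mems_generation lets
+ * tasks in this cpuset notice the new placement via their next
+ * cpuset_update_task_memory_state() call.
+ */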
+ mutex_lock(&callback_mutex);
+ cs->mems_allowed = trialcs.mems_allowed;
+ cs->mems_generation = cpuset_mems_generation++;
+ mutex_unlock(&callback_mutex);
+
+ cpuset_being_rebound = cs; /* causes mpol_copy() rebind */
+
+ fudge = 10; /* spare mmarray[] slots */
+ fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+ retval = -ENOMEM;
+
+ /*
+ * Allocate mmarray[] to hold mm reference for each task
+ * in cpuset cs. Can't kmalloc GFP_KERNEL while holding
+ * tasklist_lock. We could use GFP_ATOMIC, but with a
+ * few more lines of code, we can retry until we get a big
+ * enough mmarray[] w/o using GFP_ATOMIC.
+ */
+ while (1) {
+ ntasks = cgroup_task_count(cs->css.cgroup); /* guess */
+ ntasks += fudge;
+ mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
+ if (!mmarray)
+ goto done;
+ read_lock(&tasklist_lock); /* block fork */
+ if (cgroup_task_count(cs->css.cgroup) <= ntasks)
+ break; /* got enough */
+ read_unlock(&tasklist_lock); /* try again */
+ kfree(mmarray);
+ }
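+ /* The break above exits the loop with tasklist_lock still held. */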
+
+ n = 0;
+
+ /* Load up mmarray[] with mm reference for each task in cpuset. */
+ cgroup_iter_start(cs->css.cgroup, &it);
+ while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
+ struct mm_struct *mm;
+
+ if (n >= ntasks) {
+ printk(KERN_WARNING
+ "Cpuset mempolicy rebind incomplete.\n");
+ break;
+ }
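+ /* Pin the mm; kernel threads have none and are skipped. */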
+ mm = get_task_mm(p);
+ if (!mm)
+ continue;
+ mmarray[n++] = mm;
+ }
+ cgroup_iter_end(cs->css.cgroup, &it);
+ read_unlock(&tasklist_lock);
+
+ /*
+ * Now that we've dropped the tasklist spinlock, we can
+ * rebind the vma mempolicies of each mm in mmarray[] to their
+ * new cpuset, and release that mm. The mpol_rebind_mm()
+ * call takes mmap_sem, which we couldn't take while holding
+ * tasklist_lock. Forks can happen again now - the mpol_copy()
+ * cpuset_being_rebound check will catch such forks, and rebind
+ * their vma mempolicies too. Because we still hold the global
+ * cgroup_mutex, we know that no other rebind effort will
+ * be contending for the global variable cpuset_being_rebound.
+ * It's ok if we rebind the same mm twice; mpol_rebind_mm()
+ * is idempotent. Also migrate pages in each mm to new nodes.
+ */
+ migrate = is_memory_migrate(cs);
+ for (i = 0; i < n; i++) {
+ struct mm_struct *mm = mmarray[i];
+
+ mpol_rebind_mm(mm, &cs->mems_allowed);
+ if (migrate)
+ cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+ mmput(mm);
+ }
+
+ /* We're done rebinding vmas to this cpuset's new mems_allowed. */
+ kfree(mmarray);
+ cpuset_being_rebound = NULL;
+ retval = 0;
+done:
+ return retval;
+}
+
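+/*
+ * Used by the fork/mpol_copy() path to detect that current's
+ * cpuset is in the middle of a mems rebind, so freshly copied
+ * vma mempolicies get rebound as well (see update_nodemask()).
+ */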
+int current_cpuset_is_being_rebound(void)
+{
+ return task_cs(current) == cpuset_being_rebound;
+}
+
+/*
+ * Gate the collection of per-cpuset 'memory_pressure' data:
+ * '1' enables it, '0' disables it.
+ *
+ * Call with cgroup_mutex held.
+ */
+
+static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
+{
+ if (simple_strtoul(buf, NULL, 10) != 0)
+ cpuset_memory_pressure_enabled = 1;
+ else
+ cpuset_memory_pressure_enabled = 0;
+ return 0;
+}
+
+/*
+ * update_flag - read a 0 or a 1 in a file and update associated flag
+ * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
+ * CS_SCHED_LOAD_BALANCE,
+ * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
+ * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
+ * cs: the cpuset to update
+ * buf: the buffer where we read the 0 or 1
+ *
+ * Call with cgroup_mutex held.
+ */
+
+static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
+{
+ int turning_on;
+ struct cpuset trialcs;
+ int err;
+ int cpus_nonempty, balance_flag_changed;