X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=02b9611eadde3ebe638b9c24ffbbb5ec8ada5c06;hb=8fade6aff706b2ae3f02864b4023d34b002cd226;hp=d10946748ec2a3c9a070301f4af78c5f4dd18845;hpb=53feb29767c29c877f9d47dcfe14211b5b0f7ebd;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d109467..02b9611 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, * In order to avoid seeing no nodes if the old and new nodes are disjoint, * we structure updates as setting all new allowed nodes, then clearing newly * disallowed ones. - * - * Called with task's alloc_lock held */ static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { +repeat: + /* + * Allow tasks that have access to memory reserves because they have + * been OOM killed to get memory anywhere. + */ + if (unlikely(test_thread_flag(TIF_MEMDIE))) + return; + if (current->flags & PF_EXITING) /* Let dying task have memory */ + return; + + task_lock(tsk); nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); - mpol_rebind_task(tsk, &tsk->mems_allowed); - mpol_rebind_task(tsk, newmems); + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); + + + /* + * ensure checking ->mems_allowed_change_disable after setting all new + * allowed nodes. + * + * the read-side task can see an nodemask with new allowed nodes and + * old allowed nodes. and if it allocates page when cpuset clears newly + * disallowed ones continuous, it can see the new allowed bits. + * + * And if setting all new allowed nodes is after the checking, setting + * all new allowed nodes and clearing newly disallowed ones will be done + * continuous, and the read-side task may find no node to alloc page. + */ + smp_mb(); + + /* + * Allocation of memory is very fast, we needn't sleep when waiting + * for the read-side. + */ + while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { + task_unlock(tsk); + if (!task_curr(tsk)) + yield(); + goto repeat; + } + + /* + * ensure checking ->mems_allowed_change_disable before clearing all new + * disallowed nodes. + * + * if clearing newly disallowed bits before the checking, the read-side + * task may find no node to alloc page. + */ + smp_mb(); + + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); tsk->mems_allowed = *newmems; + task_unlock(tsk); } /* @@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p, cs = cgroup_cs(scan->cg); guarantee_online_mems(cs, newmems); - task_lock(p); cpuset_change_task_nodemask(p, newmems); - task_unlock(p); NODEMASK_FREE(newmems); @@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, err = set_cpus_allowed_ptr(tsk, cpus_attach); WARN_ON_ONCE(err); - task_lock(tsk); cpuset_change_task_nodemask(tsk, to); - task_unlock(tsk); cpuset_update_task_spread_flag(cs, tsk); } @@ -2182,19 +2224,52 @@ void __init cpuset_init_smp(void) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { mutex_lock(&callback_mutex); - cpuset_cpus_allowed_locked(tsk, pmask); + task_lock(tsk); + guarantee_online_cpus(task_cs(tsk), pmask); + task_unlock(tsk); mutex_unlock(&callback_mutex); } -/** - * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. - * Must be called with callback_mutex held. - **/ -void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask) +int cpuset_cpus_allowed_fallback(struct task_struct *tsk) { - task_lock(tsk); - guarantee_online_cpus(task_cs(tsk), pmask); - task_unlock(tsk); + const struct cpuset *cs; + int cpu; + + rcu_read_lock(); + cs = task_cs(tsk); + if (cs) + cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); + rcu_read_unlock(); + + /* + * We own tsk->cpus_allowed, nobody can change it under us. + * + * But we used cs && cs->cpus_allowed lockless and thus can + * race with cgroup_attach_task() or update_cpumask() and get + * the wrong tsk->cpus_allowed. However, both cases imply the + * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() + * which takes task_rq_lock(). + * + * If we are called after it dropped the lock we must see all + * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary + * set any mask even if it is not right from task_cs() pov, + * the pending set_cpus_allowed_ptr() will fix things. + */ + + cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); + if (cpu >= nr_cpu_ids) { + /* + * Either tsk->cpus_allowed is wrong (see above) or it + * is actually empty. The latter case is only possible + * if we are racing with remove_tasks_in_empty_cpuset(). + * Like above we can temporary set any mask and rely on + * set_cpus_allowed_ptr() as synchronization point. + */ + cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); + cpu = cpumask_any(cpu_active_mask); + } + + return cpu; } void cpuset_init_current_mems_allowed(void) @@ -2383,22 +2458,6 @@ int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) } /** - * cpuset_lock - lock out any changes to cpuset structures - * - * The out of memory (oom) code needs to mutex_lock cpusets - * from being changed while it scans the tasklist looking for a - * task in an overlapping cpuset. Expose callback_mutex via this - * cpuset_lock() routine, so the oom code can lock it, before - * locking the task list. The tasklist_lock is a spinlock, so - * must be taken inside callback_mutex. - */ - -void cpuset_lock(void) -{ - mutex_lock(&callback_mutex); -} - -/** * cpuset_unlock - release lock on cpuset changes * * Undo the lock taken in a previous cpuset_lock() call. @@ -2410,7 +2469,8 @@ void cpuset_unlock(void) } /** - * cpuset_mem_spread_node() - On which node to begin search for a page + * cpuset_mem_spread_node() - On which node to begin search for a file page + * cpuset_slab_spread_node() - On which node to begin search for a slab page * * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for * tasks in a cpuset with is_spread_page or is_spread_slab set), @@ -2435,16 +2495,27 @@ void cpuset_unlock(void) * See kmem_cache_alloc_node(). */ -int cpuset_mem_spread_node(void) +static int cpuset_spread_node(int *rotor) { int node; - node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed); + node = next_node(*rotor, current->mems_allowed); if (node == MAX_NUMNODES) node = first_node(current->mems_allowed); - current->cpuset_mem_spread_rotor = node; + *rotor = node; return node; } + +int cpuset_mem_spread_node(void) +{ + return cpuset_spread_node(¤t->cpuset_mem_spread_rotor); +} + +int cpuset_slab_spread_node(void) +{ + return cpuset_spread_node(¤t->cpuset_slab_spread_rotor); +} + EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); /**