X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;ds=sidebyside;f=kernel%2Fsched_rt.c;h=908c04f9dad02d23df66fbc55a28f5619fc10081;hb=57b1494d2ba544c62673234da6115c21fac27ffc;hp=44b06d75416ea23f345e54fc682f2665e3be0040;hpb=6e0534f278199f1e3dd1049b9bc19a7a5b87ada1;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 44b06d7..908c04f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq) static inline void rt_set_overload(struct rq *rq) { + if (!rq->online) + return; + cpu_set(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set @@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq) static inline void rt_clear_overload(struct rq *rq) { + if (!rq->online) + return; + /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); cpu_clear(rq->cpu, rq->rd->rto_mask); @@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) return &rt_rq->tg->rt_bandwidth; } -#else +#else /* !CONFIG_RT_GROUP_SCHED */ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) { @@ -220,48 +226,10 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) return &def_rt_bandwidth; } -#endif - -static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) -{ - int i, idle = 1; - cpumask_t span; - - if (rt_b->rt_runtime == RUNTIME_INF) - return 1; - - span = sched_rt_period_mask(); - for_each_cpu_mask(i, span) { - int enqueue = 0; - struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); - struct rq *rq = rq_of_rt_rq(rt_rq); - - spin_lock(&rq->lock); - if (rt_rq->rt_time) { - u64 runtime; - - spin_lock(&rt_rq->rt_runtime_lock); - runtime = rt_rq->rt_runtime; - rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); - if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { - rt_rq->rt_throttled = 0; - enqueue = 1; - } - if (rt_rq->rt_time || rt_rq->rt_nr_running) - idle = 0; - spin_unlock(&rt_rq->rt_runtime_lock); - } - - if (enqueue) - sched_rt_rq_enqueue(rt_rq); - spin_unlock(&rq->lock); - } - - return idle; -} +#endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_SMP -static int balance_runtime(struct rt_rq *rt_rq) +static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); struct root_domain *rd = cpu_rq(smp_processor_id())->rd; @@ -272,7 +240,7 @@ static int balance_runtime(struct rt_rq *rt_rq) spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu_mask(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -280,9 +248,12 @@ static int balance_runtime(struct rt_rq *rt_rq) continue; spin_lock(&iter->rt_runtime_lock); + if (iter->rt_runtime == RUNTIME_INF) + goto next; + diff = iter->rt_runtime - iter->rt_time; if (diff > 0) { - do_div(diff, weight); + diff = div_u64((u64)diff, weight); if (rt_rq->rt_runtime + diff > rt_period) diff = rt_period - rt_rq->rt_runtime; iter->rt_runtime -= diff; @@ -293,13 +264,163 @@ static int balance_runtime(struct rt_rq *rt_rq) break; } } +next: spin_unlock(&iter->rt_runtime_lock); } spin_unlock(&rt_b->rt_runtime_lock); return more; } -#endif + +static void __disable_runtime(struct rq *rq) +{ + struct root_domain *rd = rq->rd; + struct rt_rq *rt_rq; + + if (unlikely(!scheduler_running)) + return; + + for_each_leaf_rt_rq(rt_rq, rq) { + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + s64 want; + int i; + + spin_lock(&rt_b->rt_runtime_lock); + spin_lock(&rt_rq->rt_runtime_lock); + if (rt_rq->rt_runtime == RUNTIME_INF || + rt_rq->rt_runtime == rt_b->rt_runtime) + goto balanced; + spin_unlock(&rt_rq->rt_runtime_lock); + + want = rt_b->rt_runtime - rt_rq->rt_runtime; + + for_each_cpu_mask(i, rd->span) { + struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); + s64 diff; + + if (iter == rt_rq) + continue; + + spin_lock(&iter->rt_runtime_lock); + if (want > 0) { + diff = min_t(s64, iter->rt_runtime, want); + iter->rt_runtime -= diff; + want -= diff; + } else { + iter->rt_runtime -= want; + want -= want; + } + spin_unlock(&iter->rt_runtime_lock); + + if (!want) + break; + } + + spin_lock(&rt_rq->rt_runtime_lock); + BUG_ON(want); +balanced: + rt_rq->rt_runtime = RUNTIME_INF; + spin_unlock(&rt_rq->rt_runtime_lock); + spin_unlock(&rt_b->rt_runtime_lock); + } +} + +static void disable_runtime(struct rq *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + __disable_runtime(rq); + spin_unlock_irqrestore(&rq->lock, flags); +} + +static void __enable_runtime(struct rq *rq) +{ + struct rt_rq *rt_rq; + + if (unlikely(!scheduler_running)) + return; + + for_each_leaf_rt_rq(rt_rq, rq) { + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + + spin_lock(&rt_b->rt_runtime_lock); + spin_lock(&rt_rq->rt_runtime_lock); + rt_rq->rt_runtime = rt_b->rt_runtime; + rt_rq->rt_time = 0; + spin_unlock(&rt_rq->rt_runtime_lock); + spin_unlock(&rt_b->rt_runtime_lock); + } +} + +static void enable_runtime(struct rq *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + __enable_runtime(rq); + spin_unlock_irqrestore(&rq->lock, flags); +} + +static int balance_runtime(struct rt_rq *rt_rq) +{ + int more = 0; + + if (rt_rq->rt_time > rt_rq->rt_runtime) { + spin_unlock(&rt_rq->rt_runtime_lock); + more = do_balance_runtime(rt_rq); + spin_lock(&rt_rq->rt_runtime_lock); + } + + return more; +} +#else /* !CONFIG_SMP */ +static inline int balance_runtime(struct rt_rq *rt_rq) +{ + return 0; +} +#endif /* CONFIG_SMP */ + +static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) +{ + int i, idle = 1; + cpumask_t span; + + if (rt_b->rt_runtime == RUNTIME_INF) + return 1; + + span = sched_rt_period_mask(); + for_each_cpu_mask(i, span) { + int enqueue = 0; + struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); + struct rq *rq = rq_of_rt_rq(rt_rq); + + spin_lock(&rq->lock); + if (rt_rq->rt_time) { + u64 runtime; + + spin_lock(&rt_rq->rt_runtime_lock); + if (rt_rq->rt_throttled) + balance_runtime(rt_rq); + runtime = rt_rq->rt_runtime; + rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); + if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { + rt_rq->rt_throttled = 0; + enqueue = 1; + } + if (rt_rq->rt_time || rt_rq->rt_nr_running) + idle = 0; + spin_unlock(&rt_rq->rt_runtime_lock); + } else if (rt_rq->rt_nr_running) + idle = 0; + + if (enqueue) + sched_rt_rq_enqueue(rt_rq); + spin_unlock(&rq->lock); + } + + return idle; +} static inline int rt_se_prio(struct sched_rt_entity *rt_se) { @@ -326,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) return 0; -#ifdef CONFIG_SMP - if (rt_rq->rt_time > runtime) { - int more; - - spin_unlock(&rt_rq->rt_runtime_lock); - more = balance_runtime(rt_rq); - spin_lock(&rt_rq->rt_runtime_lock); - - if (more) - runtime = sched_rt_runtime(rt_rq); - } -#endif + balance_runtime(rt_rq); + runtime = sched_rt_runtime(rt_rq); + if (runtime == RUNTIME_INF) + return 0; if (rt_rq->rt_time > runtime) { rt_rq->rt_throttled = 1; @@ -392,14 +505,22 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq->rt_nr_running++; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED if (rt_se_prio(rt_se) < rt_rq->highest_prio) { +#ifdef CONFIG_SMP struct rq *rq = rq_of_rt_rq(rt_rq); +#endif + rt_rq->highest_prio = rt_se_prio(rt_se); - cpupri_set(&rq->rd->cpupri, rq->cpu, rt_se_prio(rt_se)); +#ifdef CONFIG_SMP + if (rq->online) + cpupri_set(&rq->rd->cpupri, rq->cpu, + rt_se_prio(rt_se)); +#endif } #endif #ifdef CONFIG_SMP if (rt_se->nr_cpus_allowed > 1) { struct rq *rq = rq_of_rt_rq(rt_rq); + rq->rt.rt_nr_migratory++; } @@ -448,7 +569,10 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) if (rt_rq->highest_prio != highest_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); - cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio); + + if (rq->online) + cpupri_set(&rq->rd->cpupri, rq->cpu, + rt_rq->highest_prio); } update_rt_migration(rq_of_rt_rq(rt_rq)); @@ -461,35 +585,35 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) #endif } -static void enqueue_rt_entity(struct sched_rt_entity *rt_se) +static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rt_prio_array *array = &rt_rq->active; struct rt_rq *group_rq = group_rt_rq(rt_se); + struct list_head *queue = array->queue + rt_se_prio(rt_se); - if (group_rq && rt_rq_throttled(group_rq)) + /* + * Don't enqueue the group if its throttled, or when empty. + * The latter is a consequence of the former when a child group + * get throttled and the current group doesn't have any other + * active members. + */ + if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - if (rt_se->nr_cpus_allowed == 1) - list_add_tail(&rt_se->run_list, - array->xqueue + rt_se_prio(rt_se)); - else - list_add_tail(&rt_se->run_list, - array->squeue + rt_se_prio(rt_se)); - + list_add_tail(&rt_se->run_list, queue); __set_bit(rt_se_prio(rt_se), array->bitmap); inc_rt_tasks(rt_se, rt_rq); } -static void dequeue_rt_entity(struct sched_rt_entity *rt_se) +static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rt_prio_array *array = &rt_rq->active; list_del_init(&rt_se->run_list); - if (list_empty(array->squeue + rt_se_prio(rt_se)) - && list_empty(array->xqueue + rt_se_prio(rt_se))) + if (list_empty(array->queue + rt_se_prio(rt_se))) __clear_bit(rt_se_prio(rt_se), array->bitmap); dec_rt_tasks(rt_se, rt_rq); @@ -499,11 +623,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) * Because the prio of an upper entry depends on the lower * entries, we must remove entries top - down. */ -static void dequeue_rt_stack(struct task_struct *p) +static void dequeue_rt_stack(struct sched_rt_entity *rt_se) { - struct sched_rt_entity *rt_se, *back = NULL; + struct sched_rt_entity *back = NULL; - rt_se = &p->rt; for_each_sched_rt_entity(rt_se) { rt_se->back = back; back = rt_se; @@ -511,7 +634,26 @@ static void dequeue_rt_stack(struct task_struct *p) for (rt_se = back; rt_se; rt_se = rt_se->back) { if (on_rt_rq(rt_se)) - dequeue_rt_entity(rt_se); + __dequeue_rt_entity(rt_se); + } +} + +static void enqueue_rt_entity(struct sched_rt_entity *rt_se) +{ + dequeue_rt_stack(rt_se); + for_each_sched_rt_entity(rt_se) + __enqueue_rt_entity(rt_se); +} + +static void dequeue_rt_entity(struct sched_rt_entity *rt_se) +{ + dequeue_rt_stack(rt_se); + + for_each_sched_rt_entity(rt_se) { + struct rt_rq *rt_rq = group_rt_rq(rt_se); + + if (rt_rq && rt_rq->rt_nr_running) + __enqueue_rt_entity(rt_se); } } @@ -525,66 +667,53 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) if (wakeup) rt_se->timeout = 0; - dequeue_rt_stack(p); + enqueue_rt_entity(rt_se); - /* - * enqueue everybody, bottom - up. - */ - for_each_sched_rt_entity(rt_se) - enqueue_rt_entity(rt_se); + inc_cpu_load(rq, p->se.load.weight); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) { struct sched_rt_entity *rt_se = &p->rt; - struct rt_rq *rt_rq; update_curr_rt(rq); + dequeue_rt_entity(rt_se); - dequeue_rt_stack(p); - - /* - * re-enqueue all non-empty rt_rq entities. - */ - for_each_sched_rt_entity(rt_se) { - rt_rq = group_rt_rq(rt_se); - if (rt_rq && rt_rq->rt_nr_running) - enqueue_rt_entity(rt_se); - } + dec_cpu_load(rq, p->se.load.weight); } /* * Put task to the end of the run list without the overhead of dequeue * followed by enqueue. - * - * Note: We always enqueue the task to the shared-queue, regardless of its - * previous position w.r.t. exclusive vs shared. This is so that exclusive RR - * tasks fairly round-robin with all tasks on the runqueue, not just other - * exclusive tasks. */ -static -void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) +static void +requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) { - struct rt_prio_array *array = &rt_rq->active; + if (on_rt_rq(rt_se)) { + struct rt_prio_array *array = &rt_rq->active; + struct list_head *queue = array->queue + rt_se_prio(rt_se); - list_del_init(&rt_se->run_list); - list_add_tail(&rt_se->run_list, array->squeue + rt_se_prio(rt_se)); + if (head) + list_move(&rt_se->run_list, queue); + else + list_move_tail(&rt_se->run_list, queue); + } } -static void requeue_task_rt(struct rq *rq, struct task_struct *p) +static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) { struct sched_rt_entity *rt_se = &p->rt; struct rt_rq *rt_rq; for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); - requeue_rt_entity(rt_rq, rt_se); + requeue_rt_entity(rt_rq, rt_se, head); } } static void yield_task_rt(struct rq *rq) { - requeue_task_rt(rq, rq->curr); + requeue_task_rt(rq, rq->curr, 0); } #ifdef CONFIG_SMP @@ -624,10 +753,31 @@ static int select_task_rq_rt(struct task_struct *p, int sync) */ return task_cpu(p); } -#endif /* CONFIG_SMP */ -static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, - struct rt_rq *rt_rq); +static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) +{ + cpumask_t mask; + + if (rq->curr->rt.nr_cpus_allowed == 1) + return; + + if (p->rt.nr_cpus_allowed != 1 + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; + + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; + + /* + * There appears to be other cpus that can accept + * current and none to run 'p', so lets reschedule + * to try and push current away: + */ + requeue_task_rt(rq, p, 1); + resched_task(rq->curr); +} + +#endif /* CONFIG_SMP */ /* * Preempt the current task with a newly woken task if needed: @@ -652,19 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) * to move current somewhere else, making room for our non-migratable * task. */ - if((p->prio == rq->curr->prio) - && p->rt.nr_cpus_allowed == 1 - && rq->curr->rt.nr_cpus_allowed != 1 - && pick_next_rt_entity(rq, &rq->rt) != &rq->curr->rt) { - cpumask_t mask; - - if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) - /* - * There appears to be other cpus that can accept - * current, so lets reschedule to try and push it away - */ - resched_task(rq->curr); - } + if (p->prio == rq->curr->prio && !need_resched()) + check_preempt_equal_prio(rq, p); #endif } @@ -679,15 +818,8 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, idx = sched_find_first_bit(array->bitmap); BUG_ON(idx >= MAX_RT_PRIO); - queue = array->xqueue + idx; - if (!list_empty(queue)) - next = list_entry(queue->next, struct sched_rt_entity, - run_list); - else { - queue = array->squeue + idx; - next = list_entry(queue->next, struct sched_rt_entity, - run_list); - } + queue = array->queue + idx; + next = list_entry(queue->next, struct sched_rt_entity, run_list); return next; } @@ -757,7 +889,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) continue; if (next && next->prio < idx) continue; - list_for_each_entry(rt_se, array->squeue + idx, run_list) { + list_for_each_entry(rt_se, array->queue + idx, run_list) { struct task_struct *p = rt_task_of(rt_se); if (pick_rt_task(rq, p, cpu)) { next = p; @@ -804,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task) return -1; /* No targets found */ /* + * Only consider CPUs that are usable for migration. + * I guess we might want to change cpupri_find() to ignore those + * in the first place. + */ + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); + + /* * At this point we have built a mask of cpus representing the * lowest priority tasks in the system. Now we want to elect * the best one based on our affinity and topology. @@ -989,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1139,14 +1278,6 @@ static void set_cpus_allowed_rt(struct task_struct *p, } update_rt_migration(rq); - - if (unlikely(weight == 1 || p->rt.nr_cpus_allowed == 1)) - /* - * If either the new or old weight is a "1", we need - * to requeue to properly move between shared and - * exclusive queues. - */ - requeue_task_rt(rq, p); } p->cpus_allowed = *new_mask; @@ -1154,20 +1285,24 @@ static void set_cpus_allowed_rt(struct task_struct *p, } /* Assumes rq->lock is held */ -static void join_domain_rt(struct rq *rq) +static void rq_online_rt(struct rq *rq) { if (rq->rt.overloaded) rt_set_overload(rq); + __enable_runtime(rq); + cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio); } /* Assumes rq->lock is held */ -static void leave_domain_rt(struct rq *rq) +static void rq_offline_rt(struct rq *rq) { if (rq->rt.overloaded) rt_clear_overload(rq); + __disable_runtime(rq); + cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); } @@ -1301,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) * on the queue: */ if (p->rt.run_list.prev != p->rt.run_list.next) { - requeue_task_rt(rq, p); + requeue_task_rt(rq, p, 0); set_tsk_need_resched(p); } } @@ -1331,8 +1466,8 @@ static const struct sched_class rt_sched_class = { .load_balance = load_balance_rt, .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, - .join_domain = join_domain_rt, - .leave_domain = leave_domain_rt, + .rq_online = rq_online_rt, + .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, .task_wake_up = task_wake_up_rt, @@ -1345,3 +1480,17 @@ static const struct sched_class rt_sched_class = { .prio_changed = prio_changed_rt, .switched_to = switched_to_rt, }; + +#ifdef CONFIG_SCHED_DEBUG +extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); + +static void print_rt_stats(struct seq_file *m, int cpu) +{ + struct rt_rq *rt_rq; + + rcu_read_lock(); + for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) + print_rt_rq(m, cpu, rt_rq); + rcu_read_unlock(); +} +#endif /* CONFIG_SCHED_DEBUG */