X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched_rt.c;h=908c04f9dad02d23df66fbc55a28f5619fc10081;hb=2e489e077a6ad118c4f247faedf330117b107cce;hp=837241568d767a2f0b25298544921088bfba4941;hpb=ad2a3f13b7258a5daaaeb8cff9f835aac468b71d;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 8372415..908c04f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq) static inline void rt_set_overload(struct rq *rq) { + if (!rq->online) + return; + cpu_set(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set @@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq) static inline void rt_clear_overload(struct rq *rq) { + if (!rq->online) + return; + /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); cpu_clear(rq->cpu, rq->rd->rto_mask); @@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) return &rt_rq->tg->rt_bandwidth; } -#else +#else /* !CONFIG_RT_GROUP_SCHED */ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) { @@ -220,48 +226,10 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) return &def_rt_bandwidth; } -#endif - -static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) -{ - int i, idle = 1; - cpumask_t span; - - if (rt_b->rt_runtime == RUNTIME_INF) - return 1; - - span = sched_rt_period_mask(); - for_each_cpu_mask(i, span) { - int enqueue = 0; - struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); - struct rq *rq = rq_of_rt_rq(rt_rq); - - spin_lock(&rq->lock); - if (rt_rq->rt_time) { - u64 runtime; - - spin_lock(&rt_rq->rt_runtime_lock); - runtime = rt_rq->rt_runtime; - rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); - if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { - rt_rq->rt_throttled = 0; - enqueue = 1; - } - if (rt_rq->rt_time || rt_rq->rt_nr_running) - idle = 0; - spin_unlock(&rt_rq->rt_runtime_lock); - } - - if (enqueue) - sched_rt_rq_enqueue(rt_rq); - spin_unlock(&rq->lock); - } - - return idle; -} +#endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_SMP -static int balance_runtime(struct rt_rq *rt_rq) +static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); struct root_domain *rd = cpu_rq(smp_processor_id())->rd; @@ -272,7 +240,7 @@ static int balance_runtime(struct rt_rq *rt_rq) spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu_mask(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -280,9 +248,12 @@ static int balance_runtime(struct rt_rq *rt_rq) continue; spin_lock(&iter->rt_runtime_lock); + if (iter->rt_runtime == RUNTIME_INF) + goto next; + diff = iter->rt_runtime - iter->rt_time; if (diff > 0) { - do_div(diff, weight); + diff = div_u64((u64)diff, weight); if (rt_rq->rt_runtime + diff > rt_period) diff = rt_period - rt_rq->rt_runtime; iter->rt_runtime -= diff; @@ -293,13 +264,163 @@ static int balance_runtime(struct rt_rq *rt_rq) break; } } +next: spin_unlock(&iter->rt_runtime_lock); } spin_unlock(&rt_b->rt_runtime_lock); return more; } -#endif + +static void __disable_runtime(struct rq *rq) +{ + struct root_domain *rd = rq->rd; + struct rt_rq *rt_rq; + + if (unlikely(!scheduler_running)) + return; + + for_each_leaf_rt_rq(rt_rq, rq) { + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + s64 want; + int i; + + spin_lock(&rt_b->rt_runtime_lock); + spin_lock(&rt_rq->rt_runtime_lock); + if (rt_rq->rt_runtime == RUNTIME_INF || + rt_rq->rt_runtime == rt_b->rt_runtime) + goto balanced; + spin_unlock(&rt_rq->rt_runtime_lock); + + want = rt_b->rt_runtime - rt_rq->rt_runtime; + + for_each_cpu_mask(i, rd->span) { + struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); + s64 diff; + + if (iter == rt_rq) + continue; + + spin_lock(&iter->rt_runtime_lock); + if (want > 0) { + diff = min_t(s64, iter->rt_runtime, want); + iter->rt_runtime -= diff; + want -= diff; + } else { + iter->rt_runtime -= want; + want -= want; + } + spin_unlock(&iter->rt_runtime_lock); + + if (!want) + break; + } + + spin_lock(&rt_rq->rt_runtime_lock); + BUG_ON(want); +balanced: + rt_rq->rt_runtime = RUNTIME_INF; + spin_unlock(&rt_rq->rt_runtime_lock); + spin_unlock(&rt_b->rt_runtime_lock); + } +} + +static void disable_runtime(struct rq *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + __disable_runtime(rq); + spin_unlock_irqrestore(&rq->lock, flags); +} + +static void __enable_runtime(struct rq *rq) +{ + struct rt_rq *rt_rq; + + if (unlikely(!scheduler_running)) + return; + + for_each_leaf_rt_rq(rt_rq, rq) { + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + + spin_lock(&rt_b->rt_runtime_lock); + spin_lock(&rt_rq->rt_runtime_lock); + rt_rq->rt_runtime = rt_b->rt_runtime; + rt_rq->rt_time = 0; + spin_unlock(&rt_rq->rt_runtime_lock); + spin_unlock(&rt_b->rt_runtime_lock); + } +} + +static void enable_runtime(struct rq *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + __enable_runtime(rq); + spin_unlock_irqrestore(&rq->lock, flags); +} + +static int balance_runtime(struct rt_rq *rt_rq) +{ + int more = 0; + + if (rt_rq->rt_time > rt_rq->rt_runtime) { + spin_unlock(&rt_rq->rt_runtime_lock); + more = do_balance_runtime(rt_rq); + spin_lock(&rt_rq->rt_runtime_lock); + } + + return more; +} +#else /* !CONFIG_SMP */ +static inline int balance_runtime(struct rt_rq *rt_rq) +{ + return 0; +} +#endif /* CONFIG_SMP */ + +static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) +{ + int i, idle = 1; + cpumask_t span; + + if (rt_b->rt_runtime == RUNTIME_INF) + return 1; + + span = sched_rt_period_mask(); + for_each_cpu_mask(i, span) { + int enqueue = 0; + struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); + struct rq *rq = rq_of_rt_rq(rt_rq); + + spin_lock(&rq->lock); + if (rt_rq->rt_time) { + u64 runtime; + + spin_lock(&rt_rq->rt_runtime_lock); + if (rt_rq->rt_throttled) + balance_runtime(rt_rq); + runtime = rt_rq->rt_runtime; + rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); + if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { + rt_rq->rt_throttled = 0; + enqueue = 1; + } + if (rt_rq->rt_time || rt_rq->rt_nr_running) + idle = 0; + spin_unlock(&rt_rq->rt_runtime_lock); + } else if (rt_rq->rt_nr_running) + idle = 0; + + if (enqueue) + sched_rt_rq_enqueue(rt_rq); + spin_unlock(&rq->lock); + } + + return idle; +} static inline int rt_se_prio(struct sched_rt_entity *rt_se) { @@ -326,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) return 0; -#ifdef CONFIG_SMP - if (rt_rq->rt_time > runtime) { - int more; - - spin_unlock(&rt_rq->rt_runtime_lock); - more = balance_runtime(rt_rq); - spin_lock(&rt_rq->rt_runtime_lock); - - if (more) - runtime = sched_rt_runtime(rt_rq); - } -#endif + balance_runtime(rt_rq); + runtime = sched_rt_runtime(rt_rq); + if (runtime == RUNTIME_INF) + return 0; if (rt_rq->rt_time > runtime) { rt_rq->rt_throttled = 1; @@ -391,12 +504,23 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(rt_se_prio(rt_se))); rt_rq->rt_nr_running++; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED - if (rt_se_prio(rt_se) < rt_rq->highest_prio) + if (rt_se_prio(rt_se) < rt_rq->highest_prio) { +#ifdef CONFIG_SMP + struct rq *rq = rq_of_rt_rq(rt_rq); +#endif + rt_rq->highest_prio = rt_se_prio(rt_se); +#ifdef CONFIG_SMP + if (rq->online) + cpupri_set(&rq->rd->cpupri, rq->cpu, + rt_se_prio(rt_se)); +#endif + } #endif #ifdef CONFIG_SMP if (rt_se->nr_cpus_allowed > 1) { struct rq *rq = rq_of_rt_rq(rt_rq); + rq->rt.rt_nr_migratory++; } @@ -416,6 +540,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) static inline void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { +#ifdef CONFIG_SMP + int highest_prio = rt_rq->highest_prio; +#endif + WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_rq->rt_nr_running); rt_rq->rt_nr_running--; @@ -439,6 +567,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rq->rt.rt_nr_migratory--; } + if (rt_rq->highest_prio != highest_prio) { + struct rq *rq = rq_of_rt_rq(rt_rq); + + if (rq->online) + cpupri_set(&rq->rd->cpupri, rq->cpu, + rt_rq->highest_prio); + } + update_rt_migration(rq_of_rt_rq(rt_rq)); #endif /* CONFIG_SMP */ #ifdef CONFIG_RT_GROUP_SCHED @@ -454,6 +590,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rt_prio_array *array = &rt_rq->active; struct rt_rq *group_rq = group_rt_rq(rt_se); + struct list_head *queue = array->queue + rt_se_prio(rt_se); /* * Don't enqueue the group if its throttled, or when empty. @@ -464,7 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); + list_add_tail(&rt_se->run_list, queue); __set_bit(rt_se_prio(rt_se), array->bitmap); inc_rt_tasks(rt_se, rt_rq); @@ -531,6 +668,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) rt_se->timeout = 0; enqueue_rt_entity(rt_se); + + inc_cpu_load(rq, p->se.load.weight); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) @@ -539,34 +678,42 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) update_curr_rt(rq); dequeue_rt_entity(rt_se); + + dec_cpu_load(rq, p->se.load.weight); } /* * Put task to the end of the run list without the overhead of dequeue * followed by enqueue. */ -static -void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) +static void +requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) { - struct rt_prio_array *array = &rt_rq->active; + if (on_rt_rq(rt_se)) { + struct rt_prio_array *array = &rt_rq->active; + struct list_head *queue = array->queue + rt_se_prio(rt_se); - list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); + if (head) + list_move(&rt_se->run_list, queue); + else + list_move_tail(&rt_se->run_list, queue); + } } -static void requeue_task_rt(struct rq *rq, struct task_struct *p) +static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) { struct sched_rt_entity *rt_se = &p->rt; struct rt_rq *rt_rq; for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); - requeue_rt_entity(rt_rq, rt_se); + requeue_rt_entity(rt_rq, rt_se, head); } } static void yield_task_rt(struct rq *rq) { - requeue_task_rt(rq, rq->curr); + requeue_task_rt(rq, rq->curr, 0); } #ifdef CONFIG_SMP @@ -606,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync) */ return task_cpu(p); } + +static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) +{ + cpumask_t mask; + + if (rq->curr->rt.nr_cpus_allowed == 1) + return; + + if (p->rt.nr_cpus_allowed != 1 + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; + + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; + + /* + * There appears to be other cpus that can accept + * current and none to run 'p', so lets reschedule + * to try and push current away: + */ + requeue_task_rt(rq, p, 1); + resched_task(rq->curr); +} + #endif /* CONFIG_SMP */ /* @@ -613,8 +784,27 @@ static int select_task_rq_rt(struct task_struct *p, int sync) */ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) { - if (p->prio < rq->curr->prio) + if (p->prio < rq->curr->prio) { resched_task(rq->curr); + return; + } + +#ifdef CONFIG_SMP + /* + * If: + * + * - the newly woken task is of equal priority to the current task + * - the newly woken task is non-migratable while current is migratable + * - current will be preempted on the next reschedule + * + * we should check to see if current can readily move to a different + * cpu. If so, we will reschedule to allow the push logic to try + * to move current somewhere else, making room for our non-migratable + * task. + */ + if (p->prio == rq->curr->prio && !need_resched()) + check_preempt_equal_prio(rq, p); +#endif } static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, @@ -717,73 +907,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); -static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask) -{ - int lowest_prio = -1; - int lowest_cpu = -1; - int count = 0; - int cpu; - - cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed); - - /* - * Scan each rq for the lowest prio. - */ - for_each_cpu_mask(cpu, *lowest_mask) { - struct rq *rq = cpu_rq(cpu); - - /* We look for lowest RT prio or non-rt CPU */ - if (rq->rt.highest_prio >= MAX_RT_PRIO) { - /* - * if we already found a low RT queue - * and now we found this non-rt queue - * clear the mask and set our bit. - * Otherwise just return the queue as is - * and the count==1 will cause the algorithm - * to use the first bit found. - */ - if (lowest_cpu != -1) { - cpus_clear(*lowest_mask); - cpu_set(rq->cpu, *lowest_mask); - } - return 1; - } - - /* no locking for now */ - if ((rq->rt.highest_prio > task->prio) - && (rq->rt.highest_prio >= lowest_prio)) { - if (rq->rt.highest_prio > lowest_prio) { - /* new low - clear old data */ - lowest_prio = rq->rt.highest_prio; - lowest_cpu = cpu; - count = 0; - } - count++; - } else - cpu_clear(cpu, *lowest_mask); - } - - /* - * Clear out all the set bits that represent - * runqueues that were of higher prio than - * the lowest_prio. - */ - if (lowest_cpu > 0) { - /* - * Perhaps we could add another cpumask op to - * zero out bits. Like cpu_zero_bits(cpumask, nrbits); - * Then that could be optimized to use memset and such. - */ - for_each_cpu_mask(cpu, *lowest_mask) { - if (cpu >= lowest_cpu) - break; - cpu_clear(cpu, *lowest_mask); - } - } - - return count; -} - static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) { int first; @@ -805,17 +928,19 @@ static int find_lowest_rq(struct task_struct *task) cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); - int count = find_lowest_cpus(task, lowest_mask); - if (!count) + if (task->rt.nr_cpus_allowed == 1) + return -1; /* No other targets possible */ + + if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) return -1; /* No targets found */ /* - * There is no sense in performing an optimal search if only one - * target is found. + * Only consider CPUs that are usable for migration. + * I guess we might want to change cpupri_find() to ignore those + * in the first place. */ - if (count == 1) - return first_cpu(*lowest_mask); + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); /* * At this point we have built a mask of cpus representing the @@ -1003,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1160,17 +1285,25 @@ static void set_cpus_allowed_rt(struct task_struct *p, } /* Assumes rq->lock is held */ -static void join_domain_rt(struct rq *rq) +static void rq_online_rt(struct rq *rq) { if (rq->rt.overloaded) rt_set_overload(rq); + + __enable_runtime(rq); + + cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio); } /* Assumes rq->lock is held */ -static void leave_domain_rt(struct rq *rq) +static void rq_offline_rt(struct rq *rq) { if (rq->rt.overloaded) rt_clear_overload(rq); + + __disable_runtime(rq); + + cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); } /* @@ -1303,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) * on the queue: */ if (p->rt.run_list.prev != p->rt.run_list.next) { - requeue_task_rt(rq, p); + requeue_task_rt(rq, p, 0); set_tsk_need_resched(p); } } @@ -1333,8 +1466,8 @@ static const struct sched_class rt_sched_class = { .load_balance = load_balance_rt, .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, - .join_domain = join_domain_rt, - .leave_domain = leave_domain_rt, + .rq_online = rq_online_rt, + .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, .task_wake_up = task_wake_up_rt, @@ -1347,3 +1480,17 @@ static const struct sched_class rt_sched_class = { .prio_changed = prio_changed_rt, .switched_to = switched_to_rt, }; + +#ifdef CONFIG_SCHED_DEBUG +extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); + +static void print_rt_stats(struct seq_file *m, int cpu) +{ + struct rt_rq *rt_rq; + + rcu_read_lock(); + for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) + print_rt_rq(m, cpu, rt_rq); + rcu_read_unlock(); +} +#endif /* CONFIG_SCHED_DEBUG */