X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched_fair.c;h=0566f2a03c420717e6604bde5deac33cb71f6ed1;hb=7e0f7cf582abd6c85232331dfe726a4e4b0fd98e;hp=ce514afd78ff7998338b6661ae22475e808fa3fa;hpb=3f3a490480d8ab96e0fe30a41f80f14e6a0c579d;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ce514af..0566f2a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -283,7 +283,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) struct sched_entity, run_node); - if (vruntime == cfs_rq->min_vruntime) + if (!cfs_rq->curr) vruntime = se->vruntime; else vruntime = min_vruntime(vruntime, se->vruntime); @@ -341,23 +341,20 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->rb_leftmost = next_node; } - if (cfs_rq->next == se) - cfs_rq->next = NULL; - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } -static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) -{ - return cfs_rq->rb_leftmost; -} - static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) { - return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); + struct rb_node *left = cfs_rq->rb_leftmost; + + if (!left) + return NULL; + + return rb_entry(left, struct sched_entity, run_node); } -static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); @@ -389,20 +386,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, #endif /* - * delta *= P[w / rw] - */ -static inline unsigned long -calc_delta_weight(unsigned long delta, struct sched_entity *se) -{ - for_each_sched_entity(se) { - delta = calc_delta_mine(delta, - se->load.weight, &cfs_rq_of(se)->load); - } - - return delta; -} - -/* * delta /= w */ static inline unsigned long @@ -443,12 +426,23 @@ static u64 __sched_period(unsigned long nr_running) */ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { - unsigned long nr_running = cfs_rq->nr_running; + u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); + + for_each_sched_entity(se) { + struct load_weight *load; + + cfs_rq = cfs_rq_of(se); + load = &cfs_rq->load; - if (unlikely(!se->on_rq)) - nr_running++; + if (unlikely(!se->on_rq)) { + struct load_weight lw = cfs_rq->load; - return calc_delta_weight(__sched_period(nr_running), se); + update_load_add(&lw, se->load.weight); + load = &lw; + } + slice = calc_delta_mine(slice, se->load.weight, load); + } + return slice; } /* @@ -495,6 +489,8 @@ static void update_curr(struct cfs_rq *cfs_rq) * overflow on 32 bits): */ delta_exec = (unsigned long)(now - curr->exec_start); + if (!delta_exec) + return; __update_curr(cfs_rq, curr, delta_exec); curr->exec_start = now; @@ -684,9 +680,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) unsigned long thresh = sysctl_sched_latency; /* - * convert the sleeper threshold into virtual time + * Convert the sleeper threshold into virtual time. + * SCHED_IDLE is a special sub-class. We care about + * fairness only relative to other SCHED_IDLE tasks, + * all of which have the same weight. */ - if (sched_feat(NORMALIZED_SLEEPER)) + if (sched_feat(NORMALIZED_SLEEPER) && + task_of(se)->policy != SCHED_IDLE) thresh = calc_delta_fair(thresh, se); vruntime -= thresh; @@ -719,6 +719,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) __enqueue_entity(cfs_rq, se); } +static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + if (cfs_rq->last == se) + cfs_rq->last = NULL; + + if (cfs_rq->next == se) + cfs_rq->next = NULL; +} + +static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + for_each_sched_entity(se) + __clear_buddies(cfs_rq_of(se), se); +} + static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) { @@ -741,6 +756,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) #endif } + clear_buddies(cfs_rq, se); + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); @@ -757,8 +774,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; - if (delta_exec > ideal_runtime) + if (delta_exec > ideal_runtime) { resched_task(rq_of(cfs_rq)->curr); + /* + * The current task ran long enough, ensure it doesn't get + * re-elected due to buddy favours. + */ + clear_buddies(cfs_rq, curr); + } } static void @@ -794,24 +817,15 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -static struct sched_entity * -pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) -{ - if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) - return se; - - return cfs_rq->next; -} - static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { - struct sched_entity *se = NULL; + struct sched_entity *se = __pick_next_entity(cfs_rq); - if (first_fair(cfs_rq)) { - se = __pick_next_entity(cfs_rq); - se = pick_next(cfs_rq, se); - set_next_entity(cfs_rq, se); - } + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1) + return cfs_rq->next; + + if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1) + return cfs_rq->last; return se; } @@ -983,6 +997,8 @@ static void yield_task_fair(struct rq *rq) if (unlikely(cfs_rq->nr_running == 1)) return; + clear_buddies(cfs_rq, se); + if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { update_rq_clock(rq); /* @@ -1016,16 +1032,33 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_map) + * hence we need to mask them out (cpu_active_mask) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { - cpumask_t tmp; struct sched_domain *sd; int i; + unsigned int chosen_wakeup_cpu; + int this_cpu; + + /* + * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu + * are idle and this is not a kernel thread and this task's affinity + * allows it to be moved to preferred cpu, then just move! + */ + + this_cpu = smp_processor_id(); + chosen_wakeup_cpu = + cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; + + if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && + idle_cpu(cpu) && idle_cpu(this_cpu) && + p->mm && !(p->flags & PF_KTHREAD) && + cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) + return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. @@ -1043,10 +1076,9 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - cpus_and(tmp, sd->span, p->cpus_allowed); - cpus_and(tmp, tmp, cpu_active_map); - for_each_cpu_mask_nr(i, tmp) { - if (idle_cpu(i)) { + for_each_cpu_and(i, sched_domain_span(sd), + &p->cpus_allowed) { + if (cpu_active(i) && idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1239,13 +1271,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpu_isset(prev_cpu, sd->span)) { + if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { this_sd = sd; break; } } - if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) + if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) goto out; /* @@ -1325,26 +1357,56 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) return 0; } +static void set_last_buddy(struct sched_entity *se) +{ + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; + } +} + +static void set_next_buddy(struct sched_entity *se) +{ + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; + } +} + /* * Preempt the current task with a newly woken task if needed: */ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + + update_curr(cfs_rq); if (unlikely(rt_prio(p->prio))) { - update_rq_clock(rq); - update_curr(cfs_rq); resched_task(curr); return; } + if (unlikely(p->sched_class != &fair_sched_class)) + return; + if (unlikely(se == pse)) return; - cfs_rq_of(pse)->next = pse; + /* + * Only set the backward buddy when the current task is still on the + * rq. This can happen when a wakeup gets interleaved with schedule on + * the ->pre_schedule() or idle_balance() point, either of which can + * drop the rq lock. + * + * Also, during early boot the idle thread is in the fair class, for + * obvious reasons its a bad idea to schedule back to the idle thread. + */ + if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) + set_last_buddy(se); + set_next_buddy(pse); /* * We can come here with TIF_NEED_RESCHED already set from new task @@ -1354,11 +1416,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; /* - * Batch tasks do not preempt (their preemption is driven by + * Batch and idle tasks do not preempt (their preemption is driven by * the tick): */ - if (unlikely(p->policy == SCHED_BATCH)) + if (unlikely(p->policy != SCHED_NORMAL)) + return; + + /* Idle tasks are by definition preempted by everybody. */ + if (unlikely(curr->policy == SCHED_IDLE)) { + resched_task(curr); return; + } if (!sched_feat(WAKEUP_PREEMPT)) return; @@ -1396,6 +1464,12 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) do { se = pick_next_entity(cfs_rq); + /* + * If se was a buddy, clear it so that it will have to earn + * the favour again. + */ + __clear_buddies(cfs_rq, se); + set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); @@ -1577,8 +1651,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) } } -#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0) - /* * Share the fairness runtime between parent and child, thus the * total amount of pressure for CPU stays equal - new tasks