X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched_fair.c;h=24086e7e75937951261359ba09ed377a8ea587d0;hb=710390d90f143a9ebb87a475215140f426792efd;hp=280892e9d85e3543564a1397cd51ad82158f7545;hpb=e69b0f1b41c0e57bb1e29100b5810a5914efcb45;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 280892e..24086e7 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) #ifdef CONFIG_SCHED_DEBUG int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; @@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq) if (entity_is_task(curr)) { struct task_struct *curtask = task_of(curr); + trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime); cpuacct_charge(curtask, delta_exec); account_group_exec_runtime(curtask, delta_exec); } @@ -709,24 +710,28 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) if (initial && sched_feat(START_DEBIT)) vruntime += sched_vslice(cfs_rq, se); - if (!initial) { - /* sleeps upto a single latency don't count. */ - if (sched_feat(NEW_FAIR_SLEEPERS)) { - unsigned long thresh = sysctl_sched_latency; + /* sleeps up to a single latency don't count. */ + if (!initial && sched_feat(FAIR_SLEEPERS)) { + unsigned long thresh = sysctl_sched_latency; - /* - * Convert the sleeper threshold into virtual time. - * SCHED_IDLE is a special sub-class. We care about - * fairness only relative to other SCHED_IDLE tasks, - * all of which have the same weight. - */ - if (sched_feat(NORMALIZED_SLEEPER) && - (!entity_is_task(se) || - task_of(se)->policy != SCHED_IDLE)) - thresh = calc_delta_fair(thresh, se); + /* + * Convert the sleeper threshold into virtual time. + * SCHED_IDLE is a special sub-class. We care about + * fairness only relative to other SCHED_IDLE tasks, + * all of which have the same weight. + */ + if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) || + task_of(se)->policy != SCHED_IDLE)) + thresh = calc_delta_fair(thresh, se); - vruntime -= thresh; - } + /* + * Halve their sleep time's effect, to allow + * for a gentler effect of sleepers: + */ + if (sched_feat(GENTLE_FAIR_SLEEPERS)) + thresh >>= 1; + + vruntime -= thresh; } /* ensure we never gain time by being placed backwards. */ @@ -757,10 +762,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) { - if (cfs_rq->last == se) + if (!se || cfs_rq->last == se) cfs_rq->last = NULL; - if (cfs_rq->next == se) + if (!se || cfs_rq->next == se) cfs_rq->next = NULL; } @@ -1241,26 +1246,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) */ static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, - int this_cpu, int flag) + int this_cpu, int load_idx) { struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; unsigned long min_load = ULONG_MAX, this_load = 0; int imbalance = 100 + (sd->imbalance_pct-100)/2; - int load_idx = 0; - - switch (flag) { - case SD_BALANCE_FORK: - case SD_BALANCE_EXEC: - load_idx = sd->forkexec_idx; - break; - - case SD_BALANCE_WAKE: - load_idx = sd->wake_idx; - break; - - default: - break; - } do { unsigned long load, avg_load; @@ -1329,6 +1319,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } /* + * Try and locate an idle CPU in the sched_domain. + */ +static int +select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) +{ + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); + int i; + + /* + * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE + * test in select_task_rq_fair) and the prev_cpu is idle then that's + * always a better target than the current cpu. + */ + if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) + return prev_cpu; + + /* + * Otherwise, iterate the domain and find an elegible idle cpu. + */ + for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { + if (!cpu_rq(i)->cfs.nr_running) { + target = i; + break; + } + } + + return target; +} + +/* * sched_balance_self: balance the current task (running on cpu) in domains * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and * SD_BALANCE_EXEC. @@ -1339,17 +1360,19 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) * * preempt must be disabled. */ -static int select_task_rq_fair(struct task_struct *p, int sd_flag, int flags) +static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) { - struct sched_domain *tmp, *sd = NULL; + struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); int new_cpu = cpu; int want_affine = 0; - int sync = flags & WF_SYNC; + int want_sd = 1; + int sync = wake_flags & WF_SYNC; if (sd_flag & SD_BALANCE_WAKE) { - if (sched_feat(AFFINE_WAKEUPS)) + if (sched_feat(AFFINE_WAKEUPS) && + cpumask_test_cpu(cpu, &p->cpus_allowed)) want_affine = 1; new_cpu = prev_cpu; } @@ -1360,7 +1383,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int flags) * If power savings logic is enabled for a domain, see if we * are not overloaded, if so, don't balance wider. */ - if (tmp->flags & SD_POWERSAVINGS_BALANCE) { + if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) { unsigned long power = 0; unsigned long nr_running = 0; unsigned long capacity; @@ -1373,41 +1396,75 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int flags) capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); - if (nr_running/2 < capacity) - break; - } + if (tmp->flags & SD_POWERSAVINGS_BALANCE) + nr_running /= 2; - switch (sd_flag) { - case SD_BALANCE_WAKE: - if (!sched_feat(LB_WAKEUP_UPDATE)) - break; - case SD_BALANCE_FORK: - case SD_BALANCE_EXEC: - if (root_task_group_empty()) - break; - update_shares(tmp); - default: - break; + if (nr_running < capacity) + want_sd = 0; } - if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && - cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { + /* + * While iterating the domains looking for a spanning + * WAKE_AFFINE domain, adjust the affine target to any idle cpu + * in cache sharing domains along the way. + */ + if (want_affine) { + int target = -1; + + /* + * If both cpu and prev_cpu are part of this domain, + * cpu is a valid SD_WAKE_AFFINE target. + */ + if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) + target = cpu; - if (wake_affine(tmp, p, sync)) { - new_cpu = cpu; - goto out; + /* + * If there's an idle sibling in this domain, make that + * the wake_affine target instead of the current cpu. + */ + if (tmp->flags & SD_PREFER_SIBLING) + target = select_idle_sibling(p, tmp, target); + + if (target >= 0) { + if (tmp->flags & SD_WAKE_AFFINE) { + affine_sd = tmp; + want_affine = 0; + } + cpu = target; } - - want_affine = 0; } + if (!want_sd && !want_affine) + break; + if (!(tmp->flags & sd_flag)) continue; - sd = tmp; + if (want_sd) + sd = tmp; + } + + if (sched_feat(LB_SHARES_UPDATE)) { + /* + * Pick the largest domain to update shares over + */ + tmp = sd; + if (affine_sd && (!tmp || + cpumask_weight(sched_domain_span(affine_sd)) > + cpumask_weight(sched_domain_span(sd)))) + tmp = affine_sd; + + if (tmp) + update_shares(tmp); + } + + if (affine_sd && wake_affine(affine_sd, p, sync)) { + new_cpu = cpu; + goto out; } while (sd) { + int load_idx = sd->forkexec_idx; struct sched_group *group; int weight; @@ -1416,7 +1473,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int flags) continue; } - group = find_idlest_group(sd, p, cpu, sd_flag); + if (sd_flag & SD_BALANCE_WAKE) + load_idx = sd->wake_idx; + + group = find_idlest_group(sd, p, cpu, load_idx); if (!group) { sd = sd->child; continue; @@ -1557,12 +1617,12 @@ static void set_next_buddy(struct sched_entity *se) /* * Preempt the current task with a newly woken task if needed: */ -static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int flags) +static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) { struct task_struct *curr = rq->curr; struct sched_entity *se = &curr->se, *pse = &p->se; struct cfs_rq *cfs_rq = task_cfs_rq(curr); - int sync = flags & WF_SYNC; + int sync = wake_flags & WF_SYNC; update_curr(cfs_rq); @@ -1588,7 +1648,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int flags */ if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) set_last_buddy(se); - if (sched_feat(NEXT_BUDDY) && !(flags & WF_FORK)) + if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) set_next_buddy(pse); /* @@ -1611,9 +1671,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int flags return; } - if (!sched_feat(WAKEUP_PREEMPT)) - return; - if ((sched_feat(WAKEUP_SYNC) && sync) || (sched_feat(WAKEUP_OVERLAP) && (se->avg_overlap < sysctl_sched_migration_cost && @@ -1622,6 +1679,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int flags return; } + if (sched_feat(WAKEUP_RUNNING)) { + if (pse->avg_running < se->avg_running) { + set_next_buddy(pse); + resched_task(curr); + return; + } + } + + if (!sched_feat(WAKEUP_PREEMPT)) + return; + find_matching_se(&se, &pse); BUG_ON(!pse); @@ -1636,7 +1704,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; - if (unlikely(!cfs_rq->nr_running)) + if (!cfs_rq->nr_running) return NULL; do { @@ -1644,8 +1712,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) /* * If se was a buddy, clear it so that it will have to earn * the favour again. + * + * If se was not a buddy, clear the buddies because neither + * was elegible to run, let them earn it again. + * + * IOW. unconditionally clear buddies. */ - __clear_buddies(cfs_rq, se); + __clear_buddies(cfs_rq, NULL); set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); @@ -1921,6 +1994,25 @@ static void moved_group_fair(struct task_struct *p) } #endif +unsigned int get_rr_interval_fair(struct task_struct *task) +{ + struct sched_entity *se = &task->se; + unsigned long flags; + struct rq *rq; + unsigned int rr_interval = 0; + + /* + * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise + * idle runqueue: + */ + rq = task_rq_lock(task, &flags); + if (rq->cfs.load.weight) + rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); + task_rq_unlock(rq, &flags); + + return rr_interval; +} + /* * All the scheduling class methods: */ @@ -1949,6 +2041,8 @@ static const struct sched_class fair_sched_class = { .prio_changed = prio_changed_fair, .switched_to = switched_to_fair, + .get_rr_interval = get_rr_interval_fair, + #ifdef CONFIG_FAIR_GROUP_SCHED .moved_group = moved_group_fair, #endif