X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched_fair.c;h=67c67a87146ebaa64514c4bdf8b0ae5de0d4cf47;hb=96ae6ea0be1b902c28b3b463c27da42b41e2b63a;hp=f0dd4be1a3a401e8cae84ded72a359b7676b1d22;hpb=7c92e54f6f9601cfa9d8894ee248abcf62ed9a1c;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f0dd4be..67c67a8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -43,6 +43,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL; unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL; /* + * sys_sched_yield() compat mode + * + * This option switches the agressive yield implementation of the + * old scheduler back on. + */ +unsigned int __read_mostly sysctl_sched_compat_yield; + +/* * SCHED_BATCH wake-up granularity. * (default: 25 msec, units: nanoseconds) * @@ -631,6 +639,16 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) se->block_start = 0; se->sum_sleep_runtime += delta; + + /* + * Blocking time is in units of nanosecs, so shift by 20 to + * get a milliseconds-range estimation of the amount of + * time that the task spent sleeping: + */ + if (unlikely(prof_on == SLEEP_PROFILING)) { + profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk), + delta >> 20); + } } #endif } @@ -678,16 +696,34 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, struct sched_entity *curr, unsigned long granularity) { s64 __delta = curr->fair_key - se->fair_key; + unsigned long ideal_runtime, delta_exec; + + /* + * ideal_runtime is compared against sum_exec_runtime, which is + * walltime, hence do not scale. + */ + ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running, + (unsigned long)sysctl_sched_min_granularity); + + /* + * If we executed more than what the latency constraint suggests, + * reduce the rescheduling granularity. This way the total latency + * of how much a task is not scheduled converges to + * sysctl_sched_latency: + */ + delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; + if (delta_exec > ideal_runtime) + granularity = 0; /* * Take scheduling granularity into account - do not * preempt the current task unless the best task has * a larger than sched_granularity fairness advantage: + * + * scale granularity as key space is in fair_clock. */ - if (__delta > niced_granularity(curr, granularity)) { + if (__delta > niced_granularity(curr, granularity)) resched_task(rq_of(cfs_rq)->curr); - curr->prev_sum_exec_runtime = curr->sum_exec_runtime; - } } static inline void @@ -703,6 +739,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) update_stats_wait_end(cfs_rq, se); update_stats_curr_start(cfs_rq, se); set_cfs_rq_curr(cfs_rq, se); + se->prev_sum_exec_runtime = se->sum_exec_runtime; } static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) @@ -732,7 +769,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) { - unsigned long gran, ideal_runtime, delta_exec; struct sched_entity *next; /* @@ -749,21 +785,8 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) if (next == curr) return; - gran = sched_granularity(cfs_rq); - ideal_runtime = niced_granularity(curr, - max(sysctl_sched_latency / cfs_rq->nr_running, - (unsigned long)sysctl_sched_min_granularity)); - /* - * If we executed more than what the latency constraint suggests, - * reduce the rescheduling granularity. This way the total latency - * of how much a task is not scheduled converges to - * sysctl_sched_latency: - */ - delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; - if (delta_exec > ideal_runtime) - gran = 0; - - __check_preempt_curr_fair(cfs_rq, next, curr, gran); + __check_preempt_curr_fair(cfs_rq, next, curr, + sched_granularity(cfs_rq)); } /************************************************** @@ -892,19 +915,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) } /* - * sched_yield() support is very simple - we dequeue and enqueue + * sched_yield() support is very simple - we dequeue and enqueue. + * + * If compat_yield is turned on then we requeue to the end of the tree. */ static void yield_task_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); + struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; + struct sched_entity *rightmost, *se = &p->se; + struct rb_node *parent; - __update_rq_clock(rq); /* - * Dequeue and enqueue the task to update its - * position within the tree: + * Are we the only task in the tree? + */ + if (unlikely(cfs_rq->nr_running == 1)) + return; + + if (likely(!sysctl_sched_compat_yield)) { + __update_rq_clock(rq); + /* + * Dequeue and enqueue the task to update its + * position within the tree: + */ + dequeue_entity(cfs_rq, &p->se, 0); + enqueue_entity(cfs_rq, &p->se, 0); + + return; + } + /* + * Find the rightmost entry in the rbtree: + */ + do { + parent = *link; + link = &parent->rb_right; + } while (*link); + + rightmost = rb_entry(parent, struct sched_entity, run_node); + /* + * Already in the rightmost position? + */ + if (unlikely(rightmost == se)) + return; + + /* + * Minimally necessary key value to be last in the tree: */ - dequeue_entity(cfs_rq, &p->se, 0); - enqueue_entity(cfs_rq, &p->se, 0); + se->fair_key = rightmost->fair_key + 1; + + if (cfs_rq->rb_leftmost == &se->run_node) + cfs_rq->rb_leftmost = rb_next(&se->run_node); + /* + * Relink the task to the rightmost position: + */ + rb_erase(&se->run_node, &cfs_rq->tasks_timeline); + rb_link_node(&se->run_node, parent, link); + rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); } /*