X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched.c;h=6c10fa796ca040ef8da1f1e65acd58e4d7644faa;hb=e86908614f2c7fec401827e5cefd7a6ea9407f85;hp=d613723f324fd3f056483926b933cfcb4a9f968c;hpb=b04a0f4c1651a553ee1a03dc70297d66ec74db5c;p=safe%2Fjmp%2Flinux-2.6

diff --git a/kernel/sched.c b/kernel/sched.c
index d613723..6c10fa7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -262,7 +263,9 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
+	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -341,8 +344,11 @@ static void __update_rq_clock(struct rq *rq)
 		/*
 		 * Catch too large forward jumps too:
 		 */
-		if (unlikely(delta > 2*TICK_NSEC)) {
-			clock++;
+		if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) {
+			if (clock < rq->tick_timestamp + TICK_NSEC)
+				clock = rq->tick_timestamp + TICK_NSEC;
+			else
+				clock++;
 			rq->clock_overflows++;
 		} else {
 			if (unlikely(delta > rq->clock_max_delta))
@@ -361,19 +367,6 @@ static void update_rq_clock(struct rq *rq)
 		__update_rq_clock(rq);
 }
 
-static u64 __rq_clock(struct rq *rq)
-{
-	__update_rq_clock(rq);
-
-	return rq->clock;
-}
-
-static u64 rq_clock(struct rq *rq)
-{
-	update_rq_clock(rq);
-	return rq->clock;
-}
-
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -565,18 +558,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occured while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
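
Note on usage: the two hooks introduced above are meant to be called by the platform idle/PM code around a deep-idle period, so that rq->clock is advanced by an externally measured idle time instead of trusting sched_clock() across the idle window. A minimal sketch of such a caller is shown below; my_enter_deep_idle() and my_low_level_idle() are illustrative placeholders, only the two sched_clock_idle_*_event() calls come from this patch.

	/* Hypothetical deep-idle path; irqs are assumed to be disabled already. */
	static void my_enter_deep_idle(void)
	{
		ktime_t t0, t1;

		/* sync rq->clock before sched_clock() deltas become unreliable */
		sched_clock_idle_sleep_event();

		t0 = ktime_get();
		my_low_level_idle();	/* placeholder for the platform idle entry */
		t1 = ktime_get();

		/* advance rq->clock by the externally measured idle time */
		sched_clock_idle_wakeup_event(ktime_to_ns(ktime_sub(t1, t0)));
	}
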
@@ -651,6 +666,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 
 #define WMULT_SHIFT	32
 
+/*
+ * Shift right and round:
+ */
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 		struct load_weight *lw)
@@ -658,18 +678,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	u64 tmp;
 
 	if (unlikely(!lw->inv_weight))
-		lw->inv_weight = WMULT_CONST / lw->weight;
+		lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
 
 	tmp = (u64)delta_exec * weight;
 	/*
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
-	if (unlikely(tmp > WMULT_CONST)) {
-		tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
-			>> (WMULT_SHIFT/2);
-	} else {
-		tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
-	}
+	if (unlikely(tmp > WMULT_CONST))
+		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+			WMULT_SHIFT/2);
+	else
+		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
@@ -717,11 +736,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
  * the relative distance between them is ~25%.)
  */
 static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
-/* 0 */ NICE_0_LOAD /* 1024 */,
-/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
-/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
 };
 
 /*
@@ -732,14 +754,14 @@ static const int prio_to_weight[40] = {
  * into multiplications:
  */
 static const u32 prio_to_wmult[40] = {
-/* -20 */ 48356, 60446, 75558, 94446, 118058,
-/* -15 */ 147573, 184467, 230589, 288233, 360285,
-/* -10 */ 450347, 562979, 703746, 879575, 1099582,
-/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
-/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
-/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
-/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
-/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
 
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
@@ -795,14 +817,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
  * This function is called /before/ updating rq->ls.load
  * and when switching tasks.
  */
-static void update_curr_load(struct rq *rq, u64 now)
+static void update_curr_load(struct rq *rq)
 {
 	struct load_stat *ls = &rq->ls;
 	u64 start;
 
 	start = ls->load_update_start;
-	ls->load_update_start = now;
-	ls->delta_stat += now - start;
+	ls->load_update_start = rq->clock;
+	ls->delta_stat += rq->clock - start;
 	/*
 	 * Stagger updates to ls->delta_fair. Very frequent updates
 	 * can be expensive.
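
Worked example: with the new rounding, lw->inv_weight approximates 2^32/lw->weight rounded to nearest, and SRR() rounds the final shift instead of truncating, so delta_exec * weight / lw->weight comes out accurate to within a unit. The stand-alone sketch below reproduces the arithmetic in user space; the WMULT_CONST value and the 64-bit host are assumptions made for the example, not taken from this hunk.

	#include <stdio.h>
	#include <stdint.h>

	#define WMULT_CONST	(1ULL << 32)	/* assumed value for a 64-bit build */
	#define WMULT_SHIFT	32
	/* Shift right and round, as in the SRR() macro above: */
	#define SRR(x, y)	(((x) + (1ULL << ((y) - 1))) >> (y))

	/* delta * weight / lw_weight via the same fixed-point steps as calc_delta_mine() */
	static unsigned long calc(unsigned long delta, unsigned long weight,
				  unsigned long lw_weight)
	{
		uint64_t inv = (WMULT_CONST - lw_weight/2) / lw_weight + 1;
		uint64_t tmp = (uint64_t)delta * weight;

		if (tmp > WMULT_CONST)
			return SRR(SRR(tmp, WMULT_SHIFT/2) * inv, WMULT_SHIFT/2);
		return SRR(tmp * inv, WMULT_SHIFT);
	}

	int main(void)
	{
		/* 1 ms of runtime for a nice +5 task (weight 335), scaled by 1024: */
		printf("fixed-point: %lu, exact: %lu\n",
		       (unsigned long)calc(1000000, 1024, 335),
		       1000000UL * 1024 / 335);
		return 0;
	}

With these inputs, inv comes out to 12820798, which matches the new prio_to_wmult[] entry for nice +5, and both printed values agree (3056716).
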
@@ -811,35 +833,32 @@ static void update_curr_load(struct rq *rq, u64 now)
 		__update_curr_load(rq, ls);
 }
 
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq, now);
+	update_curr_load(rq);
 	update_load_add(&rq->ls.load, p->se.load.weight);
 }
 
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq, now);
+	update_curr_load(rq);
 	update_load_sub(&rq->ls.load, p->se.load.weight);
 }
 
-static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
-	inc_load(rq, p, now);
+	inc_load(rq, p);
 }
 
-static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
-	dec_load(rq, p, now);
+	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
@@ -861,18 +880,16 @@ static void set_load_weight(struct task_struct *p)
 	p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
 }
 
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	sched_info_queued(p);
-	p->sched_class->enqueue_task(rq, p, wakeup, now);
+	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
 }
 
-static void
-dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	p->sched_class->dequeue_task(rq, p, sleep, now);
+	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
 }
 
@@ -927,13 +944,11 @@ static int effective_prio(struct task_struct *p)
  */
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
-	u64 now = rq_clock(rq);
-
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible--;
 
-	enqueue_task(rq, p, wakeup, now);
-	inc_nr_running(p, rq, now);
+	enqueue_task(rq, p, wakeup);
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -941,26 +956,25 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
  */
 static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
 {
-	u64 now = rq_clock(rq);
+	update_rq_clock(rq);
 
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible--;
 
-	enqueue_task(rq, p, 0, now);
-	inc_nr_running(p, rq, now);
+	enqueue_task(rq, p, 0);
+	inc_nr_running(p, rq);
 }
 
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void
-deactivate_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 {
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible++;
 
-	dequeue_task(rq, p, sleep, now);
-	dec_nr_running(p, rq, now);
+	dequeue_task(rq, p, sleep);
+	dec_nr_running(p, rq);
 }
 
 /**
@@ -1528,6 +1542,7 @@ out_set_cpu:
 
 out_activate:
 #endif /* CONFIG_SMP */
+	update_rq_clock(rq);
 	activate_task(rq, p, 1);
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
@@ -1572,6 +1587,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.wait_start_fair = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 	p->se.delta_exec = 0;
 	p->se.delta_fair_run = 0;
 	p->se.delta_fair_sleep = 0;
@@ -1659,15 +1675,19 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	unsigned long flags;
 	struct rq *rq;
 	int this_cpu;
-	u64 now;
 
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
 	this_cpu = smp_processor_id(); /* parent's CPU */
-	now = rq_clock(rq);
+	update_rq_clock(rq);
 
 	p->prio = effective_prio(p);
 
+	if (rt_prio(p->prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
 	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
 			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
 			!current->se.on_rq) {
@@ -1678,8 +1698,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * Let the scheduling class do new task startup
 		 * management (if any):
 		 */
-		p->sched_class->task_new(rq, p, now);
-		inc_nr_running(p, rq, now);
+		p->sched_class->task_new(rq, p);
+		inc_nr_running(p, rq);
 	}
 	check_preempt_curr(rq, p);
 	task_rq_unlock(rq, &flags);
@@ -1966,7 +1986,6 @@ static void update_cpu_load(struct rq *this_rq)
 	unsigned long total_load = this_rq->ls.load.weight;
 	unsigned long this_load = total_load;
 	struct load_stat *ls = &this_rq->ls;
-	u64 now = __rq_clock(this_rq);
 	int i, scale;
 
 	this_rq->nr_load_updates++;
@@ -1974,7 +1993,7 @@ static void update_cpu_load(struct rq *this_rq)
 		goto do_avg;
 
 	/* Update delta_fair/delta_exec fields first */
-	update_curr_load(this_rq, now);
+	update_curr_load(this_rq);
 	fair_delta64 = ls->delta_fair + 1;
 	ls->delta_fair = 0;
 
@@ -1982,8 +2001,8 @@ static void update_cpu_load(struct rq *this_rq)
 	exec_delta64 = ls->delta_exec + 1;
 	ls->delta_exec = 0;
 
-	sample_interval64 = now - ls->load_update_last;
-	ls->load_update_last = now;
+	sample_interval64 = this_rq->clock - ls->load_update_last;
+	ls->load_update_last = this_rq->clock;
 
 	if ((s64)sample_interval64 < (s64)TICK_NSEC)
 		sample_interval64 = TICK_NSEC;
@@ -2038,6 +2057,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
 			spin_lock(&rq1->lock);
 		}
 	}
+	update_rq_clock(rq1);
+	update_rq_clock(rq2);
 }
 
 /*
@@ -2134,7 +2155,7 @@ void sched_exec(void)
 static void pull_task(struct rq *src_rq, struct task_struct *p,
 		      struct rq *this_rq, int this_cpu)
 {
-	deactivate_task(src_rq, p, 0, rq_clock(src_rq));
+	deactivate_task(src_rq, p, 0);
 	set_task_cpu(p, this_cpu);
 	activate_task(this_rq, p, 0);
 	/*
@@ -2165,12 +2186,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
 
@@ -2502,7 +2517,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+	if (*imbalance < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
@@ -2554,10 +2569,8 @@ small_imbalance:
 		pwr_move /= SCHED_LOAD_SCALE;
 
 		/* Move if we gain throughput */
-		if (pwr_move <= pwr_now)
-			goto out_balanced;
-
-		*imbalance = busiest_load_per_task;
+		if (pwr_move > pwr_now)
+			*imbalance = busiest_load_per_task;
 	}
 
 	return busiest;
@@ -2817,6 +2830,8 @@ redo:
 	if (busiest->nr_running > 1) {
 		/* Attempt to move tasks */
 		double_lock_balance(this_rq, busiest);
+		/* this_rq->clock is already updated */
+		update_rq_clock(busiest);
 		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 					imbalance, sd, CPU_NEWLY_IDLE,
 					&all_pinned);
@@ -2914,6 +2929,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 
 	/* move a task from busiest_rq to target_rq */
 	double_lock_balance(busiest_rq, target_rq);
+	update_rq_clock(busiest_rq);
+	update_rq_clock(target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
@@ -3024,6 +3041,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3060,8 +3078,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (sd->flags & SD_SERIALIZE)
 			spin_unlock(&balancing);
 out:
-		if (time_after(next_balance, sd->last_balance + interval))
+		if (time_after(next_balance, sd->last_balance + interval)) {
 			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
 
 		/*
 		 * Stop the load balance at this level. There is another
@@ -3071,7 +3091,14 @@ out:
 		if (!balance)
 			break;
 	}
-	rq->next_balance = next_balance;
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the cpu is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance))
+		rq->next_balance = next_balance;
 }
 
 /*
@@ -3110,7 +3137,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 			if (need_resched())
 				break;
 
-			rebalance_domains(balance_cpu, SCHED_IDLE);
+			rebalance_domains(balance_cpu, CPU_IDLE);
 
 			rq = cpu_rq(balance_cpu);
 			if (time_after(this_rq->next_balance, rq->next_balance))
@@ -3221,7 +3248,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime;
 	if (rq->curr == p) {
-		delta_exec = rq_clock(rq) - p->se.exec_start;
+		update_rq_clock(rq);
+		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
 			ns += delta_exec;
 	}
@@ -3315,8 +3343,16 @@ void scheduler_tick(void)
 	int cpu = smp_processor_id();
 	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *curr = rq->curr;
+	u64 next_tick = rq->tick_timestamp + TICK_NSEC;
 
 	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	/*
+	 * Let rq->clock advance by at least TICK_NSEC:
+	 */
+	if (unlikely(rq->clock < next_tick))
+		rq->clock = next_tick;
+	rq->tick_timestamp = rq->clock;
 	update_cpu_load(rq);
 	if (curr != rq->idle) /* FIXME: needed? */
 		curr->sched_class->task_tick(rq, curr);
@@ -3401,7 +3437,7 @@ static inline void schedule_debug(struct task_struct *prev)
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
+pick_next_task(struct rq *rq, struct task_struct *prev)
 {
 	struct sched_class *class;
 	struct task_struct *p;
@@ -3411,14 +3447,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
 	 * the fair class we can call that function directly:
 	 */
 	if (likely(rq->nr_running == rq->cfs.nr_running)) {
-		p = fair_sched_class.pick_next_task(rq, now);
+		p = fair_sched_class.pick_next_task(rq);
 		if (likely(p))
 			return p;
 	}
 
 	class = sched_class_highest;
 	for ( ; ; ) {
-		p = class->pick_next_task(rq, now);
+		p = class->pick_next_task(rq);
 		if (p)
 			return p;
 		/*
@@ -3437,7 +3473,6 @@ asmlinkage void __sched schedule(void)
 	struct task_struct *prev, *next;
 	long *switch_count;
 	struct rq *rq;
-	u64 now;
 	int cpu;
 
 need_resched:
@@ -3455,14 +3490,14 @@ need_resched_nonpreemptible:
 
 	spin_lock_irq(&rq->lock);
 	clear_tsk_need_resched(prev);
-	now = __rq_clock(rq);
+	__update_rq_clock(rq);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
 				unlikely(signal_pending(prev)))) {
 			prev->state = TASK_RUNNING;
 		} else {
-			deactivate_task(rq, prev, 1, now);
+			deactivate_task(rq, prev, 1);
 		}
 		switch_count = &prev->nvcsw;
 	}
@@ -3470,8 +3505,8 @@ need_resched_nonpreemptible:
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
-	prev->sched_class->put_prev_task(rq, prev, now);
-	next = pick_next_task(rq, prev, now);
+	prev->sched_class->put_prev_task(rq, prev);
+	next = pick_next_task(rq, prev);
 
 	sched_info_switch(prev, next);
 
@@ -3914,17 +3949,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	unsigned long flags;
 	int oldprio, on_rq;
 	struct rq *rq;
-	u64 now;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
 	rq = task_rq_lock(p, &flags);
-	now = rq_clock(rq);
+	update_rq_clock(rq);
 
 	oldprio = p->prio;
 	on_rq = p->se.on_rq;
 	if (on_rq)
-		dequeue_task(rq, p, 0, now);
+		dequeue_task(rq, p, 0);
 
 	if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
@@ -3934,7 +3968,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	p->prio = prio;
 
 	if (on_rq) {
-		enqueue_task(rq, p, 0, now);
+		enqueue_task(rq, p, 0);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
 		 * our priority decreased, or if we are not currently running on
@@ -3957,7 +3991,6 @@ void set_user_nice(struct task_struct *p, long nice)
 	int old_prio, delta, on_rq;
 	unsigned long flags;
 	struct rq *rq;
-	u64 now;
 
 	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
 		return;
@@ -3966,7 +3999,7 @@
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
 	rq = task_rq_lock(p, &flags);
-	now = rq_clock(rq);
+	update_rq_clock(rq);
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
@@ -3979,8 +4012,8 @@
 	}
 	on_rq = p->se.on_rq;
 	if (on_rq) {
-		dequeue_task(rq, p, 0, now);
-		dec_load(rq, p, now);
+		dequeue_task(rq, p, 0);
+		dec_load(rq, p);
 	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
@@ -3990,8 +4023,8 @@
 	delta = p->prio - old_prio;
 
 	if (on_rq) {
-		enqueue_task(rq, p, 0, now);
-		inc_load(rq, p, now);
+		enqueue_task(rq, p, 0);
+		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -4227,9 +4260,10 @@ recheck:
 		spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
+	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	if (on_rq)
-		deactivate_task(rq, p, 0, rq_clock(rq));
+		deactivate_task(rq, p, 0);
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
 	if (on_rq) {
@@ -4522,10 +4556,7 @@ asmlinkage long sys_sched_yield(void)
 	struct rq *rq = this_rq_lock();
 
 	schedstat_inc(rq, yld_cnt);
-	if (unlikely(rq->nr_running == 1))
-		schedstat_inc(rq, yld_act_empty);
-	else
-		current->sched_class->yield_task(rq, current);
+	current->sched_class->yield_task(rq, current);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
@@ -4881,14 +4912,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
 	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long gran_limit = 100000000;
+	const unsigned long limit = 100000000;
+
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > gran_limit)
-		sysctl_sched_granularity = gran_limit;
+	sysctl_sched_latency *= factor;
+	if (sysctl_sched_latency > limit)
+		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
@@ -4982,7 +5017,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 
 	on_rq = p->se.on_rq;
 	if (on_rq)
-		deactivate_task(rq_src, p, 0, rq_clock(rq_src));
+		deactivate_task(rq_src, p, 0);
+
 	set_task_cpu(p, dest_cpu);
 	if (on_rq) {
 		activate_task(rq_dest, p, 0);
@@ -5215,7 +5251,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 	for ( ; ; ) {
 		if (!rq->nr_running)
 			break;
-		next = pick_next_task(rq, rq->curr, rq_clock(rq));
+		update_rq_clock(rq);
+		next = pick_next_task(rq, rq->curr);
 		if (!next)
 			break;
 		migrate_dead(dead_cpu, next);
@@ -5229,15 +5266,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
 	{
 		.procname = "sched_domain",
-		.mode = 0755,
+		.mode = 0555,
 	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
 	{
+		.ctl_name = CTL_KERN,
 		.procname = "kernel",
-		.mode = 0755,
+		.mode = 0555,
 		.child = sd_ctl_dir,
 	},
 	{0,},
 };
@@ -5313,7 +5351,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_domain_table(sd);
 		entry++;
 		i++;
 	}
@@ -5333,7 +5371,7 @@ static void init_sched_domain_sysctl(void)
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_cpu_table(i);
 	}
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
@@ -5400,7 +5438,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		rq->migration_thread = NULL;
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
-		deactivate_task(rq, rq->idle, 0, rq_clock(rq));
+		update_rq_clock(rq);
+		deactivate_task(rq, rq->idle, 0);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
 		rq->idle->sched_class = &idle_sched_class;
@@ -6322,7 +6361,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
 }
 
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static int arch_reinit_sched_domains(void)
 {
 	int err;
 
@@ -6351,24 +6390,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 	return ret ? ret : count;
 }
 
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
-{
-	int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
-	if (smt_capable())
-		err = sysfs_create_file(&cls->kset.kobj,
-					&attr_sched_smt_power_savings.attr);
-#endif
-#ifdef CONFIG_SCHED_MC
-	if (!err && mc_capable())
-		err = sysfs_create_file(&cls->kset.kobj,
-					&attr_sched_mc_power_savings.attr);
-#endif
-	return err;
-}
-#endif
-
 #ifdef CONFIG_SCHED_MC
 static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
 {
@@ -6379,8 +6400,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
 {
 	return sched_power_savings_store(buf, count, 0);
 }
-SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
-	    sched_mc_power_savings_store);
+static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
+		   sched_mc_power_savings_store);
 #endif
 
 #ifdef CONFIG_SCHED_SMT
@@ -6393,8 +6414,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
 {
 	return sched_power_savings_store(buf, count, 1);
 }
-SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
-	    sched_smt_power_savings_store);
+static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
+		   sched_smt_power_savings_store);
+#endif
+
+int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+{
+	int err = 0;
+
+#ifdef CONFIG_SCHED_SMT
+	if (smt_capable())
+		err = sysfs_create_file(&cls->kset.kobj,
+					&attr_sched_smt_power_savings.attr);
+#endif
+#ifdef CONFIG_SCHED_MC
+	if (!err && mc_capable())
+		err = sysfs_create_file(&cls->kset.kobj,
+					&attr_sched_mc_power_savings.attr);
+#endif
+	return err;
+}
 #endif
 
 /*
@@ -6637,12 +6676,13 @@ void normalize_rt_tasks(void)
 			goto out_unlock;
 #endif
 
+		update_rq_clock(rq);
 		on_rq = p->se.on_rq;
 		if (on_rq)
-			deactivate_task(task_rq(p), p, 0, rq_clock(task_rq(p)));
+			deactivate_task(rq, p, 0);
 		__setscheduler(rq, p, SCHED_NORMAL, 0);
 		if (on_rq) {
-			activate_task(task_rq(p), p, 0);
+			activate_task(rq, p, 0);
 			resched_task(rq->curr);
 		}
 #ifdef CONFIG_SMP
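
Scaling note: the sched_init_granularity() hunk earlier in this diff multiplies the new latency/min-granularity tunables by 1 + ilog2(num_online_cpus()) and clamps them at 100 ms. The small user-space sketch below only tabulates that scaling rule; the 20 ms and 4 ms base values are illustrative assumptions, not defaults taken from this patch.

	#include <stdio.h>

	/* factor = 1 + ilog2(ncpus), result clamped to the 100 ms limit from the patch */
	static unsigned long scale(unsigned long ns, unsigned int ncpus)
	{
		const unsigned long limit = 100000000;	/* 100 ms */
		unsigned int factor = 1;

		while (ncpus >>= 1)
			factor++;

		ns *= factor;
		return ns > limit ? limit : ns;
	}

	int main(void)
	{
		unsigned int cpus;

		for (cpus = 1; cpus <= 64; cpus *= 2)
			printf("%2u cpus: latency %9lu ns, min granularity %9lu ns\n",
			       cpus, scale(20000000, cpus), scale(4000000, cpus));
		return 0;
	}
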