Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

[safe/jmp/linux-2.6] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index d613723..6c10fa7 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
  #include <linux/delayacct.h>
  #include <linux/reciprocal_div.h>
  #include <linux/unistd.h>
+#include <linux/pagemap.h>
  
  #include <asm/tlb.h>
  
@@ -262,7 +263,9 @@ struct rq {
         s64 clock_max_delta;
  
         unsigned int clock_warps, clock_overflows;
-       unsigned int clock_unstable_events;
+       u64 idle_clock;
+       unsigned int clock_deep_idle_events;
+       u64 tick_timestamp;
  
         atomic_t nr_iowait;
  
@@ -341,8 +344,11 @@ static void __update_rq_clock(struct rq *rq)
                 /*
                  * Catch too large forward jumps too:
                  */
-               if (unlikely(delta > 2*TICK_NSEC)) {
-                       clock++;
+               if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) {
+                       if (clock < rq->tick_timestamp + TICK_NSEC)
+                               clock = rq->tick_timestamp + TICK_NSEC;
+                       else
+                               clock++;
                         rq->clock_overflows++;
                 } else {
                         if (unlikely(delta > rq->clock_max_delta))
@@ -361,19 +367,6 @@ static void update_rq_clock(struct rq *rq)
                 __update_rq_clock(rq);
  }
  
-static u64 __rq_clock(struct rq *rq)
-{
-       __update_rq_clock(rq);
-
-       return rq->clock;
-}
-
-static u64 rq_clock(struct rq *rq)
-{
-       update_rq_clock(rq);
-       return rq->clock;
-}
-
  /*
   * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
   * See detach_destroy_domains: synchronize_sched for details.
@@ -565,18 +558,40 @@ static inline struct rq *this_rq_lock(void)
  }
  
  /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
   */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
  {
-       unsigned long flags;
-       struct rq *rq;
+       struct rq *rq = cpu_rq(smp_processor_id());
  
-       rq = task_rq_lock(current, &flags);
-       rq->prev_clock_raw = sched_clock();
-       rq->clock_unstable_events++;
-       task_rq_unlock(rq, &flags);
+       spin_lock(&rq->lock);
+       __update_rq_clock(rq);
+       spin_unlock(&rq->lock);
+       rq->clock_deep_idle_events++;
  }
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+       struct rq *rq = cpu_rq(smp_processor_id());
+       u64 now = sched_clock();
+
+       rq->idle_clock += delta_ns;
+       /*
+        * Override the previous timestamp and ignore all
+        * sched_clock() deltas that occurred while we idled,
+        * and use the PM-provided delta_ns to advance the
+        * rq clock:
+        */
+       spin_lock(&rq->lock);
+       rq->prev_clock_raw = now;
+       rq->clock += delta_ns;
+       spin_unlock(&rq->lock);
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
  
  /*
   * resched_task - mark a task 'to be rescheduled now'.
@@ -651,6 +666,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
  
  #define WMULT_SHIFT    32
  
+/*
+ * Shift right and round:
+ */
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
  static unsigned long
  calc_delta_mine(unsigned long delta_exec, unsigned long weight,
                 struct load_weight *lw)
@@ -658,18 +678,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
         u64 tmp;
  
         if (unlikely(!lw->inv_weight))
-               lw->inv_weight = WMULT_CONST / lw->weight;
+               lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
  
         tmp = (u64)delta_exec * weight;
         /*
          * Check whether we'd overflow the 64-bit multiplication:
          */
-       if (unlikely(tmp > WMULT_CONST)) {
-               tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
-                               >> (WMULT_SHIFT/2);
-       } else {
-               tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
-       }
+       if (unlikely(tmp > WMULT_CONST))
+               tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+                       WMULT_SHIFT/2);
+       else
+               tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
  
         return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
  }
@@ -717,11 +736,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
   * the relative distance between them is ~25%.)
   */
  static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */  9537,  7629,  6103,  4883,  3906,  3125,  2500,  2000,  1600,  1280,
-/*   0 */  NICE_0_LOAD /* 1024 */,
-/*   1 */          819,   655,   524,   419,   336,   268,   215,   172,   137,
-/*  10 */   110,    87,    70,    56,    45,    36,    29,    23,    18,    15,
+ /* -20 */     88761,     71755,     56483,     46273,     36291,
+ /* -15 */     29154,     23254,     18705,     14949,     11916,
+ /* -10 */      9548,      7620,      6100,      4904,      3906,
+ /*  -5 */      3121,      2501,      1991,      1586,      1277,
+ /*   0 */      1024,       820,       655,       526,       423,
+ /*   5 */       335,       272,       215,       172,       137,
+ /*  10 */       110,        87,        70,        56,        45,
+ /*  15 */        36,        29,        23,        18,        15,
  };
  
  /*
@@ -732,14 +754,14 @@ static const int prio_to_weight[40] = {
   * into multiplications:
   */
  static const u32 prio_to_wmult[40] = {
-/* -20 */     48356,     60446,     75558,     94446,    118058,
-/* -15 */    147573,    184467,    230589,    288233,    360285,
-/* -10 */    450347,    562979,    703746,    879575,   1099582,
-/*  -5 */   1374389,   1717986,   2147483,   2684354,   3355443,
-/*   0 */   4194304,   5244160,   6557201,   8196502,  10250518,
-/*   5 */  12782640,  16025997,  19976592,  24970740,  31350126,
-/*  10 */  39045157,  49367440,  61356675,  76695844,  95443717,
-/*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+ /* -20 */     48388,     59856,     76040,     92818,    118348,
+ /* -15 */    147320,    184698,    229616,    287308,    360437,
+ /* -10 */    449829,    563644,    704093,    875809,   1099582,
+ /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
+ /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
+ /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
+ /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
+ /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
  };
  
  static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
@@ -795,14 +817,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
   * This function is called /before/ updating rq->ls.load
   * and when switching tasks.
   */
-static void update_curr_load(struct rq *rq, u64 now)
+static void update_curr_load(struct rq *rq)
  {
         struct load_stat *ls = &rq->ls;
         u64 start;
  
         start = ls->load_update_start;
-       ls->load_update_start = now;
-       ls->delta_stat += now - start;
+       ls->load_update_start = rq->clock;
+       ls->delta_stat += rq->clock - start;
         /*
          * Stagger updates to ls->delta_fair. Very frequent updates
          * can be expensive.
@@ -811,35 +833,32 @@ static void update_curr_load(struct rq *rq, u64 now)
                 __update_curr_load(rq, ls);
  }
  
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
  {
-       update_curr_load(rq, now);
+       update_curr_load(rq);
         update_load_add(&rq->ls.load, p->se.load.weight);
  }
  
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
  {
-       update_curr_load(rq, now);
+       update_curr_load(rq);
         update_load_sub(&rq->ls.load, p->se.load.weight);
  }
  
-static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
  {
         rq->nr_running++;
-       inc_load(rq, p, now);
+       inc_load(rq, p);
  }
  
-static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
  {
         rq->nr_running--;
-       dec_load(rq, p, now);
+       dec_load(rq, p);
  }
  
  static void set_load_weight(struct task_struct *p)
  {
-       task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
         p->se.wait_runtime = 0;
  
         if (task_has_rt_policy(p)) {
@@ -861,18 +880,16 @@ static void set_load_weight(struct task_struct *p)
         p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
  }
  
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
  {
         sched_info_queued(p);
-       p->sched_class->enqueue_task(rq, p, wakeup, now);
+       p->sched_class->enqueue_task(rq, p, wakeup);
         p->se.on_rq = 1;
  }
  
-static void
-dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
  {
-       p->sched_class->dequeue_task(rq, p, sleep, now);
+       p->sched_class->dequeue_task(rq, p, sleep);
         p->se.on_rq = 0;
  }
  
@@ -927,13 +944,11 @@ static int effective_prio(struct task_struct *p)
   */
  static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
  {
-       u64 now = rq_clock(rq);
-
         if (p->state == TASK_UNINTERRUPTIBLE)
                 rq->nr_uninterruptible--;
  
-       enqueue_task(rq, p, wakeup, now);
-       inc_nr_running(p, rq, now);
+       enqueue_task(rq, p, wakeup);
+       inc_nr_running(p, rq);
  }
  
  /*
@@ -941,26 +956,25 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
   */
  static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
  {
-       u64 now = rq_clock(rq);
+       update_rq_clock(rq);
  
         if (p->state == TASK_UNINTERRUPTIBLE)
                 rq->nr_uninterruptible--;
  
-       enqueue_task(rq, p, 0, now);
-       inc_nr_running(p, rq, now);
+       enqueue_task(rq, p, 0);
+       inc_nr_running(p, rq);
  }
  
  /*
   * deactivate_task - remove a task from the runqueue.
   */
-static void
-deactivate_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
  {
         if (p->state == TASK_UNINTERRUPTIBLE)
                 rq->nr_uninterruptible++;
  
-       dequeue_task(rq, p, sleep, now);
-       dec_nr_running(p, rq, now);
+       dequeue_task(rq, p, sleep);
+       dec_nr_running(p, rq);
  }
  
  /**
@@ -1528,6 +1542,7 @@ out_set_cpu:
  
  out_activate:
  #endif /* CONFIG_SMP */
+       update_rq_clock(rq);
         activate_task(rq, p, 1);
         /*
          * Sync wakeups (i.e. those types of wakeups where the waker
@@ -1572,6 +1587,7 @@ static void __sched_fork(struct task_struct *p)
         p->se.wait_start_fair           = 0;
         p->se.exec_start                = 0;
         p->se.sum_exec_runtime          = 0;
+       p->se.prev_sum_exec_runtime     = 0;
         p->se.delta_exec                = 0;
         p->se.delta_fair_run            = 0;
         p->se.delta_fair_sleep          = 0;
@@ -1659,15 +1675,19 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
         unsigned long flags;
         struct rq *rq;
         int this_cpu;
-       u64 now;
  
         rq = task_rq_lock(p, &flags);
         BUG_ON(p->state != TASK_RUNNING);
         this_cpu = smp_processor_id(); /* parent's CPU */
-       now = rq_clock(rq);
+       update_rq_clock(rq);
  
         p->prio = effective_prio(p);
  
+       if (rt_prio(p->prio))
+               p->sched_class = &rt_sched_class;
+       else
+               p->sched_class = &fair_sched_class;
+
         if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
                         (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
                         !current->se.on_rq) {
@@ -1678,8 +1698,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                  * Let the scheduling class do new task startup
                  * management (if any):
                  */
-               p->sched_class->task_new(rq, p, now);
-               inc_nr_running(p, rq, now);
+               p->sched_class->task_new(rq, p);
+               inc_nr_running(p, rq);
         }
         check_preempt_curr(rq, p);
         task_rq_unlock(rq, &flags);
@@ -1966,7 +1986,6 @@ static void update_cpu_load(struct rq *this_rq)
         unsigned long total_load = this_rq->ls.load.weight;
         unsigned long this_load =  total_load;
         struct load_stat *ls = &this_rq->ls;
-       u64 now = __rq_clock(this_rq);
         int i, scale;
  
         this_rq->nr_load_updates++;
@@ -1974,7 +1993,7 @@ static void update_cpu_load(struct rq *this_rq)
                 goto do_avg;
  
         /* Update delta_fair/delta_exec fields first */
-       update_curr_load(this_rq, now);
+       update_curr_load(this_rq);
  
         fair_delta64 = ls->delta_fair + 1;
         ls->delta_fair = 0;
@@ -1982,8 +2001,8 @@ static void update_cpu_load(struct rq *this_rq)
         exec_delta64 = ls->delta_exec + 1;
         ls->delta_exec = 0;
  
-       sample_interval64 = now - ls->load_update_last;
-       ls->load_update_last = now;
+       sample_interval64 = this_rq->clock - ls->load_update_last;
+       ls->load_update_last = this_rq->clock;
  
         if ((s64)sample_interval64 < (s64)TICK_NSEC)
                 sample_interval64 = TICK_NSEC;
@@ -2038,6 +2057,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
                         spin_lock(&rq1->lock);
                 }
         }
+       update_rq_clock(rq1);
+       update_rq_clock(rq2);
  }
  
  /*
@@ -2134,7 +2155,7 @@ void sched_exec(void)
  static void pull_task(struct rq *src_rq, struct task_struct *p,
                       struct rq *this_rq, int this_cpu)
  {
-       deactivate_task(src_rq, p, 0, rq_clock(src_rq));
+       deactivate_task(src_rq, p, 0);
         set_task_cpu(p, this_cpu);
         activate_task(this_rq, p, 0);
         /*
@@ -2165,12 +2186,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         if (task_running(rq, p))
                 return 0;
  
-       /*
-        * Aggressive migration if too many balance attempts have failed:
-        */
-       if (sd->nr_balance_failed > sd->cache_nice_tries)
-               return 1;
-
         return 1;
  }
  
@@ -2502,7 +2517,7 @@ group_next:
          * a think about bumping its value to force at least one task to be
          * moved
          */
-       if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+       if (*imbalance < busiest_load_per_task) {
                 unsigned long tmp, pwr_now, pwr_move;
                 unsigned int imbn;
  
@@ -2554,10 +2569,8 @@ small_imbalance:
                 pwr_move /= SCHED_LOAD_SCALE;
  
                 /* Move if we gain throughput */
-               if (pwr_move <= pwr_now)
-                       goto out_balanced;
-
-               *imbalance = busiest_load_per_task;
+               if (pwr_move > pwr_now)
+                       *imbalance = busiest_load_per_task;
         }
  
         return busiest;
@@ -2817,6 +2830,8 @@ redo:
         if (busiest->nr_running > 1) {
                 /* Attempt to move tasks */
                 double_lock_balance(this_rq, busiest);
+               /* this_rq->clock is already updated */
+               update_rq_clock(busiest);
                 ld_moved = move_tasks(this_rq, this_cpu, busiest,
                                         imbalance, sd, CPU_NEWLY_IDLE,
                                         &all_pinned);
@@ -2914,6 +2929,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
  
         /* move a task from busiest_rq to target_rq */
         double_lock_balance(busiest_rq, target_rq);
+       update_rq_clock(busiest_rq);
+       update_rq_clock(target_rq);
  
         /* Search for an sd spanning us and the target CPU. */
         for_each_domain(target_cpu, sd) {
@@ -3024,6 +3041,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
         struct sched_domain *sd;
         /* Earliest time when we have to do rebalance again */
         unsigned long next_balance = jiffies + 60*HZ;
+       int update_next_balance = 0;
  
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3060,8 +3078,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 if (sd->flags & SD_SERIALIZE)
                         spin_unlock(&balancing);
  out:
-               if (time_after(next_balance, sd->last_balance + interval))
+               if (time_after(next_balance, sd->last_balance + interval)) {
                         next_balance = sd->last_balance + interval;
+                       update_next_balance = 1;
+               }
  
                 /*
                  * Stop the load balance at this level. There is another
@@ -3071,7 +3091,14 @@ out:
                 if (!balance)
                         break;
         }
-       rq->next_balance = next_balance;
+
+       /*
+        * next_balance will be updated only when there is a need.
+        * For example, when the cpu is attached to the null domain, it
+        * will not be updated.
+        */
+       if (likely(update_next_balance))
+               rq->next_balance = next_balance;
  }
  
  /*
@@ -3110,7 +3137,7 @@ static void run_rebalance_domains(struct softirq_action *h)
                         if (need_resched())
                                 break;
  
-                       rebalance_domains(balance_cpu, SCHED_IDLE);
+                       rebalance_domains(balance_cpu, CPU_IDLE);
  
                         rq = cpu_rq(balance_cpu);
                         if (time_after(this_rq->next_balance, rq->next_balance))
@@ -3221,7 +3248,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         rq = task_rq_lock(p, &flags);
         ns = p->se.sum_exec_runtime;
         if (rq->curr == p) {
-               delta_exec = rq_clock(rq) - p->se.exec_start;
+               update_rq_clock(rq);
+               delta_exec = rq->clock - p->se.exec_start;
                 if ((s64)delta_exec > 0)
                         ns += delta_exec;
         }
@@ -3315,8 +3343,16 @@ void scheduler_tick(void)
         int cpu = smp_processor_id();
         struct rq *rq = cpu_rq(cpu);
         struct task_struct *curr = rq->curr;
+       u64 next_tick = rq->tick_timestamp + TICK_NSEC;
  
         spin_lock(&rq->lock);
+       __update_rq_clock(rq);
+       /*
+        * Let rq->clock advance by at least TICK_NSEC:
+        */
+       if (unlikely(rq->clock < next_tick))
+               rq->clock = next_tick;
+       rq->tick_timestamp = rq->clock;
         update_cpu_load(rq);
         if (curr != rq->idle) /* FIXME: needed? */
                 curr->sched_class->task_tick(rq, curr);
@@ -3401,7 +3437,7 @@ static inline void schedule_debug(struct task_struct *prev)
   * Pick up the highest-prio task:
   */
  static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
+pick_next_task(struct rq *rq, struct task_struct *prev)
  {
         struct sched_class *class;
         struct task_struct *p;
@@ -3411,14 +3447,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
          * the fair class we can call that function directly:
          */
         if (likely(rq->nr_running == rq->cfs.nr_running)) {
-               p = fair_sched_class.pick_next_task(rq, now);
+               p = fair_sched_class.pick_next_task(rq);
                 if (likely(p))
                         return p;
         }
  
         class = sched_class_highest;
         for ( ; ; ) {
-               p = class->pick_next_task(rq, now);
+               p = class->pick_next_task(rq);
                 if (p)
                         return p;
                 /*
@@ -3437,7 +3473,6 @@ asmlinkage void __sched schedule(void)
         struct task_struct *prev, *next;
         long *switch_count;
         struct rq *rq;
-       u64 now;
         int cpu;
  
  need_resched:
@@ -3455,14 +3490,14 @@ need_resched_nonpreemptible:
  
         spin_lock_irq(&rq->lock);
         clear_tsk_need_resched(prev);
-       now = __rq_clock(rq);
+       __update_rq_clock(rq);
  
         if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
                 if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
                                 unlikely(signal_pending(prev)))) {
                         prev->state = TASK_RUNNING;
                 } else {
-                       deactivate_task(rq, prev, 1, now);
+                       deactivate_task(rq, prev, 1);
                 }
                 switch_count = &prev->nvcsw;
         }
@@ -3470,8 +3505,8 @@ need_resched_nonpreemptible:
         if (unlikely(!rq->nr_running))
                 idle_balance(cpu, rq);
  
-       prev->sched_class->put_prev_task(rq, prev, now);
-       next = pick_next_task(rq, prev, now);
+       prev->sched_class->put_prev_task(rq, prev);
+       next = pick_next_task(rq, prev);
  
         sched_info_switch(prev, next);
  
@@ -3914,17 +3949,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
         unsigned long flags;
         int oldprio, on_rq;
         struct rq *rq;
-       u64 now;
  
         BUG_ON(prio < 0 || prio > MAX_PRIO);
  
         rq = task_rq_lock(p, &flags);
-       now = rq_clock(rq);
+       update_rq_clock(rq);
  
         oldprio = p->prio;
         on_rq = p->se.on_rq;
         if (on_rq)
-               dequeue_task(rq, p, 0, now);
+               dequeue_task(rq, p, 0);
  
         if (rt_prio(prio))
                 p->sched_class = &rt_sched_class;
@@ -3934,7 +3968,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
         p->prio = prio;
  
         if (on_rq) {
-               enqueue_task(rq, p, 0, now);
+               enqueue_task(rq, p, 0);
                 /*
                  * Reschedule if we are currently running on this runqueue and
                  * our priority decreased, or if we are not currently running on
@@ -3957,7 +3991,6 @@ void set_user_nice(struct task_struct *p, long nice)
         int old_prio, delta, on_rq;
         unsigned long flags;
         struct rq *rq;
-       u64 now;
  
         if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
                 return;
@@ -3966,7 +3999,7 @@ void set_user_nice(struct task_struct *p, long nice)
          * the task might be in the middle of scheduling on another CPU.
          */
         rq = task_rq_lock(p, &flags);
-       now = rq_clock(rq);
+       update_rq_clock(rq);
         /*
          * The RT priorities are set via sched_setscheduler(), but we still
          * allow the 'normal' nice value to be set - but as expected
@@ -3979,8 +4012,8 @@ void set_user_nice(struct task_struct *p, long nice)
         }
         on_rq = p->se.on_rq;
         if (on_rq) {
-               dequeue_task(rq, p, 0, now);
-               dec_load(rq, p, now);
+               dequeue_task(rq, p, 0);
+               dec_load(rq, p);
         }
  
         p->static_prio = NICE_TO_PRIO(nice);
@@ -3990,8 +4023,8 @@ void set_user_nice(struct task_struct *p, long nice)
         delta = p->prio - old_prio;
  
         if (on_rq) {
-               enqueue_task(rq, p, 0, now);
-               inc_load(rq, p, now);
+               enqueue_task(rq, p, 0);
+               inc_load(rq, p);
                 /*
                  * If the task increased its priority or is running and
                  * lowered its priority, then reschedule its CPU:
@@ -4227,9 +4260,10 @@ recheck:
                 spin_unlock_irqrestore(&p->pi_lock, flags);
                 goto recheck;
         }
+       update_rq_clock(rq);
         on_rq = p->se.on_rq;
         if (on_rq)
-               deactivate_task(rq, p, 0, rq_clock(rq));
+               deactivate_task(rq, p, 0);
         oldprio = p->prio;
         __setscheduler(rq, p, policy, param->sched_priority);
         if (on_rq) {
@@ -4522,10 +4556,7 @@ asmlinkage long sys_sched_yield(void)
         struct rq *rq = this_rq_lock();
  
         schedstat_inc(rq, yld_cnt);
-       if (unlikely(rq->nr_running == 1))
-               schedstat_inc(rq, yld_act_empty);
-       else
-               current->sched_class->yield_task(rq, current);
+       current->sched_class->yield_task(rq, current);
  
         /*
          * Since we are going to call schedule() anyway, there's
@@ -4881,14 +4912,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  static inline void sched_init_granularity(void)
  {
         unsigned int factor = 1 + ilog2(num_online_cpus());
-       const unsigned long gran_limit = 100000000;
+       const unsigned long limit = 100000000;
+
+       sysctl_sched_min_granularity *= factor;
+       if (sysctl_sched_min_granularity > limit)
+               sysctl_sched_min_granularity = limit;
  
-       sysctl_sched_granularity *= factor;
-       if (sysctl_sched_granularity > gran_limit)
-               sysctl_sched_granularity = gran_limit;
+       sysctl_sched_latency *= factor;
+       if (sysctl_sched_latency > limit)
+               sysctl_sched_latency = limit;
  
-       sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-       sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+       sysctl_sched_runtime_limit = sysctl_sched_latency;
+       sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
  }
  
  #ifdef CONFIG_SMP
@@ -4982,7 +5017,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
  
         on_rq = p->se.on_rq;
         if (on_rq)
-               deactivate_task(rq_src, p, 0, rq_clock(rq_src));
+               deactivate_task(rq_src, p, 0);
+
         set_task_cpu(p, dest_cpu);
         if (on_rq) {
                 activate_task(rq_dest, p, 0);
@@ -5215,7 +5251,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
         for ( ; ; ) {
                 if (!rq->nr_running)
                         break;
-               next = pick_next_task(rq, rq->curr, rq_clock(rq));
+               update_rq_clock(rq);
+               next = pick_next_task(rq, rq->curr);
                 if (!next)
                         break;
                 migrate_dead(dead_cpu, next);
@@ -5229,15 +5266,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
  static struct ctl_table sd_ctl_dir[] = {
         {
                 .procname       = "sched_domain",
-               .mode           = 0755,
+               .mode           = 0555,
         },
         {0,},
  };
  
  static struct ctl_table sd_ctl_root[] = {
         {
+               .ctl_name       = CTL_KERN,
                 .procname       = "kernel",
-               .mode           = 0755,
+               .mode           = 0555,
                 .child          = sd_ctl_dir,
         },
         {0,},
@@ -5313,7 +5351,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
         for_each_domain(cpu, sd) {
                 snprintf(buf, 32, "domain%d", i);
                 entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0755;
+               entry->mode = 0555;
                 entry->child = sd_alloc_ctl_domain_table(sd);
                 entry++;
                 i++;
@@ -5333,7 +5371,7 @@ static void init_sched_domain_sysctl(void)
         for (i = 0; i < cpu_num; i++, entry++) {
                 snprintf(buf, 32, "cpu%d", i);
                 entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0755;
+               entry->mode = 0555;
                 entry->child = sd_alloc_ctl_cpu_table(i);
         }
         sd_sysctl_header = register_sysctl_table(sd_ctl_root);
@@ -5400,7 +5438,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 rq->migration_thread = NULL;
                 /* Idle task back to normal (off runqueue, low prio) */
                 rq = task_rq_lock(rq->idle, &flags);
-               deactivate_task(rq, rq->idle, 0, rq_clock(rq));
+               update_rq_clock(rq);
+               deactivate_task(rq, rq->idle, 0);
                 rq->idle->static_prio = MAX_PRIO;
                 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
                 rq->idle->sched_class = &idle_sched_class;
@@ -6322,7 +6361,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
  }
  
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static int arch_reinit_sched_domains(void)
  {
         int err;
  
@@ -6351,24 +6390,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
         return ret ? ret : count;
  }
  
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
-{
-       int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
-       if (smt_capable())
-               err = sysfs_create_file(&cls->kset.kobj,
-                                       &attr_sched_smt_power_savings.attr);
-#endif
-#ifdef CONFIG_SCHED_MC
-       if (!err && mc_capable())
-               err = sysfs_create_file(&cls->kset.kobj,
-                                       &attr_sched_mc_power_savings.attr);
-#endif
-       return err;
-}
-#endif
-
  #ifdef CONFIG_SCHED_MC
  static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
  {
@@ -6379,8 +6400,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
  {
         return sched_power_savings_store(buf, count, 0);
  }
-SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
-           sched_mc_power_savings_store);
+static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
+                  sched_mc_power_savings_store);
  #endif
  
  #ifdef CONFIG_SCHED_SMT
@@ -6393,8 +6414,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
  {
         return sched_power_savings_store(buf, count, 1);
  }
-SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
-           sched_smt_power_savings_store);
+static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
+                  sched_smt_power_savings_store);
+#endif
+
+int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+{
+       int err = 0;
+
+#ifdef CONFIG_SCHED_SMT
+       if (smt_capable())
+               err = sysfs_create_file(&cls->kset.kobj,
+                                       &attr_sched_smt_power_savings.attr);
+#endif
+#ifdef CONFIG_SCHED_MC
+       if (!err && mc_capable())
+               err = sysfs_create_file(&cls->kset.kobj,
+                                       &attr_sched_mc_power_savings.attr);
+#endif
+       return err;
+}
  #endif
  
  /*
@@ -6637,12 +6676,13 @@ void normalize_rt_tasks(void)
                         goto out_unlock;
  #endif
  
+               update_rq_clock(rq);
                 on_rq = p->se.on_rq;
                 if (on_rq)
-                       deactivate_task(task_rq(p), p, 0, rq_clock(task_rq(p)));
+                       deactivate_task(rq, p, 0);
                 __setscheduler(rq, p, SCHED_NORMAL, 0);
                 if (on_rq) {
-                       activate_task(task_rq(p), p, 0);
+                       activate_task(rq, p, 0);
                         resched_task(rq->curr);
                 }
  #ifdef CONFIG_SMP