xfs: remove nr_to_write writeback windup.

[safe/jmp/linux-2.6] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index fbaf312..f8b8996 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -323,6 +323,15 @@ static inline struct task_group *task_group(struct task_struct *p)
  /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
  static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
  {
+       /*
+        * Strictly speaking this rcu_read_lock() is not needed since the
+        * task_group is tied to the cgroup, which in turn can never go away
+        * as long as there are tasks attached to it.
+        *
+        * However, since task_group() uses task_subsys_state(), which is an
+        * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
+        */
+       rcu_read_lock();
  #ifdef CONFIG_FAIR_GROUP_SCHED
         p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
         p->se.parent = task_group(p)->se[cpu];
@@ -332,6 +341,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
         p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
         p->rt.parent = task_group(p)->rt_se[cpu];
  #endif
+       rcu_read_unlock();
  }
  
  #else
@@ -534,6 +544,8 @@ struct rq {
         struct root_domain *rd;
         struct sched_domain *sd;
  
+       unsigned long cpu_power;
+
         unsigned char idle_at_tick;
         /* For active balancing */
         int post_schedule;
@@ -959,14 +971,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
         }
  }
  
-void task_rq_unlock_wait(struct task_struct *p)
-{
-       struct rq *rq = task_rq(p);
-
-       smp_mb(); /* spin-unlock-wait is not a full memory barrier */
-       raw_spin_unlock_wait(&rq->lock);
-}
-
  static void __task_rq_unlock(struct rq *rq)
         __releases(rq->lock)
  {
@@ -1497,24 +1501,9 @@ static unsigned long target_load(int cpu, int type)
         return max(rq->cpu_load[type-1], total);
  }
  
-static struct sched_group *group_of(int cpu)
-{
-       struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
-
-       if (!sd)
-               return NULL;
-
-       return sd->groups;
-}
-
  static unsigned long power_of(int cpu)
  {
-       struct sched_group *group = group_of(cpu);
-
-       if (!group)
-               return SCHED_LOAD_SCALE;
-
-       return group->cpu_power;
+       return cpu_rq(cpu)->cpu_power;
  }
  
  static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1852,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
  static void set_load_weight(struct task_struct *p)
  {
         if (task_has_rt_policy(p)) {
-               p->se.load.weight = prio_to_weight[0] * 2;
-               p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+               p->se.load.weight = 0;
+               p->se.load.inv_weight = WMULT_CONST;
                 return;
         }
  
@@ -2058,49 +2047,6 @@ static bool migrate_task(struct task_struct *p, int dest_cpu)
  }
  
  /*
- * wait_task_context_switch -  wait for a thread to complete at least one
- *                             context switch.
- *
- * @p must not be current.
- */
-void wait_task_context_switch(struct task_struct *p)
-{
-       unsigned long nvcsw, nivcsw, flags;
-       int running;
-       struct rq *rq;
-
-       nvcsw   = p->nvcsw;
-       nivcsw  = p->nivcsw;
-       for (;;) {
-               /*
-                * The runqueue is assigned before the actual context
-                * switch. We need to take the runqueue lock.
-                *
-                * We could check initially without the lock but it is
-                * very likely that we need to take the lock in every
-                * iteration.
-                */
-               rq = task_rq_lock(p, &flags);
-               running = task_running(rq, p);
-               task_rq_unlock(rq, &flags);
-
-               if (likely(!running))
-                       break;
-               /*
-                * The switch count is incremented before the actual
-                * context switch. We thus wait for two switches to be
-                * sure at least one completed.
-                */
-               if ((p->nvcsw - nvcsw) > 1)
-                       break;
-               if ((p->nivcsw - nivcsw) > 1)
-                       break;
-
-               cpu_relax();
-       }
-}
-
-/*
   * wait_task_inactive - wait for a thread to unschedule.
   *
   * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2155,7 +2101,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                  * just go back and repeat.
                  */
                 rq = task_rq_lock(p, &flags);
-               trace_sched_wait_task(rq, p);
+               trace_sched_wait_task(p);
                 running = task_running(rq, p);
                 on_rq = p->se.on_rq;
                 ncsw = 0;
@@ -2426,7 +2372,7 @@ out_activate:
         success = 1;
  
  out_running:
-       trace_sched_wakeup(rq, p, success);
+       trace_sched_wakeup(p, success);
         check_preempt_curr(rq, p, wake_flags);
  
         p->state = TASK_RUNNING;
@@ -2600,7 +2546,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  
         rq = task_rq_lock(p, &flags);
         activate_task(rq, p, 0);
-       trace_sched_wakeup_new(rq, p, 1);
+       trace_sched_wakeup_new(p, 1);
         check_preempt_curr(rq, p, WF_FORK);
  #ifdef CONFIG_SMP
         if (p->sched_class->task_woken)
@@ -2820,7 +2766,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
         struct mm_struct *mm, *oldmm;
  
         prepare_task_switch(rq, prev, next);
-       trace_sched_switch(rq, prev, next);
+       trace_sched_switch(prev, next);
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@ -3641,7 +3587,7 @@ need_resched:
         preempt_disable();
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
-       rcu_sched_qs(cpu);
+       rcu_note_context_switch(cpu);
         prev = rq->curr;
         switch_count = &prev->nivcsw;
  
@@ -3724,7 +3670,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
          * the mutex owner just released it and exited.
          */
         if (probe_kernel_address(&owner->cpu, cpu))
-               goto out;
+               return 0;
  #else
         cpu = owner->cpu;
  #endif
@@ -3734,14 +3680,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
          * the cpu field may no longer be valid.
          */
         if (cpu >= nr_cpumask_bits)
-               goto out;
+               return 0;
  
         /*
          * We need to validate that we can do a
          * get_cpu() and that we have the percpu area.
          */
         if (!cpu_online(cpu))
-               goto out;
+               return 0;
  
         rq = cpu_rq(cpu);
  
@@ -3760,7 +3706,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
  
                 cpu_relax();
         }
-out:
+
         return 1;
  }
  #endif
@@ -3884,6 +3830,7 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
  {
         __wake_up_common(q, mode, 1, 0, NULL);
  }
+EXPORT_SYMBOL_GPL(__wake_up_locked);
  
  void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  {
@@ -3983,8 +3930,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
         if (!x->done) {
                 DECLARE_WAITQUEUE(wait, current);
  
-               wait.flags |= WQ_FLAG_EXCLUSIVE;
-               __add_wait_queue_tail(&x->wait, &wait);
+               __add_wait_queue_tail_exclusive(&x->wait, &wait);
                 do {
                         if (signal_pending_state(state, current)) {
                                 timeout = -ERESTARTSYS;
@@ -4095,6 +4041,23 @@ int __sched wait_for_completion_killable(struct completion *x)
  EXPORT_SYMBOL(wait_for_completion_killable);
  
  /**
+ * wait_for_completion_killable_timeout - waits for completion of a task (with timeout, killable)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be
+ * signaled or for a specified timeout to expire. It can be
+ * interrupted by a kill signal. The timeout is in jiffies.
+ */
+unsigned long __sched
+wait_for_completion_killable_timeout(struct completion *x,
+                                    unsigned long timeout)
+{
+       return wait_for_common(x, timeout, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(wait_for_completion_killable_timeout);
+
+/**
   *     try_wait_for_completion - try to decrement a completion without blocking
   *     @x:     completion structure
   *
@@ -7629,6 +7592,7 @@ void __init sched_init(void)
  #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
+               rq->cpu_power = SCHED_LOAD_SCALE;
                 rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;
@@ -7792,9 +7756,9 @@ void normalize_rt_tasks(void)
  
  #endif /* CONFIG_MAGIC_SYSRQ */
  
-#ifdef CONFIG_IA64
+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
  /*
- * These functions are only useful for the IA64 MCA handling.
+ * These functions are only useful for the IA64 MCA handling, or kdb.
   *
   * They can only be called when the whole system has been
   * stopped - every CPU needs to be quiescent, and no scheduling
@@ -7814,6 +7778,9 @@ struct task_struct *curr_task(int cpu)
         return cpu_curr(cpu);
  }
  
+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
+
+#ifdef CONFIG_IA64
  /**
   * set_curr_task - set the current task for a given cpu.
   * @cpu: the processor in question.