xfs: remove nr_to_write writeback windup.
[safe/jmp/linux-2.6] / kernel / sched_stats.h
index d6903bd..32d2bd4 100644 (file)
@@ -4,12 +4,12 @@
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 14
+#define SCHEDSTAT_VERSION 15
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
        int cpu;
-       int mask_len = NR_CPUS/32 * 9;
+       int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
        char *mask_str = kmalloc(mask_len, GFP_KERNEL);
 
        if (mask_str == NULL)
@@ -26,12 +26,11 @@ static int show_schedstat(struct seq_file *seq, void *v)
 
                /* runqueue-specific stats */
                seq_printf(seq,
-                   "cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
-                   cpu, rq->yld_both_empty,
-                   rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
+                   "cpu%d %u %u %u %u %u %u %llu %llu %lu",
+                   cpu, rq->yld_count,
                    rq->sched_switch, rq->sched_count, rq->sched_goidle,
                    rq->ttwu_count, rq->ttwu_local,
-                   rq->rq_sched_info.cpu_time,
+                   rq->rq_cpu_time,
                    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
 
                seq_printf(seq, "\n");
@@ -42,7 +41,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
                for_each_domain(cpu, sd) {
                        enum cpu_idle_type itype;
 
-                       cpumask_scnprintf(mask_str, mask_len, sd->span);
+                       cpumask_scnprintf(mask_str, mask_len,
+                                         sched_domain_span(sd));
                        seq_printf(seq, "domain%d %s", dcount++, mask_str);
                        for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
                                        itype++) {
@@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file)
        return res;
 }
 
-const struct file_operations proc_schedstat_operations = {
+static const struct file_operations proc_schedstat_operations = {
        .open    = schedstat_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
 };
 
+static int __init proc_schedstat_init(void)
+{
+       proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
+       return 0;
+}
+module_init(proc_schedstat_init);
+
 /*
  * Expects runqueue lock to be held for atomicity of update
  */
@@ -116,7 +123,7 @@ static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {
        if (rq)
-               rq->rq_sched_info.cpu_time += delta;
+               rq->rq_cpu_time += delta;
 }
 
 static inline void
@@ -229,7 +236,6 @@ static inline void sched_info_depart(struct task_struct *t)
        unsigned long long delta = task_rq(t)->clock -
                                        t->sched_info.last_arrival;
 
-       t->sched_info.cpu_time += delta;
        rq_sched_info_depart(task_rq(t), delta);
 
        if (t->state == TASK_RUNNING)
@@ -276,133 +282,94 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
  * on CONFIG_SCHEDSTATS.
  */
 
-#ifdef CONFIG_SMP
-
 /**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
+ * account_group_user_time - Maintain utime for a thread group.
  *
- * @tgtimes:   Pointer to thread_group_cputime structure.
- * @cputime:   Time value by which to increment the utime field of that
- *             structure.
+ * @tsk:       Pointer to task structure.
+ * @cputime:   Time value by which to increment the utime field of the
+ *             thread group's cputimer (tsk->signal->cputimer).
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the utime field there.
  */
-static inline void thread_group_cputime_account_user(
-       struct thread_group_cputime *tgtimes,
-       cputime_t cputime)
+static inline void account_group_user_time(struct task_struct *tsk,
+                                          cputime_t cputime)
 {
-       if (tgtimes->totals) {
-               struct task_cputime *times;
+       struct thread_group_cputimer *cputimer;
 
-               times = per_cpu_ptr(tgtimes->totals, get_cpu());
-               times->utime = cputime_add(times->utime, cputime);
-               put_cpu_no_resched();
-       }
+       /* tsk == current, ensure it is safe to use ->signal */
+       if (unlikely(tsk->exit_state))
+               return;
+
+       cputimer = &tsk->signal->cputimer;
+
+       if (!cputimer->running)
+               return;
+
+       spin_lock(&cputimer->lock);
+       cputimer->cputime.utime =
+               cputime_add(cputimer->cputime.utime, cputime);
+       spin_unlock(&cputimer->lock);
 }
 
 /**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
+ * account_group_system_time - Maintain stime for a thread group.
  *
- * @tgtimes:   Pointer to thread_group_cputime structure.
- * @cputime:   Time value by which to increment the stime field of that
- *             structure.
+ * @tsk:       Pointer to task structure.
+ * @cputime:   Time value by which to increment the stime field of the
+ *             thread group's cputimer (tsk->signal->cputimer).
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the stime field there.
  */
-static inline void thread_group_cputime_account_system(
-       struct thread_group_cputime *tgtimes,
-       cputime_t cputime)
+static inline void account_group_system_time(struct task_struct *tsk,
+                                            cputime_t cputime)
 {
-       if (tgtimes->totals) {
-               struct task_cputime *times;
+       struct thread_group_cputimer *cputimer;
 
-               times = per_cpu_ptr(tgtimes->totals, get_cpu());
-               times->stime = cputime_add(times->stime, cputime);
-               put_cpu_no_resched();
-       }
+       /* tsk == current, ensure it is safe to use ->signal */
+       if (unlikely(tsk->exit_state))
+               return;
+
+       cputimer = &tsk->signal->cputimer;
+
+       if (!cputimer->running)
+               return;
+
+       spin_lock(&cputimer->lock);
+       cputimer->cputime.stime =
+               cputime_add(cputimer->cputime.stime, cputime);
+       spin_unlock(&cputimer->lock);
 }
 
 /**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *                                             thread group.
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
  *
- * @tgtimes:   Pointer to thread_group_cputime structure.
+ * @tsk:       Pointer to task structure.
  * @ns:                Time value by which to increment the sum_exec_runtime field
- *             of that structure.
+ *             of the thread group's cputimer (tsk->signal->cputimer).
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the sum_exec_runtime field there.
  */
-static inline void thread_group_cputime_account_exec_runtime(
-       struct thread_group_cputime *tgtimes,
-       unsigned long long ns)
-{
-       if (tgtimes->totals) {
-               struct task_cputime *times;
-
-               times = per_cpu_ptr(tgtimes->totals, get_cpu());
-               times->sum_exec_runtime += ns;
-               put_cpu_no_resched();
-       }
-}
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_account_user(
-       struct thread_group_cputime *tgtimes,
-       cputime_t cputime)
-{
-       tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-       struct thread_group_cputime *tgtimes,
-       cputime_t cputime)
-{
-       tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-       struct thread_group_cputime *tgtimes,
-       unsigned long long ns)
-{
-       tgtimes->totals->sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-/*
- * These are the generic time-accounting routines that use the above
- * functions.  They are the functions actually called by the scheduler.
- */
-static inline void account_group_user_time(struct task_struct *tsk,
-                                           cputime_t cputime)
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+                                             unsigned long long ns)
 {
+       struct thread_group_cputimer *cputimer;
        struct signal_struct *sig;
 
        sig = tsk->signal;
-       if (likely(sig))
-               thread_group_cputime_account_user(&sig->cputime, cputime);
-}
+       /* see __exit_signal()->task_rq_unlock_wait() */
+       barrier();
+       if (unlikely(!sig))
+               return;
 
-static inline void account_group_system_time(struct task_struct *tsk,
-                                             cputime_t cputime)
-{
-       struct signal_struct *sig;
+       cputimer = &sig->cputimer;
 
-       sig = tsk->signal;
-       if (likely(sig))
-               thread_group_cputime_account_system(&sig->cputime, cputime);
-}
+       if (!cputimer->running)
+               return;
 
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-                                              unsigned long long ns)
-{
-       struct signal_struct *sig;
-
-       sig = tsk->signal;
-       if (likely(sig))
-               thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+       spin_lock(&cputimer->lock);
+       cputimer->cputime.sum_exec_runtime += ns;
+       spin_unlock(&cputimer->lock);
 }