Merge commit 'v2.6.28-rc1' into sched/urgent

author Ingo Molnar <mingo@elte.hu>
Fri, 24 Oct 2008 10:48:46 +0000 (12:48 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 24 Oct 2008 10:48:46 +0000 (12:48 +0200)
diff --combined include/linux/sched.h

index c05b45f,8478f33..b483f39
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -402,12 -402,21 +402,21 @@@ extern int get_dumpable(struct mm_struc
   #define MMF_DUMP_MAPPED_PRIVATE       4
   #define MMF_DUMP_MAPPED_SHARED        5
   #define MMF_DUMP_ELF_HEADERS  6
+ #define MMF_DUMP_HUGETLB_PRIVATE 7
+ #define MMF_DUMP_HUGETLB_SHARED  8
   #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
- #define MMF_DUMP_FILTER_BITS  5
+ #define MMF_DUMP_FILTER_BITS  7
   #define MMF_DUMP_FILTER_MASK \
         (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
   #define MMF_DUMP_FILTER_DEFAULT \
-       ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
+       ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+        (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+ 
+ #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+ # define MMF_DUMP_MASK_DEFAULT_ELF    (1 << MMF_DUMP_ELF_HEADERS)
+ #else
+ # define MMF_DUMP_MASK_DEFAULT_ELF    0
+ #endif
   
   struct sighand_struct {
         atomic_t                count;
@@@ -424,6 -433,39 +433,39 @@@ struct pacct_struct 
         unsigned long           ac_minflt, ac_majflt;
   };
   
+ /**
+  * struct task_cputime - collected CPU time counts
+  * @utime:            time spent in user mode, in &cputime_t units
+  * @stime:            time spent in kernel mode, in &cputime_t units
+  * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
+  *
+  * This structure groups together three kinds of CPU time that are
+  * tracked for threads and thread groups.  Most things considering
+  * CPU time want to group these counts together and treat all three
+  * of them in parallel.
+  */
+ struct task_cputime {
+       cputime_t utime;
+       cputime_t stime;
+       unsigned long long sum_exec_runtime;
+ };
+ /* Alternate field names when used to cache expirations. */
+ #define prof_exp      stime
+ #define virt_exp      utime
+ #define sched_exp     sum_exec_runtime
+ 
+ /**
+  * struct thread_group_cputime - thread group interval timer counts
+  * @totals:           thread group interval timers; substructure for
+  *                    uniprocessor kernel, per-cpu for SMP kernel.
+  *
+  * This structure contains the version of task_cputime, above, that is
+  * used for thread group CPU clock calculations.
+  */
+ struct thread_group_cputime {
+       struct task_cputime *totals;
+ };
+ 
   /*
    * NOTE! "signal_struct" does not have it's own
    * locking, because a shared signal_struct always
@@@ -469,6 -511,17 +511,17 @@@ struct signal_struct 
         cputime_t it_prof_expires, it_virt_expires;
         cputime_t it_prof_incr, it_virt_incr;
   
+       /*
+        * Thread group totals for process CPU clocks.
+        * See thread_group_cputime(), et al, for details.
+        */
+       struct thread_group_cputime cputime;
+ 
+       /* Earliest-expiration cache. */
+       struct task_cputime cputime_expires;
+ 
+       struct list_head cpu_timers[3];
+ 
         /* job control IDs */
   
         /*
@@@ -499,7 -552,7 +552,7 @@@
          * Live threads maintain their own counters and add to these
          * in __exit_signal, except for the group leader.
          */
-       cputime_t utime, stime, cutime, cstime;
+       cputime_t cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@@ -508,14 -561,6 +561,6 @@@
         struct task_io_accounting ioac;
   
         /*
-        * Cumulative ns of scheduled CPU time for dead threads in the
-        * group, not including a zombie group leader.  (This only differs
-        * from jiffies_to_ns(utime + stime) if sched_clock uses something
-        * other than jiffies.)
-        */
-       unsigned long long sum_sched_runtime;
- 
-       /*
          * We don't bother to synchronize most readers of this at all,
          * because there is no reader checking a limit that actually needs
          * to get both rlim_cur and rlim_max atomically, and either one
@@@ -526,8 -571,6 +571,6 @@@
          */
         struct rlimit rlim[RLIM_NLIMITS];
   
-       struct list_head cpu_timers[3];
- 
         /* keep the process-shared keyrings here so that they do the right
          * thing in threads created with CLONE_THREAD */
   #ifdef CONFIG_KEYS
@@@ -637,10 -680,6 +680,6 @@@ struct sched_info 
   };
   #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
   
- #ifdef CONFIG_SCHEDSTATS
- extern const struct file_operations proc_schedstat_operations;
- #endif /* CONFIG_SCHEDSTATS */
- 
   #ifdef CONFIG_TASK_DELAY_ACCT
   struct task_delay_info {
         spinlock_t      lock;
@@@ -897,6 -936,7 +936,6 @@@ struct sched_class 
         void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
         void (*yield_task) (struct rq *rq);
- -      int  (*select_task_rq)(struct task_struct *p, int sync);
   
         void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
   
@@@ -904,8 -944,6 +943,8 @@@
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
   
   #ifdef CONFIG_SMP
+ +      int  (*select_task_rq)(struct task_struct *p, int sync);
+ +
         unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
                         struct rq *busiest, unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
@@@ -917,17 -955,16 +956,17 @@@
         void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
         void (*post_schedule) (struct rq *this_rq);
         void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
- -#endif
   
- -      void (*set_curr_task) (struct rq *rq);
- -      void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
- -      void (*task_new) (struct rq *rq, struct task_struct *p);
         void (*set_cpus_allowed)(struct task_struct *p,
                                  const cpumask_t *newmask);
   
         void (*rq_online)(struct rq *rq);
         void (*rq_offline)(struct rq *rq);
+ +#endif
+ +
+ +      void (*set_curr_task) (struct rq *rq);
+ +      void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+ +      void (*task_new) (struct rq *rq, struct task_struct *p);
   
         void (*switched_from) (struct rq *this_rq, struct task_struct *task,
                                int running);
@@@ -1138,8 -1175,7 +1177,7 @@@ struct task_struct 
   /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
         unsigned long min_flt, maj_flt;
   
-       cputime_t it_prof_expires, it_virt_expires;
-       unsigned long long it_sched_expires;
+       struct task_cputime cputime_expires;
         struct list_head cpu_timers[3];
   
   /* process credentials */
@@@ -1305,6 -1341,12 +1343,12 @@@
         int latency_record_count;
         struct latency_record latency_record[LT_SAVECOUNT];
   #endif
+       /*
+        * time slack values; these are used to round up poll() and
+        * select() etc timeout values. These are in nanoseconds.
+        */
+       unsigned long timer_slack_ns;
+       unsigned long default_timer_slack_ns;
   };
   
   /*
@@@ -1589,6 -1631,7 +1633,7 @@@ extern unsigned long long cpu_clock(in
   
   extern unsigned long long
   task_sched_runtime(struct task_struct *task);
+ extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
   
   /* sched_exec is called by processes performing an exec */
   #ifdef CONFIG_SMP
@@@ -2087,6 -2130,30 +2132,30 @@@ static inline int spin_needbreak(spinlo
   }
   
   /*
+  * Thread group CPU time accounting.
+  */
+ 
+ extern int thread_group_cputime_alloc(struct task_struct *);
+ extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+ 
+ static inline void thread_group_cputime_init(struct signal_struct *sig)
+ {
+       sig->cputime.totals = NULL;
+ }
+ 
+ static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+ {
+       if (curr->signal->cputime.totals)
+               return 0;
+       return thread_group_cputime_alloc(curr);
+ }
+ 
+ static inline void thread_group_cputime_free(struct signal_struct *sig)
+ {
+       free_percpu(sig->cputime.totals);
+ }
+ 
+ /*
    * Reevaluate whether the task has signals pending delivery.
    * Wake the task if so.
    * This is required every time the blocked sigset_t changes.
diff --combined kernel/sched_fair.c

index 8de48a5,9573c33..42d211f
--- 1/kernel/sched_fair.c
--- 2/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@@ -449,6 -449,7 +449,7 @@@ static void update_curr(struct cfs_rq *
                 struct task_struct *curtask = task_of(curr);
   
                 cpuacct_charge(curtask, delta_exec);
+               account_group_exec_runtime(curtask, delta_exec);
         }
   }
   
@@@ -1593,6 -1594,9 +1594,6 @@@ static const struct sched_class fair_sc
         .enqueue_task           = enqueue_task_fair,
         .dequeue_task           = dequeue_task_fair,
         .yield_task             = yield_task_fair,
- -#ifdef CONFIG_SMP
- -      .select_task_rq         = select_task_rq_fair,
- -#endif /* CONFIG_SMP */
   
         .check_preempt_curr     = check_preempt_wakeup,
   
@@@ -1600,8 -1604,6 +1601,8 @@@
         .put_prev_task          = put_prev_task_fair,
   
   #ifdef CONFIG_SMP
+ +      .select_task_rq         = select_task_rq_fair,
+ +
         .load_balance           = load_balance_fair,
         .move_one_task          = move_one_task_fair,
   #endif
diff --combined kernel/sched_rt.c

index c9aa5be,b446dc8..d9ba9d5
--- 1/kernel/sched_rt.c
--- 2/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@@ -526,6 -526,8 +526,8 @@@ static void update_curr_rt(struct rq *r
         schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
   
         curr->se.sum_exec_runtime += delta_exec;
+       account_group_exec_runtime(curr, delta_exec);
+ 
         curr->se.exec_start = rq->clock;
         cpuacct_charge(curr, delta_exec);
   
@@@ -1458,7 -1460,7 +1460,7 @@@ static void watchdog(struct rq *rq, str
                 p->rt.timeout++;
                 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
                 if (p->rt.timeout > next)
-                       p->it_sched_expires = p->se.sum_exec_runtime;
+                       p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
         }
   }
   
@@@ -1502,6 -1504,9 +1504,6 @@@ static const struct sched_class rt_sche
         .enqueue_task           = enqueue_task_rt,
         .dequeue_task           = dequeue_task_rt,
         .yield_task             = yield_task_rt,
- -#ifdef CONFIG_SMP
- -      .select_task_rq         = select_task_rq_rt,
- -#endif /* CONFIG_SMP */
   
         .check_preempt_curr     = check_preempt_curr_rt,
   
@@@ -1509,8 -1514,6 +1511,8 @@@
         .put_prev_task          = put_prev_task_rt,
   
   #ifdef CONFIG_SMP
+ +      .select_task_rq         = select_task_rq_rt,
+ +
         .load_balance           = load_balance_rt,
         .move_one_task          = move_one_task_rt,
         .set_cpus_allowed       = set_cpus_allowed_rt,
author	Ingo Molnar <mingo@elte.hu>
	Fri, 24 Oct 2008 10:48:46 +0000 (12:48 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 24 Oct 2008 10:48:46 +0000 (12:48 +0200)
		1	2
include/linux/sched.h	patch | diff1 | diff2 | blob | history
kernel/sched_fair.c	patch | diff1 | diff2 | blob | history
kernel/sched_rt.c	patch | diff1 | diff2 | blob | history