X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=include%2Flinux%2Fsched.h;h=0c35b0343a76bf20327af6a84b1be976493735e3;hb=33dcdac2df54e66c447ae03f58c95c7251aa5649;hp=1845b2e99a871eca1c2bf439468102d4f5f2fecc;hpb=2aa44d0567ed21b47b87d68819415d48194cb923;p=safe%2Fjmp%2Flinux-2.6 diff --git a/include/linux/sched.h b/include/linux/sched.h index 1845b2e..0c35b03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H -#include /* For AT_VECTOR_SIZE */ - /* * cloning flags: */ @@ -27,6 +25,9 @@ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ +#define CLONE_NEWPID 0x20000000 /* New pid namespace */ +#define CLONE_NEWNET 0x40000000 /* New network namespace */ +#define CLONE_IO 0x80000000 /* Clone io context */ /* * Scheduling policies @@ -57,27 +58,25 @@ struct sched_param { #include #include #include +#include #include -#include #include #include -#include #include #include #include #include -#include #include #include #include #include #include #include +#include #include #include -#include #include #include @@ -86,11 +85,15 @@ struct sched_param { #include #include #include +#include +#include #include +struct mem_cgroup; struct exec_domain; struct futex_pi_state; +struct robust_list_head; struct bio; /* @@ -113,7 +116,7 @@ extern unsigned long avenrun[]; /* Load averages */ #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1<exit_state */ #define EXIT_ZOMBIE 16 #define EXIT_DEAD 32 /* in tsk->state again */ -#define TASK_NONINTERACTIVE 64 -#define TASK_DEAD 128 +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 + +/* Convenience macros for the sake of set_task_state */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +/* Convenience macros for the sake of wake_up */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +/* get_task_state() */ +#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED) + +#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) +#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) +#define task_is_stopped_or_traced(task) \ + ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) +#define task_contributes_to_load(task) \ + ((task->state & TASK_UNINTERRUPTIBLE) != 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -215,6 +242,7 @@ struct task_struct; extern void sched_init(void); extern void sched_init_smp(void); +extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); @@ -228,6 +256,8 @@ static inline int select_nohz_load_balancer(int cpu) } #endif +extern unsigned long rt_needs_cpu(int cpu); + /* * Only dump TASK_* tasks. (0 for all tasks) */ @@ -252,14 +282,22 @@ long io_schedule_timeout(long timeout); extern void cpu_init (void); extern void trap_init(void); +extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); +extern void hrtick_resched(void); + +extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); +extern unsigned long softlockup_thresh; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; #else static inline void softlockup_tick(void) { @@ -278,12 +316,17 @@ static inline void touch_all_softlockup_watchdogs(void) /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) + +/* Linker adds these: start and end of __sched functions */ +extern char __sched_text_start[], __sched_text_end[]; + /* Is this address in the __sched functions? */ extern int in_sched_functions(unsigned long addr); #define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long FASTCALL(schedule_timeout(signed long timeout)); +extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); +extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); @@ -317,7 +360,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); #define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) -typedef atomic_long_t mm_counter_t; #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ /* @@ -329,7 +371,6 @@ typedef atomic_long_t mm_counter_t; #define add_mm_counter(mm, member, value) (mm)->_##member += (value) #define inc_mm_counter(mm, member) (mm)->_##member++ #define dec_mm_counter(mm, member) (mm)->_##member-- -typedef unsigned long mm_counter_t; #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ @@ -359,86 +400,19 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_DUMP_ANON_SHARED 3 #define MMF_DUMP_MAPPED_PRIVATE 4 #define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 4 +#define MMF_DUMP_FILTER_BITS 5 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) -struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ - struct rb_root mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ - unsigned long (*get_unmapped_area) (struct file *filp, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags); - void (*unmap_area) (struct mm_struct *mm, unsigned long addr); - unsigned long mmap_base; /* base of mmap area */ - unsigned long task_size; /* size of task vm space */ - unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ - unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ - pgd_t * pgd; - atomic_t mm_users; /* How many users with user space? */ - atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ - int map_count; /* number of VMAs */ - struct rw_semaphore mmap_sem; - spinlock_t page_table_lock; /* Protects page tables and some counters */ - - struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung - * together off init_mm.mmlist, and are protected - * by mmlist_lock - */ - - /* Special counters, in some configurations protected by the - * page_table_lock, in other configurations by being atomic. - */ - mm_counter_t _file_rss; - mm_counter_t _anon_rss; - - unsigned long hiwater_rss; /* High-watermark of RSS usage */ - unsigned long hiwater_vm; /* High-water virtual memory usage */ - - unsigned long total_vm, locked_vm, shared_vm, exec_vm; - unsigned long stack_vm, reserved_vm, def_flags, nr_ptes; - unsigned long start_code, end_code, start_data, end_data; - unsigned long start_brk, brk, start_stack; - unsigned long arg_start, arg_end, env_start, env_end; - - unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - - cpumask_t cpu_vm_mask; - - /* Architecture-specific MM context */ - mm_context_t context; - - /* Swap token stuff */ - /* - * Last value of global fault stamp as seen by this process. - * In other words, this value gives an indication of how long - * it has been since this task got the token. - * Look at mm/thrash.c - */ - unsigned int faultstamp; - unsigned int token_priority; - unsigned int last_interval; - - unsigned long flags; /* Must use atomic bitops to access the bits */ - - /* coredumping support */ - int core_waiters; - struct completion *core_startup_done, core_done; - - /* aio bits */ - rwlock_t ioctx_list_lock; - struct kioctx *ioctx_list; -}; - struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct list_head signalfd_list; + wait_queue_head_t signalfd_wqh; }; struct pacct_struct { @@ -487,7 +461,7 @@ struct signal_struct { /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; - struct task_struct *tsk; + struct pid *leader_pid; ktime_t it_real_incr; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ @@ -495,7 +469,17 @@ struct signal_struct { cputime_t it_prof_incr, it_virt_incr; /* job control IDs */ - pid_t pgrp; + + /* + * pgrp and session fields are deprecated. + * use the task_session_Xnr and task_pgrp_Xnr routines below + */ + + union { + pid_t pgrp __deprecated; + pid_t __pgrp; + }; + struct pid *tty_old_pgrp; union { @@ -515,6 +499,8 @@ struct signal_struct { * in __exit_signal, except for the group leader. */ cputime_t utime, stime, cutime, cstime; + cputime_t gtime; + cputime_t cgtime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -570,6 +556,21 @@ struct signal_struct { #define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} /* * Some day this will be a full-fledged user tracking system.. @@ -583,8 +584,10 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif +#ifdef CONFIG_POSIX_MQUEUE /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ +#endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ #ifdef CONFIG_KEYS @@ -593,10 +596,20 @@ struct user_struct { #endif /* Hash table maintenance information */ - struct list_head uidhash_list; + struct hlist_node uidhash_node; uid_t uid; + +#ifdef CONFIG_USER_SCHED + struct task_group *tg; +#ifdef CONFIG_SYSFS + struct kobject kobj; + struct work_struct work; +#endif +#endif }; +extern int uids_sysfs_init(void); + extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; @@ -608,13 +621,17 @@ struct reclaim_state; #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info { /* cumulative counters */ - unsigned long pcnt; /* # of times run on this cpu */ + unsigned long pcount; /* # of times run on this cpu */ unsigned long long cpu_time, /* time spent on the cpu */ run_delay; /* time spent waiting on a runqueue */ /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ +#ifdef CONFIG_SCHEDSTATS + /* BKL stats */ + unsigned int bkl_count; +#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -681,7 +698,7 @@ enum cpu_idle_type { #define SCHED_LOAD_SHIFT 10 #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) -#define SCHED_LOAD_SCALE_FUZZ (SCHED_LOAD_SCALE >> 1) +#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE #ifdef CONFIG_SMP #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ @@ -695,6 +712,7 @@ enum cpu_idle_type { #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ +#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ #define BALANCE_FOR_MC_POWER \ (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) @@ -724,12 +742,31 @@ struct sched_group { u32 reciprocal_cpu_power; }; +enum sched_domain_level { + SD_LV_NONE = 0, + SD_LV_SIBLING, + SD_LV_MC, + SD_LV_CPU, + SD_LV_NODE, + SD_LV_ALLNODES, + SD_LV_MAX +}; + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ + int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -741,6 +778,7 @@ struct sched_domain { unsigned int wake_idx; unsigned int forkexec_idx; int flags; /* See SD_* */ + enum sched_domain_level level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ @@ -749,39 +787,40 @@ struct sched_domain { #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ - unsigned long lb_cnt[CPU_MAX_IDLE_TYPES]; - unsigned long lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned long lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned long lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned long lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned long lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned long lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned long lb_nobusyq[CPU_MAX_IDLE_TYPES]; + unsigned int lb_count[CPU_MAX_IDLE_TYPES]; + unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; + unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; /* Active load balancing */ - unsigned long alb_cnt; - unsigned long alb_failed; - unsigned long alb_pushed; + unsigned int alb_count; + unsigned int alb_failed; + unsigned int alb_pushed; /* SD_BALANCE_EXEC stats */ - unsigned long sbe_cnt; - unsigned long sbe_balanced; - unsigned long sbe_pushed; + unsigned int sbe_count; + unsigned int sbe_balanced; + unsigned int sbe_pushed; /* SD_BALANCE_FORK stats */ - unsigned long sbf_cnt; - unsigned long sbf_balanced; - unsigned long sbf_pushed; + unsigned int sbf_count; + unsigned int sbf_balanced; + unsigned int sbf_pushed; /* try_to_wake_up() stats */ - unsigned long ttwu_wake_remote; - unsigned long ttwu_move_affine; - unsigned long ttwu_move_balance; + unsigned int ttwu_wake_remote; + unsigned int ttwu_move_affine; + unsigned int ttwu_move_balance; #endif }; -extern int partition_sched_domains(cpumask_t *partition1, - cpumask_t *partition2); +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, + struct sched_domain_attr *dattr_new); +extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ @@ -803,10 +842,8 @@ static inline int above_background_load(void) } struct io_context; /* See blkdev.h */ -struct cpuset; - #define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) struct group_info { int ngroups; atomic_t usage; @@ -853,26 +890,51 @@ struct rq; struct sched_domain; struct sched_class { - struct sched_class *next; + const struct sched_class *next; void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); - void (*yield_task) (struct rq *rq, struct task_struct *p); + void (*yield_task) (struct rq *rq); + int (*select_task_rq)(struct task_struct *p, int sync); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p); +#ifdef CONFIG_SMP unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, - struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, + struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, int *this_best_prio); + int (*move_one_task) (struct rq *this_rq, int this_cpu, + struct rq *busiest, struct sched_domain *sd, + enum cpu_idle_type idle); + void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); + void (*post_schedule) (struct rq *this_rq); + void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); +#endif + void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_new) (struct rq *rq, struct task_struct *p); + void (*set_cpus_allowed)(struct task_struct *p, + const cpumask_t *newmask); + + void (*join_domain)(struct rq *rq); + void (*leave_domain)(struct rq *rq); + + void (*switched_from) (struct rq *this_rq, struct task_struct *task, + int running); + void (*switched_to) (struct rq *this_rq, struct task_struct *task, + int running); + void (*prio_changed) (struct rq *this_rq, struct task_struct *task, + int oldprio, int running); + +#ifdef CONFIG_FAIR_GROUP_SCHED + void (*moved_group) (struct task_struct *p); +#endif }; struct load_weight { @@ -887,30 +949,27 @@ struct load_weight { * 4 se->block_start * 4 se->run_node * 4 se->sleep_start - * 4 se->sleep_start_fair * 6 se->load.weight - * 7 se->delta_fair - * 15 se->wait_runtime */ struct sched_entity { - long wait_runtime; - unsigned long delta_fair_run; - unsigned long delta_fair_sleep; - unsigned long delta_exec; - s64 fair_key; struct load_weight load; /* for load-balancing */ struct rb_node run_node; + struct list_head group_node; unsigned int on_rq; u64 exec_start; u64 sum_exec_runtime; - u64 wait_start_fair; - u64 sleep_start_fair; + u64 vruntime; + u64 prev_sum_exec_runtime; + + u64 last_wakeup; + u64 avg_overlap; #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; - s64 sum_wait_runtime; + u64 wait_count; + u64 wait_sum; u64 sleep_start; u64 sleep_max; @@ -919,9 +978,25 @@ struct sched_entity { u64 block_start; u64 block_max; u64 exec_max; - - unsigned long wait_runtime_overruns; - unsigned long wait_runtime_underruns; + u64 slice_max; + + u64 nr_migrations; + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + u64 nr_forced2_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; #endif #ifdef CONFIG_FAIR_GROUP_SCHED @@ -933,6 +1008,22 @@ struct sched_entity { #endif }; +struct sched_rt_entity { + struct list_head run_list; + unsigned int time_slice; + unsigned long timeout; + int nr_cpus_allowed; + + struct sched_rt_entity *back; +#ifdef CONFIG_RT_GROUP_SCHED + struct sched_rt_entity *parent; + /* rq on which this entity is (to be) queued: */ + struct rt_rq *rt_rq; + /* rq "owned" by this entity/group: */ + struct rt_rq *my_q; +#endif +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -949,23 +1040,36 @@ struct task_struct { #endif int prio, static_prio, normal_prio; - struct list_head run_list; - struct sched_class *sched_class; + const struct sched_class *sched_class; struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ struct hlist_head preempt_notifiers; #endif - unsigned short ioprio; + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; + s8 oomkilladj; /* OOM kill score adjustment (bit shift). */ #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif unsigned int policy; cpumask_t cpus_allowed; - unsigned int time_slice; + +#ifdef CONFIG_PREEMPT_RCU + int rcu_read_lock_nesting; + int rcu_flipctr_idx; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1020,7 +1124,9 @@ struct task_struct { int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ unsigned int rt_priority; - cputime_t utime, stime; + cputime_t utime, stime, utimescaled, stimescaled; + cputime_t gtime; + cputime_t prev_utime, prev_stime; unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ @@ -1035,24 +1141,14 @@ struct task_struct { uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted; - unsigned keep_capabilities:1; + kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; + unsigned securebits; struct user_struct *user; #ifdef CONFIG_KEYS struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif - /* - * fpu_counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time - */ - unsigned char fpu_counter; - int oomkilladj; /* OOM kill score adjustment (bit shift). */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -1063,6 +1159,11 @@ struct task_struct { /* ipc stuff */ struct sysv_sem sysvsem; #endif +#ifdef CONFIG_DETECT_SOFTLOCKUP +/* hung task detection */ + unsigned long last_switch_timestamp; + unsigned long last_switch_count; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* filesystem information */ @@ -1076,7 +1177,7 @@ struct task_struct { struct sighand_struct *sighand; sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */ + sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ struct sigpending pending; unsigned long sas_ss_sp; @@ -1084,9 +1185,14 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - +#ifdef CONFIG_SECURITY void *security; +#endif struct audit_context *audit_context; +#ifdef CONFIG_AUDITSYSCALL + uid_t loginuid; + unsigned int sessionid; +#endif seccomp_t seccomp; /* Thread group tracking */ @@ -1125,7 +1231,7 @@ struct task_struct { int softirq_context; #endif #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 30UL +# define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; struct held_lock held_locks[MAX_LOCK_DEPTH]; @@ -1147,13 +1253,6 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ -/* - * current io wait handle: wait queue entry to use for io waits - * If this thread is processing aio, this points at the waitqueue - * inside the currently handled kiocb. It may be NULL (i.e. default - * to a stack based synchronous wait) if its doing sync IO. - */ - wait_queue_t *io_wait; #ifdef CONFIG_TASK_XACCT /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; @@ -1169,18 +1268,24 @@ struct task_struct { short il_next; #endif #ifdef CONFIG_CPUSETS - struct cpuset *cpuset; nodemask_t mems_allowed; int cpuset_mems_generation; int cpuset_mem_spread_rotor; #endif +#ifdef CONFIG_CGROUPS + /* Control Group info protected by css_set_lock */ + struct css_set *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock */ + struct list_head cg_list; +#endif +#ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; - +#endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; @@ -1194,6 +1299,11 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif + struct prop_local_single dirties; +#ifdef CONFIG_LATENCYTOP + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; +#endif }; /* @@ -1227,24 +1337,14 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } -static inline pid_t process_group(struct task_struct *tsk) +static inline void set_task_session(struct task_struct *tsk, pid_t session) { - return tsk->signal->pgrp; + tsk->signal->__session = session; } -static inline pid_t signal_session(struct signal_struct *sig) +static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) { - return sig->__session; -} - -static inline pid_t process_session(struct task_struct *tsk) -{ - return signal_session(tsk->signal); -} - -static inline void set_signal_session(struct signal_struct *sig, pid_t session) -{ - sig->__session = session; + tsk->signal->__pgrp = pgrp; } static inline struct pid *task_pid(struct task_struct *task) @@ -1267,6 +1367,74 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } +struct pid_namespace; + +/* + * the helpers to get the task's different pids as they are seen + * from various namespaces + * + * task_xid_nr() : global id, i.e. the id seen from the init namespace; + * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of + * current. + * task_xid_nr_ns() : id seen from the ns specified; + * + * set_task_vxid() : assigns a virtual id to a task; + * + * see also pid_nr() etc in include/linux/pid.h + */ + +static inline pid_t task_pid_nr(struct task_struct *tsk) +{ + return tsk->pid; +} + +pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_pid_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_pid(tsk)); +} + + +static inline pid_t task_tgid_nr(struct task_struct *tsk) +{ + return tsk->tgid; +} + +pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_tgid(tsk)); +} + + +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return tsk->signal->__pgrp; +} + +pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_pgrp_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_pgrp(tsk)); +} + + +static inline pid_t task_session_nr(struct task_struct *tsk) +{ + return tsk->signal->__session; +} + +pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_session_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_session(tsk)); +} + + /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. @@ -1281,16 +1449,22 @@ static inline int pid_alive(struct task_struct *p) } /** - * is_init - check if a task structure is init + * is_global_init - check if a task structure is init * @tsk: Task structure to be checked. * * Check if a task structure is the first user space task the kernel created. */ -static inline int is_init(struct task_struct *tsk) +static inline int is_global_init(struct task_struct *tsk) { return tsk->pid == 1; } +/* + * is_container_init: + * check whether in the task is init in its own pid namespace. + */ +extern int is_container_init(struct task_struct *tsk); + extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); @@ -1312,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ @@ -1360,18 +1535,53 @@ static inline void put_task_struct(struct task_struct *t) #define used_math() tsk_used_math(current) #ifdef CONFIG_SMP -extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, + const cpumask_t *new_mask); #else -static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, + const cpumask_t *new_mask) { - if (!cpu_isset(0, new_mask)) + if (!cpu_isset(0, *new_mask)) return -EINVAL; return 0; } #endif +static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) +{ + return set_cpus_allowed_ptr(p, &new_mask); +} extern unsigned long long sched_clock(void); +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init(void) +{ +} + +static inline u64 sched_clock_cpu(int cpu) +{ + return sched_clock(); +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} +#else +extern void sched_clock_init(void); +extern u64 sched_clock_cpu(int cpu); +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); +#endif + /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): @@ -1399,13 +1609,33 @@ static inline void idle_task_exit(void) {} extern void sched_idle_next(void); -extern unsigned int sysctl_sched_granularity; +#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +extern void wake_up_idle_cpu(int cpu); +#else +static inline void wake_up_idle_cpu(int cpu) { } +#endif + +#ifdef CONFIG_SCHED_DEBUG +extern unsigned int sysctl_sched_latency; +extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_batch_wakeup_granularity; -extern unsigned int sysctl_sched_stat_granularity; -extern unsigned int sysctl_sched_runtime_limit; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; +extern unsigned int sysctl_sched_migration_cost; +extern unsigned int sysctl_sched_nr_migrate; + +int sched_nr_latency_handler(struct ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, + loff_t *ppos); +#endif +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + +int sched_rt_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + +extern unsigned int sysctl_sched_compat_yield; #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); @@ -1457,9 +1687,36 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) -extern struct task_struct *find_task_by_pid_type(int type, int pid); -extern void __set_special_pids(pid_t session, pid_t pgrp); +extern struct pid_namespace init_pid_ns; + +/* + * find a task by one of its numerical ids + * + * find_task_by_pid_type_ns(): + * it is the most generic call - it finds a task by all id, + * type and namespace specified + * find_task_by_pid_ns(): + * finds a task by its pid in the specified namespace + * find_task_by_vpid(): + * finds a task by its virtual pid + * find_task_by_pid(): + * finds a task by its global pid + * + * see also find_pid() etc in include/linux/pid.h + */ + +extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, + struct pid_namespace *ns); + +static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr) +{ + return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); +} +extern struct task_struct *find_task_by_vpid(pid_t nr); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, + struct pid_namespace *ns); + +extern void __set_special_pids(struct pid *pid); /* per-UID process charging. */ extern struct user_struct * alloc_uid(struct user_namespace *, uid_t); @@ -1470,15 +1727,16 @@ static inline struct user_struct *get_uid(struct user_struct *u) } extern void free_uid(struct user_struct *); extern void switch_uid(struct user_struct *); +extern void release_uids(struct user_namespace *ns); #include extern void do_timer(unsigned long ticks); -extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state)); -extern int FASTCALL(wake_up_process(struct task_struct * tsk)); -extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, - unsigned long clone_flags)); +extern int wake_up_state(struct task_struct *tsk, unsigned int state); +extern int wake_up_process(struct task_struct *tsk); +extern void wake_up_new_task(struct task_struct *tsk, + unsigned long clone_flags); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else @@ -1513,11 +1771,9 @@ extern void block_all_signals(int (*notifier)(void *priv), void *priv, extern void unblock_all_signals(void); extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int send_group_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32); extern int kill_pgrp(struct pid *pid, int sig, int priv); @@ -1531,8 +1787,7 @@ extern void zap_other_threads(struct task_struct *p); extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(int, struct sigqueue *, struct task_struct *); -extern int send_group_sigqueue(int, struct sigqueue *, struct task_struct *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); @@ -1570,7 +1825,7 @@ static inline int sas_ss_flags(unsigned long sp) extern struct mm_struct * mm_alloc(void); /* mmdrop drops the mm and the page tables */ -extern void FASTCALL(__mmdrop(struct mm_struct *)); +extern void __mmdrop(struct mm_struct *); static inline void mmdrop(struct mm_struct * mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) @@ -1583,6 +1838,8 @@ extern void mmput(struct mm_struct *); extern struct mm_struct *get_task_mm(struct task_struct *task); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +/* Allocate a new mm structure and copy contents from tsk->mm */ +extern struct mm_struct *dup_mm(struct task_struct *tsk); extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); extern void flush_thread(void); @@ -1604,7 +1861,7 @@ extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned lon struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); -extern void get_task_comm(char *to, struct task_struct *tsk); +extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP extern void wait_task_inactive(struct task_struct * p); @@ -1644,6 +1901,12 @@ static inline int has_group_leader_pid(struct task_struct *p) return p->pid == p->tgid; } +static inline +int same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->tgid == p2->tgid; +} + static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry(rcu_dereference(p->thread_group.next), @@ -1661,7 +1924,8 @@ static inline int thread_group_empty(struct task_struct *p) /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset. + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), @@ -1704,6 +1968,8 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #endif +extern void thread_info_cache_init(void); + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ @@ -1742,14 +2008,26 @@ static inline void clear_tsk_need_resched(struct task_struct *tsk) clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } +static inline int test_tsk_need_resched(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); +} + static inline int signal_pending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } - + +extern int __fatal_signal_pending(struct task_struct *p); + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && __fatal_signal_pending(p); +} + static inline int need_resched(void) { - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); + return unlikely(test_tsk_need_resched(current)); } /* @@ -1759,29 +2037,33 @@ static inline int need_resched(void) * cond_resched_lock() will drop the spinlock before scheduling, * cond_resched_softirq() will enable bhs before scheduling. */ -extern int cond_resched(void); -extern int cond_resched_lock(spinlock_t * lock); -extern int cond_resched_softirq(void); - -/* - * Does a critical section need to be broken due to another - * task waiting?: - */ -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -# define need_lockbreak(lock) ((lock)->break_lock) +#ifdef CONFIG_PREEMPT +static inline int cond_resched(void) +{ + return 0; +} #else -# define need_lockbreak(lock) 0 +extern int _cond_resched(void); +static inline int cond_resched(void) +{ + return _cond_resched(); +} #endif +extern int cond_resched_lock(spinlock_t * lock); +extern int cond_resched_softirq(void); /* * Does a critical section need to be broken due to another - * task waiting or preemption being signalled: + * task waiting?: (technically does not depend on CONFIG_PREEMPT, + * but a general need for low latency) */ -static inline int lock_need_resched(spinlock_t *lock) +static inline int spin_needbreak(spinlock_t *lock) { - if (need_lockbreak(lock) || need_resched()) - return 1; +#ifdef CONFIG_PREEMPT + return spin_is_contended(lock); +#else return 0; +#endif } /* @@ -1831,13 +2113,37 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif -extern long sched_setaffinity(pid_t pid, cpumask_t new_mask); +extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); extern int sched_mc_power_savings, sched_smt_power_savings; extern void normalize_rt_tasks(void); +#ifdef CONFIG_GROUP_SCHED + +extern struct task_group init_task_group; +#ifdef CONFIG_USER_SCHED +extern struct task_group root_task_group; +#endif + +extern struct task_group *sched_create_group(struct task_group *parent); +extern void sched_destroy_group(struct task_group *tg); +extern void sched_move_task(struct task_struct *tsk); +#ifdef CONFIG_FAIR_GROUP_SCHED +extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); +extern unsigned long sched_group_shares(struct task_group *tg); +#endif +#ifdef CONFIG_RT_GROUP_SCHED +extern int sched_group_set_rt_runtime(struct task_group *tg, + long rt_runtime_us); +extern long sched_group_rt_runtime(struct task_group *tg); +extern int sched_group_set_rt_period(struct task_group *tg, + long rt_period_us); +extern long sched_group_rt_period(struct task_group *tg); +#endif +#endif + #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { @@ -1876,6 +2182,31 @@ static inline void inc_syscw(struct task_struct *tsk) } #endif +#ifdef CONFIG_SMP +void migration_init(void); +#else +static inline void migration_init(void) +{ +} +#endif + +#ifndef TASK_SIZE_OF +#define TASK_SIZE_OF(tsk) TASK_SIZE +#endif + +#ifdef CONFIG_MM_OWNER +extern void mm_update_next_owner(struct mm_struct *mm); +extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} + +static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ +} +#endif /* CONFIG_MM_OWNER */ + #endif /* __KERNEL__ */ #endif