#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
+#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#define CLONE_IO 0x80000000 /* Clone io context */
/*
* Scheduling policies
#include <linux/proportions.h>
#include <linux/seccomp.h>
#include <linux/rcupdate.h>
-#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/task_io_accounting.h>
#include <linux/kobject.h>
+#include <linux/latencytop.h>
#include <asm/processor.h>
struct exec_domain;
struct futex_pi_state;
+struct robust_list_head;
struct bio;
/*
}
#endif
+extern unsigned long rt_needs_cpu(int cpu);
+
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
extern void cpu_init (void);
extern void trap_init(void);
+extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
+extern void hrtick_resched(void);
+
+extern void sched_show_task(struct task_struct *p);
#ifdef CONFIG_DETECT_SOFTLOCKUP
extern void softlockup_tick(void);
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int softlockup_thresh;
+extern unsigned long softlockup_thresh;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
#else
static inline void softlockup_tick(void)
{
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))
+
+/* Linker adds these: start and end of __sched functions */
+extern char __sched_text_start[], __sched_text_end[];
+
/* Is this address in the __sched functions? */
extern int in_sched_functions(unsigned long addr);
cputime_t it_prof_incr, it_virt_incr;
/* job control IDs */
- pid_t pgrp;
+
+ /*
+ * pgrp and session fields are deprecated.
+ * use the task_session_Xnr and task_pgrp_Xnr routines below
+ */
+
+ union {
+ pid_t pgrp __deprecated;
+ pid_t __pgrp;
+ };
+
struct pid *tty_old_pgrp;
union {
atomic_t inotify_watches; /* How many inotify watches does this user have? */
atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
#endif
+#ifdef CONFIG_POSIX_MQUEUE
/* protected by mq_lock */
unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
+#endif
unsigned long locked_shm; /* How many pages of mlocked shm ? */
#ifdef CONFIG_KEYS
#ifdef CONFIG_FAIR_USER_SCHED
struct task_group *tg;
- struct kset kset;
- struct subsys_attribute user_attr;
+#ifdef CONFIG_SYSFS
+ struct kobject kobj;
struct work_struct work;
#endif
+#endif
};
-#ifdef CONFIG_FAIR_USER_SCHED
-extern int uids_kobject_init(void);
-#else
-static inline int uids_kobject_init(void) { return 0; }
-#endif
+extern int uids_sysfs_init(void);
extern struct user_struct *find_user(uid_t);
last_queued; /* when we were last queued to run */
#ifdef CONFIG_SCHEDSTATS
/* BKL stats */
- unsigned long bkl_count;
+ unsigned int bkl_count;
#endif
};
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
- unsigned long lb_count[CPU_MAX_IDLE_TYPES];
- unsigned long lb_failed[CPU_MAX_IDLE_TYPES];
- unsigned long lb_balanced[CPU_MAX_IDLE_TYPES];
- unsigned long lb_imbalance[CPU_MAX_IDLE_TYPES];
- unsigned long lb_gained[CPU_MAX_IDLE_TYPES];
- unsigned long lb_hot_gained[CPU_MAX_IDLE_TYPES];
- unsigned long lb_nobusyg[CPU_MAX_IDLE_TYPES];
- unsigned long lb_nobusyq[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_count[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
/* Active load balancing */
- unsigned long alb_count;
- unsigned long alb_failed;
- unsigned long alb_pushed;
+ unsigned int alb_count;
+ unsigned int alb_failed;
+ unsigned int alb_pushed;
/* SD_BALANCE_EXEC stats */
- unsigned long sbe_count;
- unsigned long sbe_balanced;
- unsigned long sbe_pushed;
+ unsigned int sbe_count;
+ unsigned int sbe_balanced;
+ unsigned int sbe_pushed;
/* SD_BALANCE_FORK stats */
- unsigned long sbf_count;
- unsigned long sbf_balanced;
- unsigned long sbf_pushed;
+ unsigned int sbf_count;
+ unsigned int sbf_balanced;
+ unsigned int sbf_pushed;
/* try_to_wake_up() stats */
- unsigned long ttwu_wake_remote;
- unsigned long ttwu_move_affine;
- unsigned long ttwu_move_balance;
+ unsigned int ttwu_wake_remote;
+ unsigned int ttwu_move_affine;
+ unsigned int ttwu_move_balance;
#endif
};
+extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
+
#endif /* CONFIG_SMP */
/*
}
struct io_context; /* See blkdev.h */
-struct cpuset;
-
#define NGROUPS_SMALL 32
#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))
struct group_info {
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
void (*yield_task) (struct rq *rq);
+ int (*select_task_rq)(struct task_struct *p, int sync);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
struct task_struct * (*pick_next_task) (struct rq *rq);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+#ifdef CONFIG_SMP
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
- struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
+ struct rq *busiest, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio);
+ int (*move_one_task) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, struct sched_domain *sd,
+ enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+#endif
+
void (*set_curr_task) (struct rq *rq);
- void (*task_tick) (struct rq *rq, struct task_struct *p);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_new) (struct rq *rq, struct task_struct *p);
+ void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
+
+ void (*join_domain)(struct rq *rq);
+ void (*leave_domain)(struct rq *rq);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
};
struct load_weight {
struct load_weight load; /* for load-balancing */
struct rb_node run_node;
unsigned int on_rq;
- int peer_preempt;
u64 exec_start;
u64 sum_exec_runtime;
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
u64 sleep_start;
u64 sleep_max;
#endif
};
+struct sched_rt_entity {
+ struct list_head run_list;
+ unsigned int time_slice;
+ unsigned long timeout;
+ int nr_cpus_allowed;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_rt_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct rt_rq *rt_rq;
+ /* rq "owned" by this entity/group: */
+ struct rt_rq *my_q;
+#endif
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
#endif
int prio, static_prio, normal_prio;
- struct list_head run_list;
const struct sched_class *sched_class;
struct sched_entity se;
+ struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
- unsigned short ioprio;
+ /*
+ * fpu_counter contains the number of consecutive context switches
+ * that the FPU is used. If this is over a threshold, the lazy fpu
+ * saving becomes unlazy to save the trap. This is an unsigned char
+ * so that after 256 times the counter wraps and the behavior turns
+ * lazy again; this to deal with bursty apps that only use FPU for
+ * a short time
+ */
+ unsigned char fpu_counter;
+ s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
unsigned int policy;
cpumask_t cpus_allowed;
- unsigned int time_slice;
+
+#ifdef CONFIG_PREEMPT_RCU
+ int rcu_read_lock_nesting;
+ int rcu_flipctr_idx;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
unsigned int rt_priority;
- cputime_t utime, stime;
+ cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
+ cputime_t prev_utime, prev_stime;
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
struct key *thread_keyring; /* keyring private to this thread */
unsigned char jit_keyring; /* default keyring to attach requested keys to */
#endif
- /*
- * fpu_counter contains the number of consecutive context switches
- * that the FPU is used. If this is over a threshold, the lazy fpu
- * saving becomes unlazy to save the trap. This is an unsigned char
- * so that after 256 times the counter wraps and the behavior turns
- * lazy again; this to deal with bursty apps that only use FPU for
- * a short time
- */
- unsigned char fpu_counter;
- s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+/* hung task detection */
+ unsigned long last_switch_timestamp;
+ unsigned long last_switch_count;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
-
+#ifdef CONFIG_SECURITY
void *security;
+#endif
struct audit_context *audit_context;
seccomp_t seccomp;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
-/*
- * current io wait handle: wait queue entry to use for io waits
- * If this thread is processing aio, this points at the waitqueue
- * inside the currently handled kiocb. It may be NULL (i.e. default
- * to a stack based synchronous wait) if its doing sync IO.
- */
- wait_queue_t *io_wait;
#ifdef CONFIG_TASK_XACCT
/* i/o counters(bytes read/written, #syscalls */
u64 rchar, wchar, syscr, syscw;
short il_next;
#endif
#ifdef CONFIG_CPUSETS
- struct cpuset *cpuset;
nodemask_t mems_allowed;
int cpuset_mems_generation;
int cpuset_mem_spread_rotor;
#endif
+#ifdef CONFIG_CGROUPS
+ /* Control Group info protected by css_set_lock */
+ struct css_set *cgroups;
+ /* cg_list protected by css_set_lock and tsk->alloc_lock */
+ struct list_head cg_list;
+#endif
+#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
-
+#endif
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
int make_it_fail;
#endif
struct prop_local_single dirties;
+#ifdef CONFIG_LATENCYTOP
+ int latency_record_count;
+ struct latency_record latency_record[LT_SAVECOUNT];
+#endif
};
/*
return rt_prio(p->prio);
}
-static inline pid_t process_group(struct task_struct *tsk)
-{
- return tsk->signal->pgrp;
-}
-
-static inline pid_t signal_session(struct signal_struct *sig)
-{
- return sig->__session;
-}
-
-static inline pid_t process_session(struct task_struct *tsk)
+static inline void set_task_session(struct task_struct *tsk, pid_t session)
{
- return signal_session(tsk->signal);
+ tsk->signal->__session = session;
}
-static inline void set_signal_session(struct signal_struct *sig, pid_t session)
+static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
{
- sig->__session = session;
+ tsk->signal->__pgrp = pgrp;
}
static inline struct pid *task_pid(struct task_struct *task)
return task->group_leader->pids[PIDTYPE_SID].pid;
}
+struct pid_namespace;
+
+/*
+ * the helpers to get the task's different pids as they are seen
+ * from various namespaces
+ *
+ * task_xid_nr() : global id, i.e. the id seen from the init namespace;
+ * task_xid_vnr() : virtual id, i.e. the id seen from the namespace the task
+ * belongs to. this only makes sence when called in the
+ * context of the task that belongs to the same namespace;
+ * task_xid_nr_ns() : id seen from the ns specified;
+ *
+ * set_task_vxid() : assigns a virtual id to a task;
+ *
+ * see also pid_nr() etc in include/linux/pid.h
+ */
+
+static inline pid_t task_pid_nr(struct task_struct *tsk)
+{
+ return tsk->pid;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pid_vnr(struct task_struct *tsk)
+{
+ return pid_vnr(task_pid(tsk));
+}
+
+
+static inline pid_t task_tgid_nr(struct task_struct *tsk)
+{
+ return tsk->tgid;
+}
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+ return pid_vnr(task_tgid(tsk));
+}
+
+
+static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+{
+ return tsk->signal->__pgrp;
+}
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+{
+ return pid_vnr(task_pgrp(tsk));
+}
+
+
+static inline pid_t task_session_nr(struct task_struct *tsk)
+{
+ return tsk->signal->__session;
+}
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_session_vnr(struct task_struct *tsk)
+{
+ return pid_vnr(task_session(tsk));
+}
+
+
/**
* pid_alive - check that a task structure is not stale
* @p: Task structure to be checked.
}
/**
- * is_init - check if a task structure is init
+ * is_global_init - check if a task structure is init
* @tsk: Task structure to be checked.
*
* Check if a task structure is the first user space task the kernel created.
*/
-static inline int is_init(struct task_struct *tsk)
+static inline int is_global_init(struct task_struct *tsk)
{
return tsk->pid == 1;
}
+/*
+ * is_container_init:
+ * check whether in the task is init in its own pid namespace.
+ */
+extern int is_container_init(struct task_struct *tsk);
+
extern struct pid *cad_pid;
extern void free_task(struct task_struct *tsk);
#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_nr_latency;
+extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_rt_period;
+extern unsigned int sysctl_sched_rt_ratio;
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+extern unsigned int sysctl_sched_min_bal_int_shares;
+extern unsigned int sysctl_sched_max_bal_int_shares;
+#endif
+
+int sched_nr_latency_handler(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length,
+ loff_t *ppos);
#endif
extern unsigned int sysctl_sched_compat_yield;
extern struct mm_struct init_mm;
-#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+extern struct pid_namespace init_pid_ns;
+
+/*
+ * find a task by one of its numerical ids
+ *
+ * find_task_by_pid_type_ns():
+ * it is the most generic call - it finds a task by all id,
+ * type and namespace specified
+ * find_task_by_pid_ns():
+ * finds a task by its pid in the specified namespace
+ * find_task_by_vpid():
+ * finds a task by its virtual pid
+ * find_task_by_pid():
+ * finds a task by its global pid
+ *
+ * see also find_pid() etc in include/linux/pid.h
+ */
+
+extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
+ struct pid_namespace *ns);
+
+extern struct task_struct *find_task_by_pid(pid_t nr);
+extern struct task_struct *find_task_by_vpid(pid_t nr);
+extern struct task_struct *find_task_by_pid_ns(pid_t nr,
+ struct pid_namespace *ns);
+
extern void __set_special_pids(pid_t session, pid_t pgrp);
/* per-UID process charging. */
return p->pid == p->tgid;
}
+static inline
+int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+ return p1->tgid == p2->tgid;
+}
+
static inline struct task_struct *next_thread(const struct task_struct *p)
{
return list_entry(rcu_dereference(p->thread_group.next),
/*
* Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
* subscriptions and synchronises with wait4(). Also used in procfs. Also
- * pins the final release of task.io_context. Also protects ->cpuset.
+ * pins the final release of task.io_context. Also protects ->cpuset and
+ * ->cgroup.subsys[].
*
* Nests both inside and outside of read_lock(&tasklist_lock).
* It must not be nested with write_lock_irq(&tasklist_lock),
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
-extern int cond_resched(void);
+#ifdef CONFIG_PREEMPT
+static inline int cond_resched(void)
+{
+ return 0;
+}
+#else
+extern int _cond_resched(void);
+static inline int cond_resched(void)
+{
+ return _cond_resched();
+}
+#endif
extern int cond_resched_lock(spinlock_t * lock);
extern int cond_resched_softirq(void);
}
#endif
+#ifdef CONFIG_SMP
+void migration_init(void);
+#else
+static inline void migration_init(void)
+{
+}
+#endif
+
#endif /* __KERNEL__ */
#endif