struct futex_pi_state;
struct robust_list_head;
struct bio;
+struct bts_tracer;
/*
* List of flags we want to share for kernel threads,
extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(void);
+extern void task_rq_unlock_wait(struct task_struct *p);
-extern cpumask_t nohz_cpu_mask;
+extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
#else
}
#endif
-extern unsigned long rt_needs_cpu(int cpu);
-
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
extern void cpu_init (void);
extern void trap_init(void);
-extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
extern void softlockup_tick(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
+extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
(mm)->hiwater_vm = (mm)->total_vm; \
} while (0)
+#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm))
+#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm)
+
extern void set_dumpable(struct mm_struct *mm, int value);
extern int get_dumpable(struct mm_struct *mm);
#define MMF_DUMP_MAPPED_PRIVATE 4
#define MMF_DUMP_MAPPED_SHARED 5
#define MMF_DUMP_ELF_HEADERS 6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED 8
#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS 5
+#define MMF_DUMP_FILTER_BITS 7
#define MMF_DUMP_FILTER_MASK \
(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
#define MMF_DUMP_FILTER_DEFAULT \
- ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
+ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF 0
+#endif
struct sighand_struct {
atomic_t count;
unsigned long ac_minflt, ac_majflt;
};
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime: time spent in user mode, in &cputime_t units
+ * @stime: time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
+ *
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups. Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+ cputime_t utime;
+ cputime_t stime;
+ unsigned long long sum_exec_runtime;
+ spinlock_t lock;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp stime
+#define virt_exp utime
+#define sched_exp sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals: thread group interval timers; substructure for
+ * uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+struct thread_group_cputime {
+ struct task_cputime totals;
+};
+
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
cputime_t it_prof_expires, it_virt_expires;
cputime_t it_prof_incr, it_virt_incr;
+ /*
+ * Thread group totals for process CPU clocks.
+ * See thread_group_cputime(), et al, for details.
+ */
+ struct thread_group_cputime cputime;
+
+ /* Earliest-expiration cache. */
+ struct task_cputime cputime_expires;
+
+ struct list_head cpu_timers[3];
+
/* job control IDs */
/*
* Live threads maintain their own counters and add to these
* in __exit_signal, except for the group leader.
*/
- cputime_t utime, stime, cutime, cstime;
+ cputime_t cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
struct task_io_accounting ioac;
/*
- * Cumulative ns of scheduled CPU time for dead threads in the
- * group, not including a zombie group leader. (This only differs
- * from jiffies_to_ns(utime + stime) if sched_clock uses something
- * other than jiffies.)
- */
- unsigned long long sum_sched_runtime;
-
- /*
* We don't bother to synchronize most readers of this at all,
* because there is no reader checking a limit that actually needs
* to get both rlim_cur and rlim_max atomically, and either one
*/
struct rlimit rlim[RLIM_NLIMITS];
- struct list_head cpu_timers[3];
-
- /* keep the process-shared keyrings here so that they do the right
- * thing in threads created with CLONE_THREAD */
-#ifdef CONFIG_KEYS
- struct key *session_keyring; /* keyring inherited over fork */
- struct key *process_keyring; /* keyring private to this process */
-#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
struct pacct_struct pacct; /* per-process accounting information */
#endif
atomic_t inotify_watches; /* How many inotify watches does this user have? */
atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
#endif
+#ifdef CONFIG_EPOLL
+ atomic_t epoll_watches; /* The number of file descriptors currently watched */
+#endif
#ifdef CONFIG_POSIX_MQUEUE
/* protected by mq_lock */
unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
/* Hash table maintenance information */
struct hlist_node uidhash_node;
uid_t uid;
+ struct user_namespace *user_ns;
#ifdef CONFIG_USER_SCHED
struct task_group *tg;
extern struct user_struct root_user;
#define INIT_USER (&root_user)
+
struct backing_dev_info;
struct reclaim_state;
struct sched_info {
/* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */
- unsigned long long cpu_time, /* time spent on the cpu */
- run_delay; /* time spent waiting on a runqueue */
+ unsigned long long run_delay; /* time spent waiting on a runqueue */
/* timestamps */
unsigned long long last_arrival,/* when we last ran on a cpu */
};
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
-#ifdef CONFIG_SCHEDSTATS
-extern const struct file_operations proc_schedstat_operations;
-#endif /* CONFIG_SCHEDSTATS */
-
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
spinlock_t lock;
#define SD_SERIALIZE 1024 /* Only a single load balancing instance */
#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */
-#define BALANCE_FOR_MC_POWER \
- (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
+enum powersavings_balance_level {
+ POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
+ POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package
+ * first for long running threads
+ */
+ POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle
+ * cpu package for power savings
+ */
+ MAX_POWERSAVINGS_BALANCE_LEVELS
+};
+
+extern int sched_mc_power_savings, sched_smt_power_savings;
+
+static inline int sd_balance_for_mc_power(void)
+{
+ if (sched_smt_power_savings)
+ return SD_POWERSAVINGS_BALANCE;
+
+ return 0;
+}
-#define BALANCE_FOR_PKG_POWER \
- ((sched_mc_power_savings || sched_smt_power_savings) ? \
- SD_POWERSAVINGS_BALANCE : 0)
+static inline int sd_balance_for_package_power(void)
+{
+ if (sched_mc_power_savings | sched_smt_power_savings)
+ return SD_POWERSAVINGS_BALANCE;
+
+ return 0;
+}
-#define test_sd_parent(sd, flag) ((sd->parent && \
- (sd->parent->flags & flag)) ? 1 : 0)
+/*
+ * Optimise SD flags for power savings:
+ * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
+ * Keep default SD flags if sched_{smt,mc}_power_saving=0
+ */
+static inline int sd_power_saving_flags(void)
+{
+ if (sched_mc_power_savings | sched_smt_power_savings)
+ return SD_BALANCE_NEWIDLE;
+
+ return 0;
+}
struct sched_group {
struct sched_group *next; /* Must be a circular list */
- cpumask_t cpumask;
/*
* CPU power of this group, SCHED_LOAD_SCALE being max power for a
* (see include/linux/reciprocal_div.h)
*/
u32 reciprocal_cpu_power;
+
+ unsigned long cpumask[];
};
+static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+{
+ return to_cpumask(sg->cpumask);
+}
+
enum sched_domain_level {
SD_LV_NONE = 0,
SD_LV_SIBLING,
struct sched_domain *parent; /* top domain must be null terminated */
struct sched_domain *child; /* bottom domain must be null terminated */
struct sched_group *groups; /* the balancing groups of the domain */
- cpumask_t span; /* span of all CPUs in this domain */
unsigned long min_interval; /* Minimum balance interval ms */
unsigned long max_interval; /* Maximum balance interval ms */
unsigned int busy_factor; /* less balancing by factor if busy */
#ifdef CONFIG_SCHED_DEBUG
char *name;
#endif
+
+ /* span of all CPUs in this domain */
+ unsigned long span[];
};
-extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+ return to_cpumask(sd->span);
+}
+
+extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
struct sched_domain_attr *dattr_new);
-extern int arch_reinit_sched_domains(void);
+
+/* Test a flag in parent sched domain */
+static inline int test_sd_parent(struct sched_domain *sd, int flag)
+{
+ if (sd->parent && (sd->parent->flags & flag))
+ return 1;
+
+ return 0;
+}
#else /* CONFIG_SMP */
struct sched_domain_attr;
static inline void
-partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
struct sched_domain_attr *dattr_new)
{
}
#endif /* !CONFIG_SMP */
struct io_context; /* See blkdev.h */
-#define NGROUPS_SMALL 32
-#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
-struct group_info {
- int ngroups;
- atomic_t usage;
- gid_t small_block[NGROUPS_SMALL];
- int nblocks;
- gid_t *blocks[0];
-};
-
-/*
- * get_group_info() must be called with the owning task locked (via task_lock())
- * when task != current. The reason being that the vast majority of callers are
- * looking at current->group_info, which can not be changed except by the
- * current task. Changing current->group_info requires the task lock, too.
- */
-#define get_group_info(group_info) do { \
- atomic_inc(&(group_info)->usage); \
-} while (0)
-
-#define put_group_info(group_info) do { \
- if (atomic_dec_and_test(&(group_info)->usage)) \
- groups_free(group_info); \
-} while (0)
-extern struct group_info *groups_alloc(int gidsetsize);
-extern void groups_free(struct group_info *group_info);
-extern int set_current_groups(struct group_info *group_info);
-extern int groups_search(struct group_info *group_info, gid_t grp);
-/* access the groups "array" with this macro */
-#define GROUP_AT(gi, i) \
- ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
extern void prefetch_stack(struct task_struct *t);
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
void (*yield_task) (struct rq *rq);
- int (*select_task_rq)(struct task_struct *p, int sync);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
+ int (*select_task_rq)(struct task_struct *p, int sync);
+
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
struct rq *busiest, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
void (*post_schedule) (struct rq *this_rq);
void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
-#endif
- void (*set_curr_task) (struct rq *rq);
- void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
- void (*task_new) (struct rq *rq, struct task_struct *p);
void (*set_cpus_allowed)(struct task_struct *p,
- const cpumask_t *newmask);
+ const struct cpumask *newmask);
void (*rq_online)(struct rq *rq);
void (*rq_offline)(struct rq *rq);
+#endif
+
+ void (*set_curr_task) (struct rq *rq);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+ void (*task_new) (struct rq *rq, struct task_struct *p);
void (*switched_from) (struct rq *this_rq, struct task_struct *task,
int running);
struct list_head ptraced;
struct list_head ptrace_entry;
+#ifdef CONFIG_X86_PTRACE_BTS
+ /*
+ * This is the tracer handle for the ptrace BTS extension.
+ * This field actually belongs to the ptracer task.
+ */
+ struct bts_tracer *bts;
+ /*
+ * The buffer to hold the BTS data.
+ */
+ void *bts_buffer;
+ size_t bts_size;
+#endif /* CONFIG_X86_PTRACE_BTS */
+
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
- cputime_t it_prof_expires, it_virt_expires;
- unsigned long long it_sched_expires;
+ struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
/* process credentials */
- uid_t uid,euid,suid,fsuid;
- gid_t gid,egid,sgid,fsgid;
- struct group_info *group_info;
- kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset;
- struct user_struct *user;
- unsigned securebits;
-#ifdef CONFIG_KEYS
- unsigned char jit_keyring; /* default keyring to attach requested keys to */
- struct key *request_key_auth; /* assumed request_key authority */
- struct key *thread_keyring; /* keyring private to this thread */
-#endif
+ const struct cred *real_cred; /* objective and real subjective task
+ * credentials (COW) */
+ const struct cred *cred; /* effective (overridable) subjective task
+ * credentials (COW) */
+ struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */
+
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
-#ifdef CONFIG_SECURITY
- void *security;
-#endif
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
+ /*
+ * time slack values; these are used to round up poll() and
+ * select() etc timeout values. These are in nanoseconds.
+ */
+ unsigned long timer_slack_ns;
+ unsigned long default_timer_slack_ns;
+
+ struct list_head *scm_work_list;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Index of current stored adress in ret_stack */
+ int curr_ret_stack;
+ /* Stack of return addresses for return function tracing */
+ struct ftrace_ret_stack *ret_stack;
+ /*
+ * Number of functions that haven't been traced
+ * because of depth overrun.
+ */
+ atomic_t trace_overrun;
+ /* Pause for the tracing */
+ atomic_t tracing_graph_pause;
+#endif
+#ifdef CONFIG_TRACING
+ /* state flags for use by tracers */
+ unsigned long trace;
+#endif
};
/*
#ifdef CONFIG_SMP
extern int set_cpus_allowed_ptr(struct task_struct *p,
- const cpumask_t *new_mask);
+ const struct cpumask *new_mask);
#else
static inline int set_cpus_allowed_ptr(struct task_struct *p,
- const cpumask_t *new_mask)
+ const struct cpumask *new_mask)
{
- if (!cpu_isset(0, *new_mask))
+ if (!cpumask_test_cpu(0, new_mask))
return -EINVAL;
return 0;
}
extern unsigned long long
task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
static inline void wake_up_idle_cpu(int cpu) { }
#endif
-#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
+#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_shares_ratelimit;
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
return u;
}
extern void free_uid(struct user_struct *);
-extern void switch_uid(struct user_struct *);
extern void release_uids(struct user_namespace *ns);
#include <asm/current.h>
extern void sched_fork(struct task_struct *p, int clone_flags);
extern void sched_dead(struct task_struct *p);
-extern int in_group_p(gid_t);
-extern int in_egroup_p(gid_t);
-
extern void proc_caches_init(void);
extern void flush_signals(struct task_struct *);
extern void ignore_signals(struct task_struct *);
#define for_each_process(p) \
for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+extern bool is_single_threaded(struct task_struct *);
+
/*
* Careful: do_each_thread/while_each_thread is a double loop so
* 'break' will not work as expected - use goto instead.
}
/*
+ * Thread group CPU time accounting.
+ */
+
+static inline
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct task_cputime *totals = &tsk->signal->cputime.totals;
+ unsigned long flags;
+
+ spin_lock_irqsave(&totals->lock, flags);
+ *times = *totals;
+ spin_unlock_irqrestore(&totals->lock, flags);
+}
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+ sig->cputime.totals = (struct task_cputime){
+ .utime = cputime_zero,
+ .stime = cputime_zero,
+ .sum_exec_runtime = 0,
+ };
+
+ spin_lock_init(&sig->cputime.totals.lock);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+}
+
+/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
* This is required every time the blocked sigset_t changes.
}
#endif
-extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
-extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
-
-extern int sched_mc_power_savings, sched_smt_power_savings;
+extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
extern void normalize_rt_tasks(void);
extern struct task_group init_task_group;
#ifdef CONFIG_USER_SCHED
extern struct task_group root_task_group;
+extern void set_tg_uid(struct user_struct *user);
#endif
extern struct task_group *sched_create_group(struct task_group *parent);