X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched.c;h=8e2558c2ba67be440ef7bcb2d95ff8138c806404;hb=7e0f7cf582abd6c85232331dfe726a4e4b0fd98e;hp=545c6fccd1dc7ceba0e33e33250fc38369a41fab;hpb=7d3b56ba37a95f1f370f50258ed3954c304c524b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched.c b/kernel/sched.c index 545c6fc..8e2558c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch); DEFINE_TRACE(sched_migrate_task); #ifdef CONFIG_SMP + +static void double_rq_lock(struct rq *rq1, struct rq *rq2); + /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) * Since cpu_power is a 'constant', we can use a reciprocal divide. @@ -220,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) * slice expiry etc. */ -#define WEIGHT_IDLEPRIO 2 -#define WMULT_IDLEPRIO (1 << 31) +#define WEIGHT_IDLEPRIO 3 +#define WMULT_IDLEPRIO 1431655765 /* * Nice levels are multiplicative, with a gentle 10% change for every @@ -3728,8 +3731,13 @@ redo: } double_unlock_balance(this_rq, busiest); + /* + * Should not call ttwu while holding a rq->lock + */ + spin_unlock(&this_rq->lock); if (active_balance) wake_up_process(busiest->migration_thread); + spin_lock(&this_rq->lock); } else sd->nr_balance_failed = 0; @@ -3872,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpumask_set_cpu(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; - /* - * If we are going offline and still the leader, give up! - */ - if (!cpu_active(cpu) && - atomic_read(&nohz.load_balancer) == cpu) { + if (!cpu_active(cpu)) { + if (atomic_read(&nohz.load_balancer) != cpu) + return 0; + + /* + * If we are going offline and still the leader, + * give up! + */ if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) BUG(); + return 0; } + cpumask_set_cpu(cpu, nohz.cpu_mask); + /* time for ilb owner also to sleep */ if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) @@ -4432,7 +4445,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) return; /* * Is the spinlock portion underflowing? @@ -4679,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key) +void __wake_up_common(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, int sync, void *key) { wait_queue_t *curr, *next; @@ -5118,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice) * sys_setpriority is a more generic, but much slower function that * does similar things. */ -asmlinkage long sys_nice(int increment) +SYSCALL_DEFINE1(nice, int, increment) { long nice, retval; @@ -5425,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @policy: new policy. * @param: structure containing the new RT priority. */ -asmlinkage long -sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) +SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, + struct sched_param __user *, param) { /* negative values for policy are not valid */ if (policy < 0) @@ -5440,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @pid: the pid in question. * @param: structure containing the new RT priority. */ -asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) { return do_sched_setscheduler(pid, -1, param); } @@ -5449,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) * sys_sched_getscheduler - get the policy (scheduling class) of a thread * @pid: the pid in question. */ -asmlinkage long sys_sched_getscheduler(pid_t pid) +SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) { struct task_struct *p; int retval; @@ -5474,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) * @pid: the pid in question. * @param: structure containing the RT priority. */ -asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { struct sched_param lp; struct task_struct *p; @@ -5592,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to the new cpu mask */ -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { cpumask_var_t new_mask; int retval; @@ -5640,8 +5653,8 @@ out_unlock: * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask */ -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { int ret; cpumask_var_t mask; @@ -5670,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, * This function yields the current CPU to other tasks. If there are no * other threads running on this CPU then this function will return. */ -asmlinkage long sys_sched_yield(void) +SYSCALL_DEFINE0(sched_yield) { struct rq *rq = this_rq_lock(); @@ -5811,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout) * this syscall returns the maximum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_max(int policy) +SYSCALL_DEFINE1(sched_get_priority_max, int, policy) { int ret = -EINVAL; @@ -5836,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) * this syscall returns the minimum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_min(int policy) +SYSCALL_DEFINE1(sched_get_priority_min, int, policy) { int ret = -EINVAL; @@ -5861,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -asmlinkage -long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, + struct timespec __user *, interval) { struct task_struct *p; unsigned int time_slice; @@ -6931,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd) static void rq_attach_root(struct rq *rq, struct root_domain *rd) { + struct root_domain *old_rd = NULL; unsigned long flags; spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - struct root_domain *old_rd = rq->rd; + old_rd = rq->rd; if (cpumask_test_cpu(rq->cpu, old_rd->online)) set_rq_offline(rq); cpumask_clear_cpu(rq->cpu, old_rd->span); - if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + /* + * If we dont want to free the old_rt yet then + * set old_rd to NULL to skip the freeing later + * in this function: + */ + if (!atomic_dec_and_test(&old_rd->refcount)) + old_rd = NULL; } atomic_inc(&rd->refcount); @@ -6955,9 +6974,12 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); + + if (old_rd) + free_rootdomain(old_rd); } -static int init_rootdomain(struct root_domain *rd, bool bootmem) +static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) { memset(rd, 0, sizeof(*rd)); @@ -6970,7 +6992,7 @@ static int init_rootdomain(struct root_domain *rd, bool bootmem) } if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) - goto free_rd; + goto out; if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) goto free_span; if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) @@ -6986,8 +7008,7 @@ free_online: free_cpumask_var(rd->online); free_span: free_cpumask_var(rd->span); -free_rd: - kfree(rd); +out: return -ENOMEM; } @@ -7278,10 +7299,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, * groups, so roll our own. Now each node has its own list of groups which * gets dynamically allocated. */ -static DEFINE_PER_CPU(struct sched_domain, node_domains); +static DEFINE_PER_CPU(struct static_sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; -static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); +static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, @@ -7556,7 +7577,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_NUMA if (cpumask_weight(cpu_map) > SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { - sd = &per_cpu(allnodes_domains, i); + sd = &per_cpu(allnodes_domains, i).sd; SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); cpumask_copy(sched_domain_span(sd), cpu_map); @@ -7566,7 +7587,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, } else p = NULL; - sd = &per_cpu(node_domains, i); + sd = &per_cpu(node_domains, i).sd; SD_INIT(sd, NODE); set_domain_attribute(sd, attr); sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); @@ -7684,7 +7705,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for_each_cpu(j, nodemask) { struct sched_domain *sd; - sd = &per_cpu(node_domains, j); + sd = &per_cpu(node_domains, j).sd; sd->groups = sg; } sg->__cpu_power = 0; @@ -7987,7 +8008,7 @@ match2: } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) +static void arch_reinit_sched_domains(void) { get_online_cpus(); @@ -7996,13 +8017,10 @@ int arch_reinit_sched_domains(void) rebuild_sched_domains(); put_online_cpus(); - - return 0; } static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { - int ret; unsigned int level = 0; if (sscanf(buf, "%u", &level) != 1) @@ -8023,9 +8041,9 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) else sched_mc_power_savings = level; - ret = arch_reinit_sched_domains(); + arch_reinit_sched_domains(); - return ret ? ret : count; + return count; } #ifdef CONFIG_SCHED_MC @@ -8060,7 +8078,7 @@ static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_store); #endif -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) { int err = 0; @@ -9046,6 +9064,13 @@ static int tg_schedulable(struct task_group *tg, void *data) runtime = d->rt_runtime; } +#ifdef CONFIG_USER_SCHED + if (tg == &root_task_group) { + period = global_rt_period(); + runtime = global_rt_runtime(); + } +#endif + /* * Cannot have more runtime than the period. */ @@ -9199,6 +9224,16 @@ static int sched_rt_global_constraints(void) return ret; } + +int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +{ + /* Don't accept realtime tasks when there is no way for them to run */ + if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) + return 0; + + return 1; +} + #else /* !CONFIG_RT_GROUP_SCHED */ static int sched_rt_global_constraints(void) { @@ -9292,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED - /* Don't accept realtime tasks when there is no way for them to run */ - if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0) + if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) return -EINVAL; #else /* We don't support RT-tasks being in separate groups */