X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsched.c;h=3c2a54f70ffed7ca1e0954666e3d333e43d36b95;hb=278554bd6579206921f5d8a523649a7a57f8850d;hp=0b914fc90a55d6c2cda4dfe676dbe4e9c904f747;hpb=83ab0aa0d5623d823444db82c3b3c34d7ec364ae;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sched.c b/kernel/sched.c index 0b914fc..3c2a54f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,15 @@ static inline struct task_group *task_group(struct task_struct *p) /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { + /* + * Strictly speaking this rcu_read_lock() is not needed since the + * task_group is tied to the cgroup, which in turn can never go away + * as long as there are tasks attached to it. + * + * However since task_group() uses task_subsys_state() which is an + * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. + */ + rcu_read_lock(); #ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; p->se.parent = task_group(p)->se[cpu]; @@ -331,6 +341,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) p->rt.rt_rq = task_group(p)->rt_rq[cpu]; p->rt.parent = task_group(p)->rt_se[cpu]; #endif + rcu_read_unlock(); } #else @@ -602,6 +613,11 @@ static inline int cpu_of(struct rq *rq) #endif } +#define rcu_dereference_check_sched_domain(p) \ + rcu_dereference_check((p), \ + rcu_read_lock_sched_held() || \ + lockdep_is_held(&sched_domains_mutex)) + /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. @@ -610,7 +626,7 @@ static inline int cpu_of(struct rq *rq) * preempt-disabled sections. */ #define for_each_domain(cpu, __sd) \ - for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) + for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) @@ -1481,7 +1497,7 @@ static unsigned long target_load(int cpu, int type) static struct sched_group *group_of(int cpu) { - struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); + struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd); if (!sd) return NULL; @@ -1516,7 +1532,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long *update_shares_data; +static __read_mostly unsigned long __percpu *update_shares_data; static void __set_se_shares(struct sched_entity *se, unsigned long shares); @@ -2354,7 +2370,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, { int cpu, orig_cpu, this_cpu, success = 0; unsigned long flags; - struct rq *rq, *orig_rq; + struct rq *rq; if (!sched_feat(SYNC_WAKEUPS)) wake_flags &= ~WF_SYNC; @@ -2362,7 +2378,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, this_cpu = get_cpu(); smp_wmb(); - rq = orig_rq = task_rq_lock(p, &flags); + rq = task_rq_lock(p, &flags); update_rq_clock(rq); if (!(p->state & state)) goto out; @@ -2645,7 +2661,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { unsigned long flags; struct rq *rq; - int cpu = get_cpu(); + int cpu __maybe_unused = get_cpu(); #ifdef CONFIG_SMP /* @@ -2798,7 +2814,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); - perf_event_task_sched_in(current, cpu_of(rq)); +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_disable(); +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ + perf_event_task_sched_in(current); +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_enable(); +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); fire_sched_in_preempt_notifiers(current); @@ -3504,7 +3526,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr, cpu); + perf_event_task_tick(curr); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); @@ -3718,7 +3740,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); - perf_event_task_sched_out(prev, next, cpu); + perf_event_task_sched_out(prev, next); rq->nr_switches++; rq->curr = next; @@ -3768,7 +3790,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) * the mutex owner just released it and exited. */ if (probe_kernel_address(&owner->cpu, cpu)) - goto out; + return 0; #else cpu = owner->cpu; #endif @@ -3778,14 +3800,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) * the cpu field may no longer be valid. */ if (cpu >= nr_cpumask_bits) - goto out; + return 0; /* * We need to validate that we can do a * get_cpu() and that we have the percpu area. */ if (!cpu_online(cpu)) - goto out; + return 0; rq = cpu_rq(cpu); @@ -3804,7 +3826,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) cpu_relax(); } -out: + return 1; } #endif @@ -4342,7 +4364,7 @@ int can_nice(const struct task_struct *p, const int nice) /* convert nice value [19,-20] to rlimit style value [1,40] */ int nice_rlim = 20 - nice; - return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || + return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || capable(CAP_SYS_NICE)); } @@ -4519,7 +4541,7 @@ recheck: if (!lock_task_sighand(p, &flags)) return -ESRCH; - rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur; + rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); unlock_task_sighand(p, &flags); /* can't set/change the rt policy */ @@ -4891,7 +4913,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, int ret; cpumask_var_t mask; - if (len < cpumask_size()) + if ((len * BITS_PER_BYTE) < nr_cpu_ids) + return -EINVAL; + if (len & (sizeof(unsigned long)-1)) return -EINVAL; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) @@ -4899,10 +4923,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ret = sched_getaffinity(pid, mask); if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) + size_t retlen = min_t(size_t, len, cpumask_size()); + + if (copy_to_user(user_mask_ptr, mask, retlen)) ret = -EFAULT; else - ret = cpumask_size(); + ret = retlen; } free_cpumask_var(mask); @@ -5372,7 +5398,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) get_task_struct(mt); task_rq_unlock(rq, &flags); - wake_up_process(rq->migration_thread); + wake_up_process(mt); put_task_struct(mt); wait_for_completion(&req.done); tlb_migrate_finish(p->mm); @@ -7395,11 +7421,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) #ifdef CONFIG_SCHED_MC static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *page) { return sprintf(page, "%u\n", sched_mc_power_savings); } static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); @@ -7411,11 +7439,13 @@ static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, #ifdef CONFIG_SCHED_SMT static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, char *page) { return sprintf(page, "%u\n", sched_smt_power_savings); } static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); @@ -8802,7 +8832,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ - u64 *cpuusage; + u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; };