trace: print ftrace_dump at KERN_EMERG log level

[safe/jmp/linux-2.6] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 27ba1d6..52bbf1c 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
  DEFINE_TRACE(sched_migrate_task);
  
  #ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
  /*
   * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
   * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
   * slice expiry etc.
   */
  
-#define WEIGHT_IDLEPRIO                2
-#define WMULT_IDLEPRIO         (1 << 31)
+#define WEIGHT_IDLEPRIO                3
+#define WMULT_IDLEPRIO         1431655765
  
  /*
   * Nice levels are multiplicative, with a gentle 10% change for every
@@ -3715,7 +3718,7 @@ redo:
                  * don't kick the migration_thread, if the curr
                  * task on busiest cpu can't be moved to this_cpu
                  */
-               if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+               if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
                         double_unlock_balance(this_rq, busiest);
                         all_pinned = 1;
                         return ld_moved;
@@ -3728,8 +3731,13 @@ redo:
                 }
  
                 double_unlock_balance(this_rq, busiest);
+               /*
+                * Should not call ttwu while holding a rq->lock
+                */
+               spin_unlock(&this_rq->lock);
                 if (active_balance)
                         wake_up_process(busiest->migration_thread);
+               spin_lock(&this_rq->lock);
  
         } else
                 sd->nr_balance_failed = 0;
@@ -4150,13 +4158,17 @@ unsigned long long task_delta_exec(struct task_struct *p)
   * Account user cpu time to a process.
   * @p: the process that the cpu time gets accounted to
   * @cputime: the cpu time spent in user space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
   */
-void account_user_time(struct task_struct *p, cputime_t cputime)
+void account_user_time(struct task_struct *p, cputime_t cputime,
+                      cputime_t cputime_scaled)
  {
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
         cputime64_t tmp;
  
+       /* Add user time to process. */
         p->utime = cputime_add(p->utime, cputime);
+       p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
         account_group_user_time(p, cputime);
  
         /* Add user time to cpustat. */
@@ -4173,51 +4185,48 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
   * Account guest cpu time to a process.
   * @p: the process that the cpu time gets accounted to
   * @cputime: the cpu time spent in virtual machine since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
   */
-static void account_guest_time(struct task_struct *p, cputime_t cputime)
+static void account_guest_time(struct task_struct *p, cputime_t cputime,
+                              cputime_t cputime_scaled)
  {
         cputime64_t tmp;
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
  
         tmp = cputime_to_cputime64(cputime);
  
+       /* Add guest time to process. */
         p->utime = cputime_add(p->utime, cputime);
+       p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
         account_group_user_time(p, cputime);
         p->gtime = cputime_add(p->gtime, cputime);
  
+       /* Add guest time to cpustat. */
         cpustat->user = cputime64_add(cpustat->user, tmp);
         cpustat->guest = cputime64_add(cpustat->guest, tmp);
  }
  
  /*
- * Account scaled user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
- */
-void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
-{
-       p->utimescaled = cputime_add(p->utimescaled, cputime);
-}
-
-/*
   * Account system cpu time to a process.
   * @p: the process that the cpu time gets accounted to
   * @hardirq_offset: the offset to subtract from hardirq_count()
   * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
   */
  void account_system_time(struct task_struct *p, int hardirq_offset,
-                        cputime_t cputime)
+                        cputime_t cputime, cputime_t cputime_scaled)
  {
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-       struct rq *rq = this_rq();
         cputime64_t tmp;
  
         if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
-               account_guest_time(p, cputime);
+               account_guest_time(p, cputime, cputime_scaled);
                 return;
         }
  
+       /* Add system time to process. */
         p->stime = cputime_add(p->stime, cputime);
+       p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
         account_group_system_time(p, cputime);
  
         /* Add system time to cpustat. */
@@ -4226,48 +4235,84 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
                 cpustat->irq = cputime64_add(cpustat->irq, tmp);
         else if (softirq_count())
                 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-       else if (p != rq->idle)
-               cpustat->system = cputime64_add(cpustat->system, tmp);
-       else if (atomic_read(&rq->nr_iowait) > 0)
-               cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
         else
-               cpustat->idle = cputime64_add(cpustat->idle, tmp);
+               cpustat->system = cputime64_add(cpustat->system, tmp);
+
         /* Account for system time used */
         acct_update_integrals(p);
  }
  
  /*
- * Account scaled system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
+ * Account for involuntary wait time.
+ * @cputime: the cpu time spent in involuntary wait
   */
-void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
+void account_steal_time(cputime_t cputime)
  {
-       p->stimescaled = cputime_add(p->stimescaled, cputime);
+       struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+       cputime64_t cputime64 = cputime_to_cputime64(cputime);
+
+       cpustat->steal = cputime64_add(cpustat->steal, cputime64);
  }
  
  /*
- * Account for involuntary wait time.
- * @p: the process from which the cpu time has been stolen
- * @steal: the cpu time spent in involuntary wait
+ * Account for idle time.
+ * @cputime: the cpu time spent in idle wait
   */
-void account_steal_time(struct task_struct *p, cputime_t steal)
+void account_idle_time(cputime_t cputime)
  {
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-       cputime64_t tmp = cputime_to_cputime64(steal);
+       cputime64_t cputime64 = cputime_to_cputime64(cputime);
         struct rq *rq = this_rq();
  
-       if (p == rq->idle) {
-               p->stime = cputime_add(p->stime, steal);
-               if (atomic_read(&rq->nr_iowait) > 0)
-                       cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
-               else
-                       cpustat->idle = cputime64_add(cpustat->idle, tmp);
-       } else
-               cpustat->steal = cputime64_add(cpustat->steal, tmp);
+       if (atomic_read(&rq->nr_iowait) > 0)
+               cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+       else
+               cpustat->idle = cputime64_add(cpustat->idle, cputime64);
  }
  
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+
+/*
+ * Account a single tick of cpu time.
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: indicates if the tick is a user or a system tick
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+       cputime_t one_jiffy = jiffies_to_cputime(1);
+       cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+       struct rq *rq = this_rq();
+
+       if (user_tick)
+               account_user_time(p, one_jiffy, one_jiffy_scaled);
+       else if (p != rq->idle)
+               account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+                                   one_jiffy_scaled);
+       else
+               account_idle_time(one_jiffy);
+}
+
+/*
+ * Account multiple ticks of steal time.
+ * @ticks: number of stolen ticks; converted to cputime and
+ *         passed to account_steal_time()
+ */
+void account_steal_ticks(unsigned long ticks)
+{
+       account_steal_time(jiffies_to_cputime(ticks));
+}
+
+/*
+ * Account multiple ticks of idle time.
+ * @ticks: number of idle ticks
+ */
+void account_idle_ticks(unsigned long ticks)
+{
+       account_idle_time(jiffies_to_cputime(ticks));
+}
+
+#endif
+
  /*
   * Use precise platform statistics if available:
   */
@@ -4395,7 +4440,7 @@ void __kprobes sub_preempt_count(int val)
         /*
          * Underflow?
          */
-       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+       if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
                 return;
         /*
          * Is the spinlock portion underflowing?
@@ -5081,7 +5126,7 @@ int can_nice(const struct task_struct *p, const int nice)
   * sys_setpriority is a more generic, but much slower function that
   * does similar things.
   */
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
  {
         long nice, retval;
  
@@ -5388,8 +5433,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
   * @policy: new policy.
   * @param: structure containing the new RT priority.
   */
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+               struct sched_param __user *, param)
  {
         /* negative values for policy are not valid */
         if (policy < 0)
@@ -5403,7 +5448,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
   * @pid: the pid in question.
   * @param: structure containing the new RT priority.
   */
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
  {
         return do_sched_setscheduler(pid, -1, param);
  }
@@ -5412,7 +5457,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
   * sys_sched_getscheduler - get the policy (scheduling class) of a thread
   * @pid: the pid in question.
   */
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
  {
         struct task_struct *p;
         int retval;
@@ -5437,7 +5482,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
   * @pid: the pid in question.
   * @param: structure containing the RT priority.
   */
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
  {
         struct sched_param lp;
         struct task_struct *p;
@@ -5555,8 +5600,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
   * @len: length in bytes of the bitmask pointed to by user_mask_ptr
   * @user_mask_ptr: user-space pointer to the new cpu mask
   */
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
-                                     unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+               unsigned long __user *, user_mask_ptr)
  {
         cpumask_var_t new_mask;
         int retval;
@@ -5603,8 +5648,8 @@ out_unlock:
   * @len: length in bytes of the bitmask pointed to by user_mask_ptr
   * @user_mask_ptr: user-space pointer to hold the current cpu mask
   */
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
-                                     unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+               unsigned long __user *, user_mask_ptr)
  {
         int ret;
         cpumask_var_t mask;
@@ -5633,7 +5678,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
   * This function yields the current CPU to other tasks. If there are no
   * other threads running on this CPU then this function will return.
   */
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
  {
         struct rq *rq = this_rq_lock();
  
@@ -5774,7 +5819,7 @@ long __sched io_schedule_timeout(long timeout)
   * this syscall returns the maximum rt_priority that can be used
   * by a given scheduling class.
   */
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
  {
         int ret = -EINVAL;
  
@@ -5799,7 +5844,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
   * this syscall returns the minimum rt_priority that can be used
   * by a given scheduling class.
   */
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
  {
         int ret = -EINVAL;
  
@@ -5824,8 +5869,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
   * this syscall writes the default timeslice value of a given process
   * into the user-space timespec buffer. A value of '0' means infinity.
   */
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+               struct timespec __user *, interval)
  {
         struct task_struct *p;
         unsigned int time_slice;
@@ -6220,9 +6265,7 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
  static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
  {
         int dest_cpu;
-       /* FIXME: Use cpumask_of_node here. */
-       cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
-       const struct cpumask *nodemask = &_nodemask;
+       const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
  
  again:
         /* Look for allowed, online CPU in same node. */
@@ -6922,7 +6965,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
         spin_unlock_irqrestore(&rq->lock, flags);
  }
  
-static int init_rootdomain(struct root_domain *rd, bool bootmem)
+static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
  {
         memset(rd, 0, sizeof(*rd));
  
@@ -6935,7 +6978,7 @@ static int init_rootdomain(struct root_domain *rd, bool bootmem)
         }
  
         if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
-               goto free_rd;
+               goto out;
         if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
                 goto free_span;
         if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
@@ -6951,8 +6994,7 @@ free_online:
         free_cpumask_var(rd->online);
  free_span:
         free_cpumask_var(rd->span);
-free_rd:
-       kfree(rd);
+out:
         return -ENOMEM;
  }
  
@@ -7133,21 +7175,18 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
  static void sched_domain_node_span(int node, struct cpumask *span)
  {
         nodemask_t used_nodes;
-       /* FIXME: use cpumask_of_node() */
-       node_to_cpumask_ptr(nodemask, node);
         int i;
  
-       cpus_clear(*span);
+       cpumask_clear(span);
         nodes_clear(used_nodes);
  
-       cpus_or(*span, *span, *nodemask);
+       cpumask_or(span, span, cpumask_of_node(node));
         node_set(node, used_nodes);
  
         for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
                 int next_node = find_next_best_node(node, &used_nodes);
  
-               node_to_cpumask_ptr_next(nodemask, next_node);
-               cpus_or(*span, *span, *nodemask);
+               cpumask_or(span, span, cpumask_of_node(next_node));
         }
  }
  #endif /* CONFIG_NUMA */
@@ -7227,9 +7266,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
  {
         int group;
  #ifdef CONFIG_SCHED_MC
-       /* FIXME: Use cpu_coregroup_mask. */
-       *mask = cpu_coregroup_map(cpu);
-       cpus_and(*mask, *mask, *cpu_map);
+       cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
         cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
@@ -7248,10 +7285,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
   * groups, so roll our own. Now each node has its own list of groups which
   * gets dynamically allocated.
   */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
  static struct sched_group ***sched_group_nodes_bycpu;
  
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
  static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
  
  static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7259,10 +7296,8 @@ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
                                  struct cpumask *nodemask)
  {
         int group;
-       /* FIXME: use cpumask_of_node */
-       node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
  
-       cpumask_and(nodemask, pnodemask, cpu_map);
+       cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
         group = cpumask_first(nodemask);
  
         if (sg)
@@ -7313,10 +7348,8 @@ static void free_sched_groups(const struct cpumask *cpu_map,
  
                 for (i = 0; i < nr_node_ids; i++) {
                         struct sched_group *oldsg, *sg = sched_group_nodes[i];
-                       /* FIXME: Use cpumask_of_node */
-                       node_to_cpumask_ptr(pnodemask, i);
  
-                       cpus_and(*nodemask, *pnodemask, *cpu_map);
+                       cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
                         if (cpumask_empty(nodemask))
                                 continue;
  
@@ -7525,14 +7558,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
         for_each_cpu(i, cpu_map) {
                 struct sched_domain *sd = NULL, *p;
  
-               /* FIXME: use cpumask_of_node */
-               *nodemask = node_to_cpumask(cpu_to_node(i));
-               cpus_and(*nodemask, *nodemask, *cpu_map);
+               cpumask_and(nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map);
  
  #ifdef CONFIG_NUMA
                 if (cpumask_weight(cpu_map) >
                                 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
-                       sd = &per_cpu(allnodes_domains, i);
+                       sd = &per_cpu(allnodes_domains, i).sd;
                         SD_INIT(sd, ALLNODES);
                         set_domain_attribute(sd, attr);
                         cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7542,7 +7573,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 } else
                         p = NULL;
  
-               sd = &per_cpu(node_domains, i);
+               sd = &per_cpu(node_domains, i).sd;
                 SD_INIT(sd, NODE);
                 set_domain_attribute(sd, attr);
                 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7568,9 +7599,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 sd = &per_cpu(core_domains, i).sd;
                 SD_INIT(sd, MC);
                 set_domain_attribute(sd, attr);
-               *sched_domain_span(sd) = cpu_coregroup_map(i);
-               cpumask_and(sched_domain_span(sd),
-                           sched_domain_span(sd), cpu_map);
+               cpumask_and(sched_domain_span(sd), cpu_map,
+                                                  cpu_coregroup_mask(i));
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7606,9 +7636,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
  #ifdef CONFIG_SCHED_MC
         /* Set up multi-core groups */
         for_each_cpu(i, cpu_map) {
-               /* FIXME: Use cpu_coregroup_mask */
-               *this_core_map = cpu_coregroup_map(i);
-               cpus_and(*this_core_map, *this_core_map, *cpu_map);
+               cpumask_and(this_core_map, cpu_coregroup_mask(i), cpu_map);
                 if (i != cpumask_first(this_core_map))
                         continue;
  
@@ -7620,9 +7648,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
  
         /* Set up physical groups */
         for (i = 0; i < nr_node_ids; i++) {
-               /* FIXME: Use cpumask_of_node */
-               *nodemask = node_to_cpumask(i);
-               cpus_and(*nodemask, *nodemask, *cpu_map);
+               cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
                 if (cpumask_empty(nodemask))
                         continue;
  
@@ -7644,11 +7670,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 struct sched_group *sg, *prev;
                 int j;
  
-               /* FIXME: Use cpumask_of_node */
-               *nodemask = node_to_cpumask(i);
                 cpumask_clear(covered);
-
-               cpus_and(*nodemask, *nodemask, *cpu_map);
+               cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
                 if (cpumask_empty(nodemask)) {
                         sched_group_nodes[i] = NULL;
                         continue;
@@ -7668,7 +7691,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 for_each_cpu(j, nodemask) {
                         struct sched_domain *sd;
  
-                       sd = &per_cpu(node_domains, j);
+                       sd = &per_cpu(node_domains, j).sd;
                         sd->groups = sg;
                 }
                 sg->__cpu_power = 0;
@@ -7679,8 +7702,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
  
                 for (j = 0; j < nr_node_ids; j++) {
                         int n = (i + j) % nr_node_ids;
-                       /* FIXME: Use cpumask_of_node */
-                       node_to_cpumask_ptr(pnodemask, n);
  
                         cpumask_complement(notcovered, covered);
                         cpumask_and(tmpmask, notcovered, cpu_map);
@@ -7688,7 +7709,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                         if (cpumask_empty(tmpmask))
                                 break;
  
-                       cpumask_and(tmpmask, tmpmask, pnodemask);
+                       cpumask_and(tmpmask, tmpmask, cpumask_of_node(n));
                         if (cpumask_empty(tmpmask))
                                 continue;
  
@@ -7973,7 +7994,7 @@ match2:
  }
  
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static void arch_reinit_sched_domains(void)
  {
         get_online_cpus();
  
@@ -7982,13 +8003,10 @@ int arch_reinit_sched_domains(void)
  
         rebuild_sched_domains();
         put_online_cpus();
-
-       return 0;
  }
  
  static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
  {
-       int ret;
         unsigned int level = 0;
  
         if (sscanf(buf, "%u", &level) != 1)
@@ -8009,9 +8027,9 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
         else
                 sched_mc_power_savings = level;
  
-       ret = arch_reinit_sched_domains();
+       arch_reinit_sched_domains();
  
-       return ret ? ret : count;
+       return count;
  }
  
  #ifdef CONFIG_SCHED_MC
@@ -8046,7 +8064,7 @@ static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644,
                    sched_smt_power_savings_store);
  #endif
  
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
  {
         int err = 0;
  
@@ -9032,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
                 runtime = d->rt_runtime;
         }
  
+#ifdef CONFIG_USER_SCHED
+       if (tg == &root_task_group) {
+               period = global_rt_period();
+               runtime = global_rt_runtime();
+       }
+#endif
+
         /*
          * Cannot have more runtime than the period.
          */