ALSA: es968: fix wrong PnP dma index

[safe/jmp/linux-2.6] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index c0be079..6af210a 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
  #include <linux/debugfs.h>
  #include <linux/ctype.h>
  #include <linux/ftrace.h>
+#include <linux/slab.h>
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
@@ -233,7 +234,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
   */
  static DEFINE_MUTEX(sched_domains_mutex);
  
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
  
  #include <linux/cgroup.h>
  
@@ -243,13 +244,7 @@ static LIST_HEAD(task_groups);
  
  /* task group related information */
  struct task_group {
-#ifdef CONFIG_CGROUP_SCHED
         struct cgroup_subsys_state css;
-#endif
-
-#ifdef CONFIG_USER_SCHED
-       uid_t uid;
-#endif
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
         /* schedulable entities of this group on each cpu */
@@ -274,35 +269,7 @@ struct task_group {
         struct list_head children;
  };
  
-#ifdef CONFIG_USER_SCHED
-
-/* Helper function to pass uid information to create_sched_user() */
-void set_tg_uid(struct user_struct *user)
-{
-       user->tg->uid = user->uid;
-}
-
-/*
- * Root task group.
- *     Every UID task group (including init_task_group aka UID-0) will
- *     be a child to this group.
- */
-struct task_group root_task_group;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/* Default task group's sched entity on each cpu */
-static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
-/* Default task group's cfs_rq on each cpu */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
-#endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_USER_SCHED */
  #define root_task_group init_task_group
-#endif /* CONFIG_USER_SCHED */
  
  /* task_group_lock serializes add/remove of task groups and also changes to
   * a task group's cpu shares.
@@ -318,11 +285,7 @@ static int root_task_group_empty(void)
  }
  #endif
  
-#ifdef CONFIG_USER_SCHED
-# define INIT_TASK_GROUP_LOAD  (2*NICE_0_LOAD)
-#else /* !CONFIG_USER_SCHED */
  # define INIT_TASK_GROUP_LOAD  NICE_0_LOAD
-#endif /* CONFIG_USER_SCHED */
  
  /*
   * A weight of 0 or 1 can cause arithmetics problems.
@@ -348,11 +311,7 @@ static inline struct task_group *task_group(struct task_struct *p)
  {
         struct task_group *tg;
  
-#ifdef CONFIG_USER_SCHED
-       rcu_read_lock();
-       tg = __task_cred(p)->user->tg;
-       rcu_read_unlock();
-#elif defined(CONFIG_CGROUP_SCHED)
+#ifdef CONFIG_CGROUP_SCHED
         tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                                 struct task_group, css);
  #else
@@ -383,7 +342,7 @@ static inline struct task_group *task_group(struct task_struct *p)
         return NULL;
  }
  
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
  
  /* CFS-related fields in a runqueue */
  struct cfs_rq {
@@ -478,7 +437,6 @@ struct rt_rq {
         struct rq *rq;
         struct list_head leaf_rt_rq_list;
         struct task_group *tg;
-       struct sched_rt_entity *rt_se;
  #endif
  };
  
@@ -645,6 +603,11 @@ static inline int cpu_of(struct rq *rq)
  #endif
  }
  
+#define rcu_dereference_check_sched_domain(p) \
+       rcu_dereference_check((p), \
+                             rcu_read_lock_sched_held() || \
+                             lockdep_is_held(&sched_domains_mutex))
+
  /*
   * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
   * See detach_destroy_domains: synchronize_sched for details.
@@ -653,7 +616,7 @@ static inline int cpu_of(struct rq *rq)
   * preempt-disabled sections.
   */
  #define for_each_domain(cpu, __sd) \
-       for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+       for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
  
  #define cpu_rq(cpu)            (&per_cpu(runqueues, (cpu)))
  #define this_rq()              (&__get_cpu_var(runqueues))
@@ -941,16 +904,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
  #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
  
  /*
+ * Check whether the task is waking; we use this to synchronize against
+ * ttwu() so that task_cpu() reports a stable number.
+ *
+ * We need to make an exception for PF_STARTING tasks because the fork
+ * path might require task_rq_lock() to work, e.g. it can call
+ * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ */
+static inline int task_is_waking(struct task_struct *p)
+{
+       return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+}
+
+/*
   * __task_rq_lock - lock the runqueue a given task resides on.
   * Must be called interrupts disabled.
   */
  static inline struct rq *__task_rq_lock(struct task_struct *p)
         __acquires(rq->lock)
  {
+       struct rq *rq;
+
         for (;;) {
-               struct rq *rq = task_rq(p);
+               while (task_is_waking(p))
+                       cpu_relax();
+               rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p)))
+               if (likely(rq == task_rq(p) && !task_is_waking(p)))
                         return rq;
                 raw_spin_unlock(&rq->lock);
         }
@@ -967,10 +947,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
         struct rq *rq;
  
         for (;;) {
+               while (task_is_waking(p))
+                       cpu_relax();
                 local_irq_save(*flags);
                 rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p)))
+               if (likely(rq == task_rq(p) && !task_is_waking(p)))
                         return rq;
                 raw_spin_unlock_irqrestore(&rq->lock, *flags);
         }
@@ -1505,7 +1487,7 @@ static unsigned long target_load(int cpu, int type)
  
  static struct sched_group *group_of(int cpu)
  {
-       struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+       struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
  
         if (!sd)
                 return NULL;
@@ -1540,7 +1522,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
-static __read_mostly unsigned long *update_shares_data;
+static __read_mostly unsigned long __percpu *update_shares_data;
  
  static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  
@@ -1675,16 +1657,6 @@ static void update_shares(struct sched_domain *sd)
         }
  }
  
-static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
-{
-       if (root_task_group_empty())
-               return;
-
-       raw_spin_unlock(&rq->lock);
-       update_shares(sd);
-       raw_spin_lock(&rq->lock);
-}
-
  static void update_h_load(long cpu)
  {
         if (root_task_group_empty())
@@ -1699,10 +1671,6 @@ static inline void update_shares(struct sched_domain *sd)
  {
  }
  
-static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
-{
-}
-
  #endif
  
  #ifdef CONFIG_PREEMPT
@@ -1898,13 +1866,14 @@ static void update_avg(u64 *avg, u64 sample)
         *avg += diff >> 3;
  }
  
-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
  {
         if (wakeup)
                 p->se.start_runtime = p->se.sum_exec_runtime;
  
         sched_info_queued(p);
-       p->sched_class->enqueue_task(rq, p, wakeup);
+       p->sched_class->enqueue_task(rq, p, wakeup, head);
         p->se.on_rq = 1;
  }
  
@@ -1934,7 +1903,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
         if (task_contributes_to_load(p))
                 rq->nr_uninterruptible--;
  
-       enqueue_task(rq, p, wakeup);
+       enqueue_task(rq, p, wakeup, false);
         inc_nr_running(rq);
  }
  
@@ -2342,14 +2311,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
  }
  
  /*
- * Called from:
- *
- *  - fork, @p is stable because it isn't on the tasklist yet
+ * Called from 3 sites (exec, fork, wakeup). Because it is called without
+ * holding rq->lock, we need to ensure ->cpus_allowed is stable. This is
+ * done by:
   *
- *  - exec, @p is unstable, retry loop
- *
- *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
- *             we should be good.
+ *  exec:           is unstable, retry loop
+ *  fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
   */
  static inline
  int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -2393,7 +2360,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
  {
         int cpu, orig_cpu, this_cpu, success = 0;
         unsigned long flags;
-       struct rq *rq, *orig_rq;
+       struct rq *rq;
  
         if (!sched_feat(SYNC_WAKEUPS))
                 wake_flags &= ~WF_SYNC;
@@ -2401,7 +2368,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
         this_cpu = get_cpu();
  
         smp_wmb();
-       rq = orig_rq = task_rq_lock(p, &flags);
+       rq = task_rq_lock(p, &flags);
         update_rq_clock(rq);
         if (!(p->state & state))
                 goto out;
@@ -2432,14 +2399,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
         __task_rq_unlock(rq);
  
         cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-       if (cpu != orig_cpu)
+       if (cpu != orig_cpu) {
+               /*
+                * Since we migrate the task without holding any rq->lock,
+                * we need to be careful with task_rq_lock(), since that
+                * might end up locking an invalid rq.
+                */
                 set_task_cpu(p, cpu);
+       }
  
-       rq = __task_rq_lock(p);
+       rq = cpu_rq(cpu);
+       raw_spin_lock(&rq->lock);
         update_rq_clock(rq);
  
+       /*
+        * We migrated the task without holding either rq->lock; however,
+        * since the task is not on the task list itself, nobody else
+        * will try to migrate the task, hence the rq should match the
+        * cpu we just moved it to.
+        */
+       WARN_ON(task_cpu(p) != cpu);
         WARN_ON(p->state != TASK_WAKING);
-       cpu = task_cpu(p);
  
  #ifdef CONFIG_SCHEDSTATS
         schedstat_inc(rq, ttwu_count);
@@ -2642,9 +2622,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
         if (p->sched_class->task_fork)
                 p->sched_class->task_fork(p);
  
-#ifdef CONFIG_SMP
-       cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
-#endif
         set_task_cpu(p, cpu);
  
  #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2674,8 +2651,29 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  {
         unsigned long flags;
         struct rq *rq;
+       int cpu __maybe_unused = get_cpu();
+
+#ifdef CONFIG_SMP
+       /*
+        * Fork balancing, do it here and not earlier because:
+        *  - cpus_allowed can change in the fork path
+        *  - any previously selected cpu might disappear through hotplug
+        *
+        * We still have TASK_WAKING but PF_STARTING is gone now, meaning
+        * ->cpus_allowed is stable; we have preemption disabled, meaning
+        * cpu_online_mask is stable.
+        */
+       cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+       set_task_cpu(p, cpu);
+#endif
+
+       /*
+        * Since the task is not on the rq and we still have TASK_WAKING set
+        * nobody else will migrate this task.
+        */
+       rq = cpu_rq(cpu);
+       raw_spin_lock_irqsave(&rq->lock, flags);
  
-       rq = task_rq_lock(p, &flags);
         BUG_ON(p->state != TASK_WAKING);
         p->state = TASK_RUNNING;
         update_rq_clock(rq);
@@ -2687,6 +2685,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                 p->sched_class->task_woken(rq, p);
  #endif
         task_rq_unlock(rq, &flags);
+       put_cpu();
  }
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2805,7 +2804,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
          */
         prev_state = prev->state;
         finish_arch_switch(prev);
-       perf_event_task_sched_in(current, cpu_of(rq));
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+       local_irq_disable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+       perf_event_task_sched_in(current);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+       local_irq_enable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
         finish_lock_switch(rq, prev);
  
         fire_sched_in_preempt_notifiers(current);
@@ -3511,7 +3516,7 @@ void scheduler_tick(void)
         curr->sched_class->task_tick(rq, curr, 0);
         raw_spin_unlock(&rq->lock);
  
-       perf_event_task_tick(curr, cpu);
+       perf_event_task_tick(curr);
  
  #ifdef CONFIG_SMP
         rq->idle_at_tick = idle_cpu(cpu);
@@ -3725,7 +3730,7 @@ need_resched_nonpreemptible:
  
         if (likely(prev != next)) {
                 sched_info_switch(prev, next);
-               perf_event_task_sched_out(prev, next, cpu);
+               perf_event_task_sched_out(prev, next);
  
                 rq->nr_switches++;
                 rq->curr = next;
@@ -3743,8 +3748,11 @@ need_resched_nonpreemptible:
  
         post_schedule(rq);
  
-       if (unlikely(reacquire_kernel_lock(current) < 0))
+       if (unlikely(reacquire_kernel_lock(current) < 0)) {
+               prev = rq->curr;
+               switch_count = &prev->nivcsw;
                 goto need_resched_nonpreemptible;
+       }
  
         preempt_enable_no_resched();
         if (need_resched())
@@ -4253,7 +4261,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
         unsigned long flags;
         int oldprio, on_rq, running;
         struct rq *rq;
-       const struct sched_class *prev_class = p->sched_class;
+       const struct sched_class *prev_class;
  
         BUG_ON(prio < 0 || prio > MAX_PRIO);
  
@@ -4261,6 +4269,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
         update_rq_clock(rq);
  
         oldprio = p->prio;
+       prev_class = p->sched_class;
         on_rq = p->se.on_rq;
         running = task_current(rq, p);
         if (on_rq)
@@ -4278,7 +4287,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
         if (running)
                 p->sched_class->set_curr_task(rq);
         if (on_rq) {
-               enqueue_task(rq, p, 0);
+               enqueue_task(rq, p, 0, oldprio < prio);
  
                 check_class_changed(rq, p, prev_class, oldprio, running);
         }
@@ -4322,7 +4331,7 @@ void set_user_nice(struct task_struct *p, long nice)
         delta = p->prio - old_prio;
  
         if (on_rq) {
-               enqueue_task(rq, p, 0);
+               enqueue_task(rq, p, 0, false);
                 /*
                  * If the task increased its priority or is running and
                  * lowered its priority, then reschedule its CPU:
@@ -4345,7 +4354,7 @@ int can_nice(const struct task_struct *p, const int nice)
         /* convert nice value [19,-20] to rlimit style value [1,40] */
         int nice_rlim = 20 - nice;
  
-       return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
+       return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
                 capable(CAP_SYS_NICE));
  }
  
@@ -4480,7 +4489,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
  {
         int retval, oldprio, oldpolicy = -1, on_rq, running;
         unsigned long flags;
-       const struct sched_class *prev_class = p->sched_class;
+       const struct sched_class *prev_class;
         struct rq *rq;
         int reset_on_fork;
  
@@ -4522,7 +4531,7 @@ recheck:
  
                         if (!lock_task_sighand(p, &flags))
                                 return -ESRCH;
-                       rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+                       rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
                         unlock_task_sighand(p, &flags);
  
                         /* can't set/change the rt policy */
@@ -4594,6 +4603,7 @@ recheck:
         p->sched_reset_on_fork = reset_on_fork;
  
         oldprio = p->prio;
+       prev_class = p->sched_class;
         __setscheduler(rq, p, policy, param->sched_priority);
  
         if (running)
@@ -4893,7 +4903,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
         int ret;
         cpumask_var_t mask;
  
-       if (len < cpumask_size())
+       if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+               return -EINVAL;
+       if (len & (sizeof(unsigned long)-1))
                 return -EINVAL;
  
         if (!alloc_cpumask_var(&mask, GFP_KERNEL))
@@ -4901,10 +4913,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
  
         ret = sched_getaffinity(pid, mask);
         if (ret == 0) {
-               if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+               size_t retlen = min_t(size_t, len, cpumask_size());
+
+               if (copy_to_user(user_mask_ptr, mask, retlen))
                         ret = -EFAULT;
                 else
-                       ret = cpumask_size();
+                       ret = retlen;
         }
         free_cpumask_var(mask);
  
@@ -5344,23 +5358,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
         struct rq *rq;
         int ret = 0;
  
-       /*
-        * Since we rely on wake-ups to migrate sleeping tasks, don't change
-        * the ->cpus_allowed mask from under waking tasks, which would be
-        * possible when we change rq->lock in ttwu(), so synchronize against
-        * TASK_WAKING to avoid that.
-        */
-again:
-       while (p->state == TASK_WAKING)
-               cpu_relax();
-
         rq = task_rq_lock(p, &flags);
  
-       if (p->state == TASK_WAKING) {
-               task_rq_unlock(rq, &flags);
-               goto again;
-       }
-
         if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                 ret = -EINVAL;
                 goto out;
@@ -5389,7 +5388,7 @@ again:
  
                 get_task_struct(mt);
                 task_rq_unlock(rq, &flags);
-               wake_up_process(rq->migration_thread);
+               wake_up_process(mt);
                 put_task_struct(mt);
                 wait_for_completion(&req.done);
                 tlb_migrate_finish(p->mm);
@@ -7412,11 +7411,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
  
  #ifdef CONFIG_SCHED_MC
  static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
+                                          struct sysdev_class_attribute *attr,
                                            char *page)
  {
         return sprintf(page, "%u\n", sched_mc_power_savings);
  }
  static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
+                                           struct sysdev_class_attribute *attr,
                                             const char *buf, size_t count)
  {
         return sched_power_savings_store(buf, count, 0);
@@ -7428,11 +7429,13 @@ static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
  
  #ifdef CONFIG_SCHED_SMT
  static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
+                                           struct sysdev_class_attribute *attr,
                                             char *page)
  {
         return sprintf(page, "%u\n", sched_smt_power_savings);
  }
  static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
+                                            struct sysdev_class_attribute *attr,
                                              const char *buf, size_t count)
  {
         return sched_power_savings_store(buf, count, 1);
@@ -7647,7 +7650,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
         tg->rt_rq[cpu] = rt_rq;
         init_rt_rq(rt_rq, rq);
         rt_rq->tg = tg;
-       rt_rq->rt_se = rt_se;
         rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
         if (add)
                 list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
@@ -7678,9 +7680,6 @@ void __init sched_init(void)
  #ifdef CONFIG_RT_GROUP_SCHED
         alloc_size += 2 * nr_cpu_ids * sizeof(void **);
  #endif
-#ifdef CONFIG_USER_SCHED
-       alloc_size *= 2;
-#endif
  #ifdef CONFIG_CPUMASK_OFFSTACK
         alloc_size += num_possible_cpus() * cpumask_size();
  #endif
@@ -7694,13 +7693,6 @@ void __init sched_init(void)
                 init_task_group.cfs_rq = (struct cfs_rq **)ptr;
                 ptr += nr_cpu_ids * sizeof(void **);
  
-#ifdef CONFIG_USER_SCHED
-               root_task_group.se = (struct sched_entity **)ptr;
-               ptr += nr_cpu_ids * sizeof(void **);
-
-               root_task_group.cfs_rq = (struct cfs_rq **)ptr;
-               ptr += nr_cpu_ids * sizeof(void **);
-#endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  #ifdef CONFIG_RT_GROUP_SCHED
                 init_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -7709,13 +7701,6 @@ void __init sched_init(void)
                 init_task_group.rt_rq = (struct rt_rq **)ptr;
                 ptr += nr_cpu_ids * sizeof(void **);
  
-#ifdef CONFIG_USER_SCHED
-               root_task_group.rt_se = (struct sched_rt_entity **)ptr;
-               ptr += nr_cpu_ids * sizeof(void **);
-
-               root_task_group.rt_rq = (struct rt_rq **)ptr;
-               ptr += nr_cpu_ids * sizeof(void **);
-#endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
  #ifdef CONFIG_CPUMASK_OFFSTACK
                 for_each_possible_cpu(i) {
@@ -7735,22 +7720,13 @@ void __init sched_init(void)
  #ifdef CONFIG_RT_GROUP_SCHED
         init_rt_bandwidth(&init_task_group.rt_bandwidth,
                         global_rt_period(), global_rt_runtime());
-#ifdef CONFIG_USER_SCHED
-       init_rt_bandwidth(&root_task_group.rt_bandwidth,
-                       global_rt_period(), RUNTIME_INF);
-#endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
  
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
         list_add(&init_task_group.list, &task_groups);
         INIT_LIST_HEAD(&init_task_group.children);
  
-#ifdef CONFIG_USER_SCHED
-       INIT_LIST_HEAD(&root_task_group.children);
-       init_task_group.parent = &root_task_group;
-       list_add(&init_task_group.siblings, &root_task_group.children);
-#endif /* CONFIG_USER_SCHED */
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
  
  #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
         update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
@@ -7790,25 +7766,6 @@ void __init sched_init(void)
                  * directly in rq->cfs (i.e init_task_group->se[] = NULL).
                  */
                 init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
-#elif defined CONFIG_USER_SCHED
-               root_task_group.shares = NICE_0_LOAD;
-               init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, 0, NULL);
-               /*
-                * In case of task-groups formed thr' the user id of tasks,
-                * init_task_group represents tasks belonging to root user.
-                * Hence it forms a sibling of all subsequent groups formed.
-                * In this case, init_task_group gets only a fraction of overall
-                * system cpu resource, based on the weight assigned to root
-                * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished
-                * by letting tasks of init_task_group sit in a separate cfs_rq
-                * (init_tg_cfs_rq) and having one entity represent this group of
-                * tasks in rq->cfs (i.e init_task_group->se[] != NULL).
-                */
-               init_tg_cfs_entry(&init_task_group,
-                               &per_cpu(init_tg_cfs_rq, i),
-                               &per_cpu(init_sched_entity, i), i, 1,
-                               root_task_group.se[i]);
-
  #endif
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
@@ -7817,12 +7774,6 @@ void __init sched_init(void)
                 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
  #ifdef CONFIG_CGROUP_SCHED
                 init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
-#elif defined CONFIG_USER_SCHED
-               init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
-               init_tg_rt_entry(&init_task_group,
-                               &per_cpu(init_rt_rq_var, i),
-                               &per_cpu(init_sched_rt_entity, i), i, 1,
-                               root_task_group.rt_se[i]);
  #endif
  #endif
  
@@ -8218,7 +8169,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
  }
  #endif /* CONFIG_RT_GROUP_SCHED */
  
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
  static void free_sched_group(struct task_group *tg)
  {
         free_fair_sched_group(tg);
@@ -8323,11 +8274,11 @@ void sched_move_task(struct task_struct *tsk)
         if (unlikely(running))
                 tsk->sched_class->set_curr_task(rq);
         if (on_rq)
-               enqueue_task(rq, tsk, 0);
+               enqueue_task(rq, tsk, 0, false);
  
         task_rq_unlock(rq, &flags);
  }
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  static void __set_se_shares(struct sched_entity *se, unsigned long shares)
@@ -8469,13 +8420,6 @@ static int tg_schedulable(struct task_group *tg, void *data)
                 runtime = d->rt_runtime;
         }
  
-#ifdef CONFIG_USER_SCHED
-       if (tg == &root_task_group) {
-               period = global_rt_period();
-               runtime = global_rt_runtime();
-       }
-#endif
-
         /*
          * Cannot have more runtime than the period.
          */
@@ -8878,7 +8822,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  struct cpuacct {
         struct cgroup_subsys_state css;
         /* cpuusage holds pointer to a u64-type object on every cpu */
-       u64 *cpuusage;
+       u64 __percpu *cpuusage;
         struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
         struct cpuacct *parent;
  };
@@ -9095,12 +9039,30 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  }
  
  /*
+ * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
+ * in cputime_t units. As a result, cpuacct_update_stats calls
+ * percpu_counter_add with values large enough to always overflow the
+ * per cpu batch limit causing bad SMP scalability.
+ *
+ * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
+ * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
+ * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
+ */
+#ifdef CONFIG_SMP
+#define CPUACCT_BATCH  \
+       min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
+#else
+#define CPUACCT_BATCH  0
+#endif
+
+/*
   * Charge the system/user time to the task's accounting group.
   */
  static void cpuacct_update_stats(struct task_struct *tsk,
                 enum cpuacct_stat_index idx, cputime_t val)
  {
         struct cpuacct *ca;
+       int batch = CPUACCT_BATCH;
  
         if (unlikely(!cpuacct_subsys.active))
                 return;
@@ -9109,7 +9071,7 @@ static void cpuacct_update_stats(struct task_struct *tsk,
         ca = task_ca(tsk);
  
         do {
-               percpu_counter_add(&ca->cpustat[idx], val);
+               __percpu_counter_add(&ca->cpustat[idx], val, batch);
                 ca = ca->parent;
         } while (ca);
         rcu_read_unlock();