DEFINE_TRACE(sched_migrate_task);
#ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
* Since cpu_power is a 'constant', we can use a reciprocal divide.
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rt_b->rt_period_timer.function = sched_rt_period_timer;
- rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
}
static inline int rt_bandwidth_enabled(void)
struct task_group *tg;
#ifdef CONFIG_USER_SCHED
- tg = p->user->tg;
+ rcu_read_lock();
+ tg = __task_cred(p)->user->tg;
+ rcu_read_unlock();
#elif defined(CONFIG_CGROUP_SCHED)
tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
struct task_group, css);
#ifdef CONFIG_SMP
struct cpupri cpupri;
#endif
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ /*
+ * Preferred wake up cpu nominated by sched_mc balance that will be
+ * used when most cpus are idle in the system indicating overall very
+ * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
+ */
+ unsigned int sched_mc_preferred_wakeup_cpu;
+#endif
};
/*
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
struct sched_info rq_sched_info;
+ unsigned long long rq_cpu_time;
+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
/* sys_sched_yield() stats */
unsigned int yld_exp_empty;
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
- rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
}
#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
* slice expiry etc.
*/
-#define WEIGHT_IDLEPRIO 2
-#define WMULT_IDLEPRIO (1 << 31)
+#define WEIGHT_IDLEPRIO 3
+#define WMULT_IDLEPRIO 1431655765
/*
* Nice levels are multiplicative, with a gentle 10% change for every
clock_offset = old_rq->clock - new_rq->clock;
+ trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+
#ifdef CONFIG_SCHEDSTATS
if (p->se.wait_start)
p->se.wait_start -= clock_offset;
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;
+ if (!sync) {
+ if (current->se.avg_overlap < sysctl_sched_migration_cost &&
+ p->se.avg_overlap < sysctl_sched_migration_cost)
+ sync = 1;
+ } else {
+ if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
+ p->se.avg_overlap >= sysctl_sched_migration_cost)
+ sync = 0;
+ }
+
#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE)) {
struct sched_domain *sd;
smp_wmb();
rq = task_rq_lock(p, &flags);
+ update_rq_clock(rq);
old_state = p->state;
if (!(old_state & state))
goto out;
schedstat_inc(p, se.nr_wakeups_local);
else
schedstat_inc(p, se.nr_wakeups_remote);
- update_rq_clock(rq);
activate_task(rq, p, 1);
success = 1;
out_running:
- trace_sched_wakeup(rq, p);
+ trace_sched_wakeup(rq, p, success);
check_preempt_curr(rq, p, sync);
p->state = TASK_RUNNING;
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- trace_sched_wakeup_new(rq, p);
+ trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, 0);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
|| unlikely(!cpu_active(dest_cpu)))
goto out;
- trace_sched_migrate_task(rq, p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
if (this == group_leader && group_leader != group_min) {
*imbalance = min_load_per_task;
+ if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+ cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+ cpumask_first(sched_group_cpus(group_leader));
+ }
return group_min;
}
#endif
}
if (!ld_moved) {
+ int active_balance = 0;
+
schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1;
+
+ if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
+ return -1;
+
+ if (sd->nr_balance_failed++ < 2)
+ return -1;
+
+ /*
+ * The only task running in a non-idle cpu can be moved to this
+ * cpu in an attempt to completely freeup the other CPU
+ * package. The same method used to move task in load_balance()
+ * have been extended for load_balance_newidle() to speedup
+ * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2)
+ *
+ * The package power saving logic comes from
+ * find_busiest_group(). If there are no imbalance, then
+ * f_b_g() will return NULL. However when sched_mc={1,2} then
+ * f_b_g() will select a group from which a running task may be
+ * pulled to this cpu in order to make the other package idle.
+ * If there is no opportunity to make a package idle and if
+ * there are no imbalance, then f_b_g() will return NULL and no
+ * action will be taken in load_balance_newidle().
+ *
+ * Under normal task pull operation due to imbalance, there
+ * will be more than one task in the source run queue and
+ * move_tasks() will succeed. ld_moved will be true and this
+ * active balance code will not be triggered.
+ */
+
+ /* Lock busiest in correct order while this_rq is held */
+ double_lock_balance(this_rq, busiest);
+
+ /*
+ * don't kick the migration_thread, if the curr
+ * task on busiest cpu can't be moved to this_cpu
+ */
+ if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
+ double_unlock_balance(this_rq, busiest);
+ all_pinned = 1;
+ return ld_moved;
+ }
+
+ if (!busiest->active_balance) {
+ busiest->active_balance = 1;
+ busiest->push_cpu = this_cpu;
+ active_balance = 1;
+ }
+
+ double_unlock_balance(this_rq, busiest);
+ /*
+ * Should not call ttwu while holding a rq->lock
+ */
+ spin_unlock(&this_rq->lock);
+ if (active_balance)
+ wake_up_process(busiest->migration_thread);
+ spin_lock(&this_rq->lock);
+
} else
sd->nr_balance_failed = 0;
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
*/
-void account_user_time(struct task_struct *p, cputime_t cputime)
+void account_user_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
cputime64_t tmp;
+ /* Add user time to process. */
p->utime = cputime_add(p->utime, cputime);
+ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
account_group_user_time(p, cputime);
/* Add user time to cpustat. */
* Account guest cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in virtual machine since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
*/
-static void account_guest_time(struct task_struct *p, cputime_t cputime)
+static void account_guest_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
{
cputime64_t tmp;
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
tmp = cputime_to_cputime64(cputime);
+ /* Add guest time to process. */
p->utime = cputime_add(p->utime, cputime);
+ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
account_group_user_time(p, cputime);
p->gtime = cputime_add(p->gtime, cputime);
+ /* Add guest time to cpustat. */
cpustat->user = cputime64_add(cpustat->user, tmp);
cpustat->guest = cputime64_add(cpustat->guest, tmp);
}
/*
- * Account scaled user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
- */
-void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
-{
- p->utimescaled = cputime_add(p->utimescaled, cputime);
-}
-
-/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
* @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
*/
void account_system_time(struct task_struct *p, int hardirq_offset,
- cputime_t cputime)
+ cputime_t cputime, cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- struct rq *rq = this_rq();
cputime64_t tmp;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
- account_guest_time(p, cputime);
+ account_guest_time(p, cputime, cputime_scaled);
return;
}
+ /* Add system time to process. */
p->stime = cputime_add(p->stime, cputime);
+ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
cpustat->irq = cputime64_add(cpustat->irq, tmp);
else if (softirq_count())
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
- else if (p != rq->idle)
- cpustat->system = cputime64_add(cpustat->system, tmp);
- else if (atomic_read(&rq->nr_iowait) > 0)
- cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
else
- cpustat->idle = cputime64_add(cpustat->idle, tmp);
+ cpustat->system = cputime64_add(cpustat->system, tmp);
+
/* Account for system time used */
acct_update_integrals(p);
}
/*
- * Account scaled system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
+ * Account for involuntary wait time.
+ * @steal: the cpu time spent in involuntary wait
*/
-void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
+void account_steal_time(cputime_t cputime)
{
- p->stimescaled = cputime_add(p->stimescaled, cputime);
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t cputime64 = cputime_to_cputime64(cputime);
+
+ cpustat->steal = cputime64_add(cpustat->steal, cputime64);
}
/*
- * Account for involuntary wait time.
- * @p: the process from which the cpu time has been stolen
- * @steal: the cpu time spent in involuntary wait
+ * Account for idle time.
+ * @cputime: the cpu time spent in idle wait
*/
-void account_steal_time(struct task_struct *p, cputime_t steal)
+void account_idle_time(cputime_t cputime)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- cputime64_t tmp = cputime_to_cputime64(steal);
+ cputime64_t cputime64 = cputime_to_cputime64(cputime);
struct rq *rq = this_rq();
- if (p == rq->idle) {
- p->stime = cputime_add(p->stime, steal);
- if (atomic_read(&rq->nr_iowait) > 0)
- cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
- else
- cpustat->idle = cputime64_add(cpustat->idle, tmp);
- } else
- cpustat->steal = cputime64_add(cpustat->steal, tmp);
+ if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+ else
+ cpustat->idle = cputime64_add(cpustat->idle, cputime64);
}
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+
+/*
+ * Account a single tick of cpu time.
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: indicates if the tick is a user or a system tick
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+ cputime_t one_jiffy = jiffies_to_cputime(1);
+ cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+ struct rq *rq = this_rq();
+
+ if (user_tick)
+ account_user_time(p, one_jiffy, one_jiffy_scaled);
+ else if (p != rq->idle)
+ account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+ one_jiffy_scaled);
+ else
+ account_idle_time(one_jiffy);
+}
+
+/*
+ * Account multiple ticks of steal time.
+ * @p: the process from which the cpu time has been stolen
+ * @ticks: number of stolen ticks
+ */
+void account_steal_ticks(unsigned long ticks)
+{
+ account_steal_time(jiffies_to_cputime(ticks));
+}
+
+/*
+ * Account multiple ticks of idle time.
+ * @ticks: number of stolen ticks
+ */
+void account_idle_ticks(unsigned long ticks)
+{
+ account_idle_time(jiffies_to_cputime(ticks));
+}
+
+#endif
+
/*
* Use precise platform statistics if available:
*/
/*
* Underflow?
*/
- if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
return;
/*
* Is the spinlock portion underflowing?
* sys_setpriority is a more generic, but much slower function that
* does similar things.
*/
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
{
long nice, retval;
set_load_weight(p);
}
+/*
+ * check the target process has a UID that matches the current process's
+ */
+static bool check_same_owner(struct task_struct *p)
+{
+ const struct cred *cred = current_cred(), *pcred;
+ bool match;
+
+ rcu_read_lock();
+ pcred = __task_cred(p);
+ match = (cred->euid == pcred->euid ||
+ cred->euid == pcred->uid);
+ rcu_read_unlock();
+ return match;
+}
+
static int __sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param, bool user)
{
return -EPERM;
/* can't change other user's priorities */
- if ((current->euid != p->euid) &&
- (current->euid != p->uid))
+ if (!check_same_owner(p))
return -EPERM;
}
* @policy: new policy.
* @param: structure containing the new RT priority.
*/
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+ struct sched_param __user *, param)
{
/* negative values for policy are not valid */
if (policy < 0)
* @pid: the pid in question.
* @param: structure containing the new RT priority.
*/
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
return do_sched_setscheduler(pid, -1, param);
}
* sys_sched_getscheduler - get the policy (scheduling class) of a thread
* @pid: the pid in question.
*/
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
{
struct task_struct *p;
int retval;
* @pid: the pid in question.
* @param: structure containing the RT priority.
*/
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
{
struct sched_param lp;
struct task_struct *p;
goto out_free_cpus_allowed;
}
retval = -EPERM;
- if ((current->euid != p->euid) && (current->euid != p->uid) &&
- !capable(CAP_SYS_NICE))
+ if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p, 0, NULL);
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask
*/
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
cpumask_var_t new_mask;
int retval;
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
*/
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
int ret;
cpumask_var_t mask;
* This function yields the current CPU to other tasks. If there are no
* other threads running on this CPU then this function will return.
*/
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
{
struct rq *rq = this_rq_lock();
* this syscall returns the maximum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
{
int ret = -EINVAL;
* this syscall returns the minimum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
{
int ret = -EINVAL;
* this syscall writes the default timeslice value of a given process
* into the user-space timespec buffer. A value of '0' means infinity.
*/
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ struct timespec __user *, interval)
{
struct task_struct *p;
unsigned int time_slice;
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
int dest_cpu;
- /* FIXME: Use cpumask_of_node here. */
- cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
- const struct cpumask *nodemask = &_nodemask;
+ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
again:
/* Look for allowed, online CPU in same node. */
spin_unlock_irqrestore(&rq->lock, flags);
}
-static int init_rootdomain(struct root_domain *rd, bool bootmem)
+static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
{
memset(rd, 0, sizeof(*rd));
}
if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
- goto free_rd;
+ goto out;
if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
goto free_span;
if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
free_cpumask_var(rd->online);
free_span:
free_cpumask_var(rd->span);
-free_rd:
- kfree(rd);
+out:
return -ENOMEM;
}
static void sched_domain_node_span(int node, struct cpumask *span)
{
nodemask_t used_nodes;
- /* FIXME: use cpumask_of_node() */
- node_to_cpumask_ptr(nodemask, node);
int i;
- cpus_clear(*span);
+ cpumask_clear(span);
nodes_clear(used_nodes);
- cpus_or(*span, *span, *nodemask);
+ cpumask_or(span, span, cpumask_of_node(node));
node_set(node, used_nodes);
for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
int next_node = find_next_best_node(node, &used_nodes);
- node_to_cpumask_ptr_next(nodemask, next_node);
- cpus_or(*span, *span, *nodemask);
+ cpumask_or(span, span, cpumask_of_node(next_node));
}
}
#endif /* CONFIG_NUMA */
{
int group;
#ifdef CONFIG_SCHED_MC
- /* FIXME: Use cpu_coregroup_mask. */
- *mask = cpu_coregroup_map(cpu);
- cpus_and(*mask, *mask, *cpu_map);
+ cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
group = cpumask_first(mask);
#elif defined(CONFIG_SCHED_SMT)
cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
* groups, so roll our own. Now each node has its own list of groups which
* gets dynamically allocated.
*/
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
static struct sched_group ***sched_group_nodes_bycpu;
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
struct cpumask *nodemask)
{
int group;
- /* FIXME: use cpumask_of_node */
- node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
- cpumask_and(nodemask, pnodemask, cpu_map);
+ cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
group = cpumask_first(nodemask);
if (sg)
for (i = 0; i < nr_node_ids; i++) {
struct sched_group *oldsg, *sg = sched_group_nodes[i];
- /* FIXME: Use cpumask_of_node */
- node_to_cpumask_ptr(pnodemask, i);
- cpus_and(*nodemask, *pnodemask, *cpu_map);
+ cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
if (cpumask_empty(nodemask))
continue;
for_each_cpu(i, cpu_map) {
struct sched_domain *sd = NULL, *p;
- /* FIXME: use cpumask_of_node */
- *nodemask = node_to_cpumask(cpu_to_node(i));
- cpus_and(*nodemask, *nodemask, *cpu_map);
+ cpumask_and(nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map);
#ifdef CONFIG_NUMA
if (cpumask_weight(cpu_map) >
SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
- sd = &per_cpu(allnodes_domains, i);
+ sd = &per_cpu(allnodes_domains, i).sd;
SD_INIT(sd, ALLNODES);
set_domain_attribute(sd, attr);
cpumask_copy(sched_domain_span(sd), cpu_map);
} else
p = NULL;
- sd = &per_cpu(node_domains, i);
+ sd = &per_cpu(node_domains, i).sd;
SD_INIT(sd, NODE);
set_domain_attribute(sd, attr);
sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
sd = &per_cpu(core_domains, i).sd;
SD_INIT(sd, MC);
set_domain_attribute(sd, attr);
- *sched_domain_span(sd) = cpu_coregroup_map(i);
- cpumask_and(sched_domain_span(sd),
- sched_domain_span(sd), cpu_map);
+ cpumask_and(sched_domain_span(sd), cpu_map,
+ cpu_coregroup_mask(i));
sd->parent = p;
p->child = sd;
cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
#ifdef CONFIG_SCHED_MC
/* Set up multi-core groups */
for_each_cpu(i, cpu_map) {
- /* FIXME: Use cpu_coregroup_mask */
- *this_core_map = cpu_coregroup_map(i);
- cpus_and(*this_core_map, *this_core_map, *cpu_map);
+ cpumask_and(this_core_map, cpu_coregroup_mask(i), cpu_map);
if (i != cpumask_first(this_core_map))
continue;
/* Set up physical groups */
for (i = 0; i < nr_node_ids; i++) {
- /* FIXME: Use cpumask_of_node */
- *nodemask = node_to_cpumask(i);
- cpus_and(*nodemask, *nodemask, *cpu_map);
+ cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
if (cpumask_empty(nodemask))
continue;
struct sched_group *sg, *prev;
int j;
- /* FIXME: Use cpumask_of_node */
- *nodemask = node_to_cpumask(i);
cpumask_clear(covered);
-
- cpus_and(*nodemask, *nodemask, *cpu_map);
+ cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
if (cpumask_empty(nodemask)) {
sched_group_nodes[i] = NULL;
continue;
for_each_cpu(j, nodemask) {
struct sched_domain *sd;
- sd = &per_cpu(node_domains, j);
+ sd = &per_cpu(node_domains, j).sd;
sd->groups = sg;
}
sg->__cpu_power = 0;
for (j = 0; j < nr_node_ids; j++) {
int n = (i + j) % nr_node_ids;
- /* FIXME: Use cpumask_of_node */
- node_to_cpumask_ptr(pnodemask, n);
cpumask_complement(notcovered, covered);
cpumask_and(tmpmask, notcovered, cpu_map);
if (cpumask_empty(tmpmask))
break;
- cpumask_and(tmpmask, tmpmask, pnodemask);
+ cpumask_and(tmpmask, tmpmask, cpumask_of_node(n));
if (cpumask_empty(tmpmask))
continue;
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static void arch_reinit_sched_domains(void)
{
get_online_cpus();
rebuild_sched_domains();
put_online_cpus();
-
- return 0;
}
static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
{
- int ret;
unsigned int level = 0;
if (sscanf(buf, "%u", &level) != 1)
else
sched_mc_power_savings = level;
- ret = arch_reinit_sched_domains();
+ arch_reinit_sched_domains();
- return ret ? ret : count;
+ return count;
}
#ifdef CONFIG_SCHED_MC
sched_smt_power_savings_store);
#endif
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
{
int err = 0;
runtime = d->rt_runtime;
}
+#ifdef CONFIG_USER_SCHED
+ if (tg == &root_task_group) {
+ period = global_rt_period();
+ runtime = global_rt_runtime();
+ }
+#endif
+
/*
* Cannot have more runtime than the period.
*/
kfree(ca);
}
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+{
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ u64 data;
+
+#ifndef CONFIG_64BIT
+ /*
+ * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+ */
+ spin_lock_irq(&cpu_rq(cpu)->lock);
+ data = *cpuusage;
+ spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+ data = *cpuusage;
+#endif
+
+ return data;
+}
+
+static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+{
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+
+#ifndef CONFIG_64BIT
+ /*
+ * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+ */
+ spin_lock_irq(&cpu_rq(cpu)->lock);
+ *cpuusage = val;
+ spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+ *cpuusage = val;
+#endif
+}
+
/* return total cpu usage (in nanoseconds) of a group */
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
{
u64 totalcpuusage = 0;
int i;
- for_each_possible_cpu(i) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
-
- /*
- * Take rq->lock to make 64-bit addition safe on 32-bit
- * platforms.
- */
- spin_lock_irq(&cpu_rq(i)->lock);
- totalcpuusage += *cpuusage;
- spin_unlock_irq(&cpu_rq(i)->lock);
- }
+ for_each_present_cpu(i)
+ totalcpuusage += cpuacct_cpuusage_read(ca, i);
return totalcpuusage;
}
goto out;
}
- for_each_possible_cpu(i) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
+ for_each_present_cpu(i)
+ cpuacct_cpuusage_write(ca, i, 0);
- spin_lock_irq(&cpu_rq(i)->lock);
- *cpuusage = 0;
- spin_unlock_irq(&cpu_rq(i)->lock);
- }
out:
return err;
}
+static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+ struct seq_file *m)
+{
+ struct cpuacct *ca = cgroup_ca(cgroup);
+ u64 percpu;
+ int i;
+
+ for_each_present_cpu(i) {
+ percpu = cpuacct_cpuusage_read(ca, i);
+ seq_printf(m, "%llu ", (unsigned long long) percpu);
+ }
+ seq_printf(m, "\n");
+ return 0;
+}
+
static struct cftype files[] = {
{
.name = "usage",
.read_u64 = cpuusage_read,
.write_u64 = cpuusage_write,
},
+ {
+ .name = "usage_percpu",
+ .read_seq_string = cpuacct_percpu_seq_read,
+ },
+
};
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)