sched: Try to deal with low capacity
authorPeter Zijlstra <a.p.zijlstra@chello.nl>
Tue, 1 Sep 2009 08:34:38 +0000 (10:34 +0200)
committerIngo Molnar <mingo@elte.hu>
Fri, 4 Sep 2009 08:09:55 +0000 (10:09 +0200)
When the capacity drops low, we want to migrate load away.
Allow the load-balancer to remove all tasks when we hit rock
bottom.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
LKML-Reference: <20090901083826.342231003@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched.c

index ab532b5..5f5b359 100644 (file)
@@ -3908,8 +3908,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
        if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
                sgs->group_imb = 1;
 
-       sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
+       sgs->group_capacity =
+               DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE);
 }
 
 /**
@@ -3959,7 +3959,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                 * and move all the excess tasks away.
                 */
                if (prefer_sibling)
-                       sgs.group_capacity = 1;
+                       sgs.group_capacity = min(sgs.group_capacity, 1UL);
 
                if (local_group) {
                        sds->this_load = sgs.avg_load;
@@ -4191,6 +4191,26 @@ ret:
        return NULL;
 }
 
+static struct sched_group *group_of(int cpu)
+{
+       struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+       if (!sd)
+               return NULL;
+
+       return sd->groups;
+}
+
+static unsigned long power_of(int cpu)
+{
+       struct sched_group *group = group_of(cpu);
+
+       if (!group)
+               return SCHED_LOAD_SCALE;
+
+       return group->__cpu_power;
+}
+
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
@@ -4203,15 +4223,18 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
        int i;
 
        for_each_cpu(i, sched_group_cpus(group)) {
+               unsigned long power = power_of(i);
+               unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
                unsigned long wl;
 
                if (!cpumask_test_cpu(i, cpus))
                        continue;
 
                rq = cpu_rq(i);
-               wl = weighted_cpuload(i);
+               wl = weighted_cpuload(i) * SCHED_LOAD_SCALE;
+               wl /= power;
 
-               if (rq->nr_running == 1 && wl > imbalance)
+               if (capacity && rq->nr_running == 1 && wl > imbalance)
                        continue;
 
                if (wl > max_load) {