sh: convert /proc/cpu/alignment, /proc/cpu/kernel_alignment to seq_file
[safe/jmp/linux-2.6] / lib / proportions.c
index 332d8c5..d50746a 100644 (file)
 #include <linux/proportions.h>
 #include <linux/rcupdate.h>
 
-/*
- * Limit the time part in order to ensure there are some bits left for the
- * cycle counter.
- */
-#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
-
 int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 {
        int err;
@@ -89,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift)
        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
-       err = percpu_counter_init_irq(&pd->pg[0].events, 0);
+       err = percpu_counter_init(&pd->pg[0].events, 0);
        if (err)
                goto out;
 
-       err = percpu_counter_init_irq(&pd->pg[1].events, 0);
+       err = percpu_counter_init(&pd->pg[1].events, 0);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);
 
@@ -153,6 +147,7 @@ out:
  * this is used to track the active references.
  */
 static struct prop_global *prop_get_global(struct prop_descriptor *pd)
+__acquires(RCU)
 {
        int index;
 
@@ -166,6 +161,7 @@ static struct prop_global *prop_get_global(struct prop_descriptor *pd)
 }
 
 static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
+__releases(RCU)
 {
        rcu_read_unlock();
 }
@@ -190,12 +186,14 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  * PERCPU
  */
 
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
 int prop_local_init_percpu(struct prop_local_percpu *pl)
 {
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
-       return percpu_counter_init_irq(&pl->events, 0);
+       return percpu_counter_init(&pl->events, 0);
 }
 
 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
@@ -230,31 +228,24 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
 
        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+
        /*
         * For each missed period, we half the local counter.
         * basically:
         *   pl->events >> (global_period - pl->period);
-        *
-        * but since the distributed nature of percpu counters make division
-        * rather hard, use a regular subtraction loop. This is safe, because
-        * the events will only every be incremented, hence the subtraction
-        * can never result in a negative number.
         */
-       while (pl->period != global_period) {
-               unsigned long val = percpu_counter_read(&pl->events);
-               unsigned long half = (val + 1) >> 1;
-
-               /*
-                * Half of zero won't be much less, break out.
-                * This limits the loop to shift iterations, even
-                * if we missed a million.
-                */
-               if (!val)
-                       break;
-
-               percpu_counter_add(&pl->events, -half);
-               pl->period += period;
-       }
+       period = (global_period - pl->period) >> (pg->shift - 1);
+       if (period < BITS_PER_LONG) {
+               s64 val = percpu_counter_read(&pl->events);
+
+               if (val < (nr_cpu_ids * PROP_BATCH))
+                       val = percpu_counter_sum(&pl->events);
+
+               __percpu_counter_add(&pl->events, -val + (val >> period),
+                                       PROP_BATCH);
+       } else
+               percpu_counter_set(&pl->events, 0);
+
        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
 }
@@ -267,8 +258,40 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
        struct prop_global *pg = prop_get_global(pd);
 
        prop_norm_percpu(pg, pl);
+       __percpu_counter_add(&pl->events, 1, PROP_BATCH);
+       percpu_counter_add(&pg->events, 1);
+       prop_put_global(pd, pg);
+}
+
+/*
+ * identical to __prop_inc_percpu, except that it limits this pl's fraction to
+ * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
+ */
+void __prop_inc_percpu_max(struct prop_descriptor *pd,
+                          struct prop_local_percpu *pl, long frac)
+{
+       struct prop_global *pg = prop_get_global(pd);
+
+       prop_norm_percpu(pg, pl);
+
+       if (unlikely(frac != PROP_FRAC_BASE)) {
+               unsigned long period_2 = 1UL << (pg->shift - 1);
+               unsigned long counter_mask = period_2 - 1;
+               unsigned long global_count;
+               long numerator, denominator;
+
+               numerator = percpu_counter_read_positive(&pl->events);
+               global_count = percpu_counter_read(&pg->events);
+               denominator = period_2 + (global_count & counter_mask);
+
+               if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
+                       goto out_put;
+       }
+
        percpu_counter_add(&pl->events, 1);
        percpu_counter_add(&pg->events, 1);
+
+out_put:
        prop_put_global(pd, pg);
 }