tunnels: fix netns vs proto registration ordering
[safe/jmp/linux-2.6] / kernel / time / tick-sched.c
index dc17ffc..f992762 100644 (file)
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz);
  * value. We do this unconditionally on any cpu, as we don't know whether the
  * cpu, which has the update task assigned is in a long sleep.
  */
-void tick_nohz_update_jiffies(void)
+static void tick_nohz_update_jiffies(ktime_t now)
 {
        int cpu = smp_processor_id();
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
        unsigned long flags;
-       ktime_t now;
-
-       if (!ts->tick_stopped)
-               return;
 
-       cpu_clear(cpu, nohz_cpu_mask);
-       now = ktime_get();
+       cpumask_clear_cpu(cpu, nohz_cpu_mask);
        ts->idle_waketime = now;
 
        local_irq_save(flags);
@@ -155,20 +150,17 @@ void tick_nohz_update_jiffies(void)
        touch_softlockup_watchdog();
 }
 
-static void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu, ktime_t now)
 {
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+       ktime_t delta;
 
-       if (ts->idle_active) {
-               ktime_t now, delta;
-               now = ktime_get();
-               delta = ktime_sub(now, ts->idle_entrytime);
-               ts->idle_lastupdate = now;
-               ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-               ts->idle_active = 0;
+       delta = ktime_sub(now, ts->idle_entrytime);
+       ts->idle_lastupdate = now;
+       ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+       ts->idle_active = 0;
 
-               sched_clock_idle_wakeup_event(0);
-       }
+       sched_clock_idle_wakeup_event(0);
 }
 
 static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -216,12 +208,29 @@ void tick_nohz_stop_sched_tick(int inidle)
        struct tick_sched *ts;
        ktime_t last_update, expires, now;
        struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+       u64 time_delta;
        int cpu;
 
        local_irq_save(flags);
 
        cpu = smp_processor_id();
        ts = &per_cpu(tick_cpu_sched, cpu);
+
+       /*
+        * Call to tick_nohz_start_idle stops the last_update_time from being
+        * updated. Thus, it must not be called in the event we are called from
+        * irq_exit() with the prior state different than idle.
+        */
+       if (!inidle && !ts->inidle)
+               goto end;
+
+       /*
+        * Set ts->inidle unconditionally. Even if the system did not
+        * switch to NOHZ mode the cpu frequency governors rely on the
+        * update of the idle time accounting in tick_nohz_start_idle().
+        */
+       ts->inidle = 1;
+
        now = tick_nohz_start_idle(ts);
 
        /*
@@ -239,11 +248,6 @@ void tick_nohz_stop_sched_tick(int inidle)
        if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
                goto end;
 
-       if (!inidle && !ts->inidle)
-               goto end;
-
-       ts->inidle = 1;
-
        if (need_resched())
                goto end;
 
@@ -252,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 
                if (ratelimit < 10) {
                        printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-                              local_softirq_pending());
+                              (unsigned int) local_softirq_pending());
                        ratelimit++;
                }
                goto end;
@@ -264,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle)
                seq = read_seqbegin(&xtime_lock);
                last_update = last_jiffies_update;
                last_jiffies = jiffies;
+               time_delta = timekeeping_max_deferment();
        } while (read_seqretry(&xtime_lock, seq));
 
-       /* Get the next timer wheel timer */
-       next_jiffies = get_next_timer_interrupt(last_jiffies);
-       delta_jiffies = next_jiffies - last_jiffies;
-
-       if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
+       if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+           arch_needs_cpu(cpu)) {
+               next_jiffies = last_jiffies + 1;
                delta_jiffies = 1;
+       } else {
+               /* Get the next timer wheel timer */
+               next_jiffies = get_next_timer_interrupt(last_jiffies);
+               delta_jiffies = next_jiffies - last_jiffies;
+       }
        /*
         * Do not stop the tick, if we are only one off
         * or if the cpu is required for rcu
@@ -282,8 +290,60 @@ void tick_nohz_stop_sched_tick(int inidle)
        /* Schedule the tick, if we are at least one jiffie off */
        if ((long)delta_jiffies >= 1) {
 
+               /*
+                * If this cpu is the one which updates jiffies, then
+                * give up the assignment and let it be taken by the
+                * cpu which runs the tick timer next, which might be
+                * this cpu as well. If we don't drop this here the
+                * jiffies might be stale and do_timer() never
+                * invoked. Keep track of the fact that it was the one
+                * which had the do_timer() duty last. If this cpu is
+                * the one which had the do_timer() duty last, we
+                * limit the sleep time to the timekeeping
+                * max_deferment value which we retrieved
+                * above. Otherwise we can sleep as long as we want.
+                */
+               if (cpu == tick_do_timer_cpu) {
+                       tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+                       ts->do_timer_last = 1;
+               } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+                       time_delta = KTIME_MAX;
+                       ts->do_timer_last = 0;
+               } else if (!ts->do_timer_last) {
+                       time_delta = KTIME_MAX;
+               }
+
+               /*
+                * calculate the expiry time for the next timer wheel
+                * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+                * that there is no timer pending or at least extremely
+                * far into the future (12 days for HZ=1000). In this
+                * case we set the expiry to the end of time.
+                */
+               if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+                       /*
+                        * Calculate the time delta for the next timer event.
+                        * If the time delta exceeds the maximum time delta
+                        * permitted by the current clocksource then adjust
+                        * the time delta accordingly to ensure the
+                        * clocksource does not wrap.
+                        */
+                       time_delta = min_t(u64, time_delta,
+                                          tick_period.tv64 * delta_jiffies);
+               }
+
+               if (time_delta < KTIME_MAX)
+                       expires = ktime_add_ns(last_update, time_delta);
+               else
+                       expires.tv64 = KTIME_MAX;
+
                if (delta_jiffies > 1)
-                       cpu_set(cpu, nohz_cpu_mask);
+                       cpumask_set_cpu(cpu, nohz_cpu_mask);
+
+               /* Skip reprogram of event if it's not changed */
+               if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
+                       goto out;
+
                /*
                 * nohz_stop_sched_tick can be called several times before
                 * the nohz_restart_sched_tick is called. This happens when
@@ -296,7 +356,7 @@ void tick_nohz_stop_sched_tick(int inidle)
                                /*
                                 * sched tick not stopped!
                                 */
-                               cpu_clear(cpu, nohz_cpu_mask);
+                               cpumask_clear_cpu(cpu, nohz_cpu_mask);
                                goto out;
                        }
 
@@ -306,43 +366,24 @@ void tick_nohz_stop_sched_tick(int inidle)
                        rcu_enter_nohz();
                }
 
-               /*
-                * If this cpu is the one which updates jiffies, then
-                * give up the assignment and let it be taken by the
-                * cpu which runs the tick timer next, which might be
-                * this cpu as well. If we don't drop this here the
-                * jiffies might be stale and do_timer() never
-                * invoked.
-                */
-               if (cpu == tick_do_timer_cpu)
-                       tick_do_timer_cpu = TICK_DO_TIMER_NONE;
-
                ts->idle_sleeps++;
 
+               /* Mark expires */
+               ts->idle_expires = expires;
+
                /*
-                * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
-                * there is no timer pending or at least extremly far
-                * into the future (12 days for HZ=1000). In this case
-                * we simply stop the tick timer:
+                * If the expiration time == KTIME_MAX, then
+                * in this case we simply stop the tick timer.
                 */
-               if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
-                       ts->idle_expires.tv64 = KTIME_MAX;
+                if (unlikely(expires.tv64 == KTIME_MAX)) {
                        if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                                hrtimer_cancel(&ts->sched_timer);
                        goto out;
                }
 
-               /*
-                * calculate the expiry time for the next timer wheel
-                * timer
-                */
-               expires = ktime_add_ns(last_update, tick_period.tv64 *
-                                      delta_jiffies);
-               ts->idle_expires = expires;
-
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
                        hrtimer_start(&ts->sched_timer, expires,
-                                     HRTIMER_MODE_ABS);
+                                     HRTIMER_MODE_ABS_PINNED);
                        /* Check, if the timer was already in the past */
                        if (hrtimer_active(&ts->sched_timer))
                                goto out;
@@ -354,7 +395,7 @@ void tick_nohz_stop_sched_tick(int inidle)
                 * softirq.
                 */
                tick_do_update_jiffies64(ktime_get());
-               cpu_clear(cpu, nohz_cpu_mask);
+               cpumask_clear_cpu(cpu, nohz_cpu_mask);
        }
        raise_softirq_irqoff(TIMER_SOFTIRQ);
 out:
@@ -388,7 +429,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
                        hrtimer_start_expires(&ts->sched_timer,
-                                     HRTIMER_MODE_ABS);
+                                             HRTIMER_MODE_ABS_PINNED);
                        /* Check, if the timer was already in the past */
                        if (hrtimer_active(&ts->sched_timer))
                                break;
@@ -412,11 +453,17 @@ void tick_nohz_restart_sched_tick(void)
 {
        int cpu = smp_processor_id();
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
        unsigned long ticks;
+#endif
        ktime_t now;
 
        local_irq_disable();
-       tick_nohz_stop_idle(cpu);
+       if (ts->idle_active || (ts->inidle && ts->tick_stopped))
+               now = ktime_get();
+
+       if (ts->idle_active)
+               tick_nohz_stop_idle(cpu, now);
 
        if (!ts->inidle || !ts->tick_stopped) {
                ts->inidle = 0;
@@ -430,10 +477,10 @@ void tick_nohz_restart_sched_tick(void)
 
        /* Update jiffies first */
        select_nohz_load_balancer(0);
-       now = ktime_get();
        tick_do_update_jiffies64(now);
-       cpu_clear(cpu, nohz_cpu_mask);
+       cpumask_clear_cpu(cpu, nohz_cpu_mask);
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
        /*
         * We stopped the tick in idle. Update process times would miss the
         * time we slept as update_process_times does only a 1 tick
@@ -443,12 +490,9 @@ void tick_nohz_restart_sched_tick(void)
        /*
         * We might be one off. Do not randomly account a huge number of ticks!
         */
-       if (ticks && ticks < LONG_MAX) {
-               add_preempt_count(HARDIRQ_OFFSET);
-               account_system_time(current, HARDIRQ_OFFSET,
-                                   jiffies_to_cputime(ticks));
-               sub_preempt_count(HARDIRQ_OFFSET);
-       }
+       if (ticks && ticks < LONG_MAX)
+               account_idle_ticks(ticks);
+#endif
 
        touch_softlockup_watchdog();
        /*
@@ -566,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void)
  * timer and do not touch the other magic bits which need to be done
  * when idle is left.
  */
-static void tick_nohz_kick_tick(int cpu)
+static void tick_nohz_kick_tick(int cpu, ktime_t now)
 {
 #if 0
        /* Switch back to 2.6.27 behaviour */
 
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-       ktime_t delta, now;
-
-       if (!ts->tick_stopped)
-               return;
+       ktime_t delta;
 
        /*
         * Do not touch the tick device, when the next expiry is either
         * already reached or less/equal than the tick period.
         */
-       now = ktime_get();
        delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
        if (delta.tv64 <= tick_period.tv64)
                return;
@@ -590,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu)
 #endif
 }
 
+static inline void tick_check_nohz(int cpu)
+{
+       struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+       ktime_t now;
+
+       if (!ts->idle_active && !ts->tick_stopped)
+               return;
+       now = ktime_get();
+       if (ts->idle_active)
+               tick_nohz_stop_idle(cpu, now);
+       if (ts->tick_stopped) {
+               tick_nohz_update_jiffies(now);
+               tick_nohz_kick_tick(cpu, now);
+       }
+}
+
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
+static inline void tick_check_nohz(int cpu) { }
 
 #endif /* NO_HZ */
 
@@ -602,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
 void tick_check_idle(int cpu)
 {
        tick_check_oneshot_broadcast(cpu);
-#ifdef CONFIG_NO_HZ
-       tick_nohz_stop_idle(cpu);
-       tick_nohz_update_jiffies();
-       tick_nohz_kick_tick(cpu);
-#endif
+       tick_check_nohz(cpu);
 }
 
 /*
@@ -681,7 +734,6 @@ void tick_setup_sched_timer(void)
         */
        hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        ts->sched_timer.function = tick_sched_timer;
-       ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
        /* Get the next period (per cpu) */
        hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
@@ -692,7 +744,8 @@ void tick_setup_sched_timer(void)
 
        for (;;) {
                hrtimer_forward(&ts->sched_timer, now, tick_period);
-               hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
+               hrtimer_start_expires(&ts->sched_timer,
+                                     HRTIMER_MODE_ABS_PINNED);
                /* Check, if the timer was already in the past */
                if (hrtimer_active(&ts->sched_timer))
                        break;