X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fhrtimer.c;h=3e1c36e7998fdbeffa17142bade5c358c61a2857;hb=83daee06adeed7b294802c998d5e03ea7d856aa1;hp=0ad3f3d6d10d7ef0be062ad6c015d6f5686bfcf8;hpb=51bc39f4ba35bae153b32145077fb1109bcae14c;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0ad3f3d..3e1c36e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -43,39 +43,12 @@ #include #include #include +#include +#include #include -/** - * ktime_get - get the monotonic time in ktime_t format - * - * returns the time in ktime_t format - */ -ktime_t ktime_get(void) -{ - struct timespec now; - - ktime_get_ts(&now); - - return timespec_to_ktime(now); -} -EXPORT_SYMBOL_GPL(ktime_get); - -/** - * ktime_get_real - get the real (wall-) time in ktime_t format - * - * returns the time in ktime_t format - */ -ktime_t ktime_get_real(void) -{ - struct timespec now; - - getnstimeofday(&now); - - return timespec_to_ktime(now); -} - -EXPORT_SYMBOL_GPL(ktime_get_real); +#include /* * The timer bases: @@ -104,31 +77,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = } }; -/** - * ktime_get_ts - get the monotonic clock in timespec format - * @ts: pointer to timespec variable - * - * The function calculates the monotonic clock from the realtime - * clock and the wall_to_monotonic offset and stores the result - * in normalized timespec format in the variable pointed to by @ts. - */ -void ktime_get_ts(struct timespec *ts) -{ - struct timespec tomono; - unsigned long seq; - - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(ts); - tomono = wall_to_monotonic; - - } while (read_seqretry(&xtime_lock, seq)); - - set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec); -} -EXPORT_SYMBOL_GPL(ktime_get_ts); - /* * Get the coarse grained time at the softirq based on xtime and * wall_to_monotonic. @@ -189,21 +137,65 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } } + +/* + * Get the preferred target CPU for NOHZ + */ +static int hrtimer_get_target(int this_cpu, int pinned) +{ +#ifdef CONFIG_NO_HZ + if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { + int preferred_cpu = get_nohz_load_balancer(); + + if (preferred_cpu >= 0) + return preferred_cpu; + } +#endif + return this_cpu; +} + +/* + * With HIGHRES=y we do not migrate the timer when it is expiring + * before the next event on the target cpu because we cannot reprogram + * the target cpu hardware and we would cause it to fire late. + * + * Called with cpu_base->lock of target cpu held. + */ +static int +hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +{ +#ifdef CONFIG_HIGH_RES_TIMERS + ktime_t expires; + + if (!new_base->cpu_base->hres_active) + return 0; + + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); + return expires.tv64 <= new_base->cpu_base->expires_next.tv64; +#else + return 0; +#endif +} + /* * Switch the timer base to the current CPU when possible. */ static inline struct hrtimer_clock_base * -switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) +switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, + int pinned) { struct hrtimer_clock_base *new_base; struct hrtimer_cpu_base *new_cpu_base; + int this_cpu = smp_processor_id(); + int cpu = hrtimer_get_target(this_cpu, pinned); - new_cpu_base = &__get_cpu_var(hrtimer_bases); +again: + new_cpu_base = &per_cpu(hrtimer_bases, cpu); new_base = &new_cpu_base->clock_base[base->index]; if (base != new_base) { /* - * We are trying to schedule the timer on the local CPU. + * We are trying to move timer to new_base. * However we can't change timer's base while it is running, * so we keep it on the same CPU. No hassle vs. reprogramming * the event source in the high resolution case. The softirq @@ -218,6 +210,14 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) timer->base = NULL; spin_unlock(&base->cpu_base->lock); spin_lock(&new_base->cpu_base->lock); + + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + spin_unlock(&new_base->cpu_base->lock); + spin_lock(&base->cpu_base->lock); + timer->base = base; + goto again; + } timer->base = new_base; } return new_base; @@ -235,7 +235,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) return base; } -# define switch_hrtimer_base(t, b) (b) +# define switch_hrtimer_base(t, b, p) (b) #endif /* !CONFIG_SMP */ @@ -332,6 +332,8 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) return res; } +EXPORT_SYMBOL_GPL(ktime_add_safe); + #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr hrtimer_debug_descr; @@ -429,6 +431,7 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, debug_object_init_on_stack(timer, &hrtimer_debug_descr); __hrtimer_init(timer, clock_id, mode); } +EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); void destroy_hrtimer_on_stack(struct hrtimer *timer) { @@ -441,20 +444,24 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif -/* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) +static inline void +debug_init(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode) { - return timer->state & HRTIMER_STATE_PENDING; + debug_hrtimer_init(timer); + trace_hrtimer_init(timer, clockid, mode); } -/* - * Remove a timer from the callback pending list - */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) +static inline void debug_activate(struct hrtimer *timer) +{ + debug_hrtimer_activate(timer); + trace_hrtimer_start(timer); +} + +static inline void debug_deactivate(struct hrtimer *timer) { - list_del_init(&timer->cb_entry); + debug_hrtimer_deactivate(timer); + trace_hrtimer_cancel(timer); } /* High resolution timer related functions */ @@ -502,13 +509,14 @@ static inline int hrtimer_hres_active(void) * next event * Called with interrupts disabled and base->lock held */ -static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) +static void +hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { int i; struct hrtimer_clock_base *base = cpu_base->clock_base; - ktime_t expires; + ktime_t expires, expires_next; - cpu_base->expires_next.tv64 = KTIME_MAX; + expires_next.tv64 = KTIME_MAX; for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { struct hrtimer *timer; @@ -517,10 +525,22 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) continue; timer = rb_entry(base->first, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - if (expires.tv64 < cpu_base->expires_next.tv64) - cpu_base->expires_next = expires; + /* + * clock_was_set() has changed base->offset so the + * result might be negative. Fix it up to prevent a + * false positive in clockevents_program_event() + */ + if (expires.tv64 < 0) + expires.tv64 = 0; + if (expires.tv64 < expires_next.tv64) + expires_next = expires; } + if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64) + return; + + cpu_base->expires_next.tv64 = expires_next.tv64; + if (cpu_base->expires_next.tv64 != KTIME_MAX) tick_program_event(cpu_base->expires_next, 1); } @@ -603,7 +623,7 @@ static void retrigger_next_event(void *arg) base->clock_base[CLOCK_REALTIME].offset = timespec_to_ktime(realtime_offset); - hrtimer_force_reprogram(base); + hrtimer_force_reprogram(base, 0); spin_unlock(&base->lock); } @@ -630,7 +650,9 @@ void clock_was_set(void) */ void hres_timers_resume(void) { - /* Retrigger the CPU local events: */ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hres_timers_resume() called with IRQs enabled!"); + retrigger_next_event(NULL); } @@ -650,6 +672,7 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } + /* * When High resolution timers are active, try to reprogram. Note, that in case * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry @@ -657,35 +680,20 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) * and expiry check is done in the hrtimer_interrupt or in the softirq. */ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + int wakeup) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { + if (wakeup) { + spin_unlock(&base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + spin_lock(&base->cpu_base->lock); + } else + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); - /* Timer is expired, act upon the callback mode */ - switch(timer->cb_mode) { - case HRTIMER_CB_IRQSAFE_PERCPU: - case HRTIMER_CB_IRQSAFE_UNLOCKED: - /* - * This is solely for the sched tick emulation with - * dynamic tick support to ensure that we do not - * restart the tick right on the edge and end up with - * the tick timer in the softirq ! The calling site - * takes care of this. Also used for hrtimer sleeper ! - */ - debug_hrtimer_deactivate(timer); - return 1; - case HRTIMER_CB_SOFTIRQ: - /* - * Move everything else into the softirq pending list ! - */ - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - timer->state = HRTIMER_STATE_PENDING; - return 1; - default: - BUG(); - } + return 1; } + return 0; } @@ -718,35 +726,24 @@ static int hrtimer_switch_to_hres(void) /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); local_irq_restore(flags); - printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n", - smp_processor_id()); return 1; } -static inline void hrtimer_raise_softirq(void) -{ - raise_softirq(HRTIMER_SOFTIRQ); -} - #else static inline int hrtimer_hres_active(void) { return 0; } static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline int hrtimer_switch_to_hres(void) { return 0; } -static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } +static inline void +hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + int wakeup) { return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } -static inline int hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - return 0; -} -static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -817,16 +814,18 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); * * The timer is inserted in expiry order. Insertion into the * red black tree is O(log(n)). Must hold the base lock. + * + * Returns 1 when the new timer is the leftmost timer in the tree. */ -static void enqueue_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base, int reprogram) +static int enqueue_hrtimer(struct hrtimer *timer, + struct hrtimer_clock_base *base) { struct rb_node **link = &base->active.rb_node; struct rb_node *parent = NULL; struct hrtimer *entry; int leftmost = 1; - debug_hrtimer_activate(timer); + debug_activate(timer); /* * Find the right place in the rbtree: @@ -851,20 +850,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * Insert the timer to the rbtree and check whether it * replaces the first pending timer */ - if (leftmost) { - /* - * Reprogram the clock event device. When the timer is already - * expired hrtimer_enqueue_reprogram has either called the - * callback or added it to the pending list and raised the - * softirq. - * - * This is a NOP for !HIGHRES - */ - if (reprogram && hrtimer_enqueue_reprogram(timer, base)) - return; - + if (leftmost) base->first = &timer->node; - } rb_link_node(&timer->node, parent, link); rb_insert_color(&timer->node, &base->active); @@ -873,6 +860,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * state of a possibly running callback. */ timer->state |= HRTIMER_STATE_ENQUEUED; + + return leftmost; } /* @@ -889,22 +878,29 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, unsigned long newstate, int reprogram) { - /* High res. callback list. NOP for !HIGHRES */ - if (hrtimer_cb_pending(timer)) - hrtimer_remove_cb_pending(timer); - else { - /* - * Remove the timer from the rbtree and replace the - * first entry pointer if necessary. - */ - if (base->first == &timer->node) { - base->first = rb_next(&timer->node); - /* Reprogram the clock event device. if enabled */ - if (reprogram && hrtimer_hres_active()) - hrtimer_force_reprogram(base->cpu_base); + if (!(timer->state & HRTIMER_STATE_ENQUEUED)) + goto out; + + /* + * Remove the timer from the rbtree and replace the first + * entry pointer if necessary. + */ + if (base->first == &timer->node) { + base->first = rb_next(&timer->node); +#ifdef CONFIG_HIGH_RES_TIMERS + /* Reprogram the clock event device. if enabled */ + if (reprogram && hrtimer_hres_active()) { + ktime_t expires; + + expires = ktime_sub(hrtimer_get_expires(timer), + base->offset); + if (base->cpu_base->expires_next.tv64 == expires.tv64) + hrtimer_force_reprogram(base->cpu_base, 1); } - rb_erase(&timer->node, &base->active); +#endif } + rb_erase(&timer->node, &base->active); +out: timer->state = newstate; } @@ -925,7 +921,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -935,24 +931,13 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) return 0; } -/** - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU - * @timer: the timer to be added - * @tim: expiry time - * @delta_ns: "slack" range for the timer - * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) - * - * Returns: - * 0 on success - * 1 when the timer was active - */ -int -hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, - const enum hrtimer_mode mode) +int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, const enum hrtimer_mode mode, + int wakeup) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret, raise; + int ret, leftmost; base = lock_hrtimer_base(timer, &flags); @@ -960,9 +945,9 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n ret = remove_hrtimer(timer, base); /* Switch the timer base, if necessary: */ - new_base = switch_hrtimer_base(timer, base); + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - if (mode == HRTIMER_MODE_REL) { + if (mode & HRTIMER_MODE_REL) { tim = ktime_add_safe(tim, new_base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures @@ -980,35 +965,38 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n timer_stats_hrtimer_set_start_info(timer); + leftmost = enqueue_hrtimer(timer, new_base); + /* * Only allow reprogramming if the new base is on this CPU. * (it might still be on another CPU if the timer was pending) + * + * XXX send_remote_softirq() ? */ - enqueue_hrtimer(timer, new_base, - new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); - - /* - * The timer may be expired and moved to the cb_pending - * list. We can not raise the softirq with base lock held due - * to a possible deadlock with runqueue lock. - */ - raise = timer->state == HRTIMER_STATE_PENDING; - - /* - * We use preempt_disable to prevent this task from migrating after - * setting up the softirq and raising it. Otherwise, if me migrate - * we will raise the softirq on the wrong CPU. - */ - preempt_disable(); + if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) + hrtimer_enqueue_reprogram(timer, new_base, wakeup); unlock_hrtimer_base(timer, &flags); - if (raise) - hrtimer_raise_softirq(); - preempt_enable(); - return ret; } + +/** + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @delta_ns: "slack" range for the timer + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, const enum hrtimer_mode mode) +{ + return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1); +} EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); /** @@ -1024,7 +1012,7 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { - return hrtimer_start_range_ns(timer, tim, 0, mode); + return __hrtimer_start_range_ns(timer, tim, 0, mode, 1); } EXPORT_SYMBOL_GPL(hrtimer_start); @@ -1148,7 +1136,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; - INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS @@ -1167,7 +1154,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { - debug_hrtimer_init(timer); + debug_init(timer, clock_id, mode); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init); @@ -1191,116 +1178,68 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) -{ - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - int emulate_hardirq_ctx = 0; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); - - debug_hrtimer_deactivate(timer); - timer_stats_account_hrtimer(timer); - - fn = timer->function; - /* - * A timer might have been added to the cb_pending list - * when it was migrated during a cpu-offline operation. - * Emulate hardirq context for such timers. - */ - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || - timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) - emulate_hardirq_ctx = 1; - - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - if (unlikely(emulate_hardirq_ctx)) { - local_irq_disable(); - restart = fn(timer); - local_irq_enable(); - } else - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - struct hrtimer_clock_base *base = timer->base; - - if (base->first == &timer->node && - hrtimer_reprogram(timer, base)) { - /* - * Timer is expired. Thus move it from tree to - * pending list again. - */ - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - } - } - } - spin_unlock_irq(&cpu_base->lock); -} - -static void __run_hrtimer(struct hrtimer *timer) +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) { struct hrtimer_clock_base *base = timer->base; struct hrtimer_cpu_base *cpu_base = base->cpu_base; enum hrtimer_restart (*fn)(struct hrtimer *); int restart; - debug_hrtimer_deactivate(timer); + WARN_ON(!irqs_disabled()); + + debug_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); - fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || - timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { - /* - * Used for scheduler timers, avoid lock inversion with - * rq->lock and tasklist_lock. - * - * These timers are required to deal with enqueue expiry - * themselves and are not allowed to migrate. - */ - spin_unlock(&cpu_base->lock); - restart = fn(timer); - spin_lock(&cpu_base->lock); - } else - restart = fn(timer); /* - * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid - * reprogramming of the event hardware. This happens at the end of this - * function anyway. + * Because we run timers from hardirq context, there is no chance + * they get migrated to another cpu, therefore its safe to unlock + * the timer base. + */ + spin_unlock(&cpu_base->lock); + trace_hrtimer_expire_entry(timer, now); + restart = fn(timer); + trace_hrtimer_expire_exit(timer); + spin_lock(&cpu_base->lock); + + /* + * Note: We clear the CALLBACK bit after enqueue_hrtimer and + * we do not reprogramm the event hardware. Happens either in + * hrtimer_start_range_ns() or in hrtimer_interrupt() */ if (restart != HRTIMER_NORESTART) { BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); - enqueue_hrtimer(timer, base, 0); + enqueue_hrtimer(timer, base); } timer->state &= ~HRTIMER_STATE_CALLBACK; } #ifdef CONFIG_HIGH_RES_TIMERS +static int force_clock_reprogram; + +/* + * After 5 iteration's attempts, we consider that hrtimer_interrupt() + * is hanging, which could happen with something that slows the interrupt + * such as the tracing. Then we force the clock reprogramming for each future + * hrtimer interrupts to avoid infinite loops and use the min_delta_ns + * threshold that we will overwrite. + * The next tick event will be scheduled to 3 times we currently spend on + * hrtimer_interrupt(). This gives a good compromise, the cpus will spend + * 1/4 of their time to process the hrtimer interrupts. This is enough to + * let it running without serious starvation. + */ + +static inline void +hrtimer_interrupt_hanging(struct clock_event_device *dev, + ktime_t try_time) +{ + force_clock_reprogram = 1; + dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; + printk(KERN_WARNING "hrtimer: interrupt too slow, " + "forcing clock min delta to %lu ns\n", dev->min_delta_ns); +} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1310,25 +1249,38 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; - int i, raise = 0; + int nr_retries = 0; + int i; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event.tv64 = KTIME_MAX; retry: + /* 5 retries is enough to notice a hang */ + if (!(++nr_retries % 5)) + hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); + now = ktime_get(); expires_next.tv64 = KTIME_MAX; + spin_lock(&cpu_base->lock); + /* + * We set expires_next to KTIME_MAX here with cpu_base->lock + * held to prevent that a timer is enqueued in our queue via + * the migration code. This does not affect enqueueing of + * timers which run their callback and need to be requeued on + * this CPU. + */ + cpu_base->expires_next.tv64 = KTIME_MAX; + base = cpu_base->clock_base; for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { ktime_t basenow; struct rb_node *node; - spin_lock(&cpu_base->lock); - basenow = ktime_add(now, base->offset); while ((node = base->first)) { @@ -1359,33 +1311,39 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - /* Move softirq callbacks to the pending list */ - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - raise = 1; - continue; - } - - __run_hrtimer(timer); + __run_hrtimer(timer, &basenow); } - spin_unlock(&cpu_base->lock); base++; } + /* + * Store the new expiry value so the migration code can verify + * against it. + */ cpu_base->expires_next = expires_next; + spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, 0)) + if (tick_program_event(expires_next, force_clock_reprogram)) goto retry; } +} + +/* + * local version of hrtimer_peek_ahead_timers() called with interrupts + * disabled. + */ +static void __hrtimer_peek_ahead_timers(void) +{ + struct tick_device *td; + + if (!hrtimer_hres_active()) + return; - /* Raise softirq ? */ - if (raise) - raise_softirq(HRTIMER_SOFTIRQ); + td = &__get_cpu_var(tick_cpu_device); + if (td && td->evtdev) + hrtimer_interrupt(td->evtdev); } /** @@ -1399,25 +1357,23 @@ void hrtimer_interrupt(struct clock_event_device *dev) */ void hrtimer_peek_ahead_timers(void) { - struct tick_device *td; unsigned long flags; - if (!hrtimer_hres_active()) - return; - local_irq_save(flags); - td = &__get_cpu_var(tick_cpu_device); - if (td && td->evtdev) - hrtimer_interrupt(td->evtdev); + __hrtimer_peek_ahead_timers(); local_irq_restore(flags); } static void run_hrtimer_softirq(struct softirq_action *h) { - run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); + hrtimer_peek_ahead_timers(); } -#endif /* CONFIG_HIGH_RES_TIMERS */ +#else /* CONFIG_HIGH_RES_TIMERS */ + +static inline void __hrtimer_peek_ahead_timers(void) { } + +#endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from timer softirq every jiffy, expire hrtimers: @@ -1428,8 +1384,6 @@ static void run_hrtimer_softirq(struct softirq_action *h) */ void hrtimer_run_pending(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - if (hrtimer_hres_active()) return; @@ -1443,8 +1397,6 @@ void hrtimer_run_pending(void) */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) hrtimer_switch_to_hres(); - - run_hrtimer_pending(cpu_base); } /* @@ -1481,15 +1433,7 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - continue; - } - - __run_hrtimer(timer); + __run_hrtimer(timer, &base->softirq_time); } spin_unlock(&cpu_base->lock); } @@ -1515,10 +1459,8 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; sl->task = task; -#ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; -#endif } +EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) { @@ -1627,8 +1569,8 @@ out: return ret; } -asmlinkage long -sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec tu; @@ -1654,35 +1596,21 @@ static void __cpuinit init_hrtimers_cpu(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; - INIT_LIST_HEAD(&cpu_base->cb_pending); hrtimer_init_hres(cpu_base); } #ifdef CONFIG_HOTPLUG_CPU -static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base, int dcpu) +static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, + struct hrtimer_clock_base *new_base) { struct hrtimer *timer; struct rb_node *node; - int raise = 0; while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); - debug_hrtimer_deactivate(timer); - - /* - * Should not happen. Per CPU timers should be - * canceled _before_ the migration code is called - */ - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { - __remove_hrtimer(timer, old_base, - HRTIMER_STATE_INACTIVE, 0); - WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", - timer, timer->function, dcpu); - continue; - } + debug_deactivate(timer); /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the @@ -1692,112 +1620,77 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); timer->base = new_base; /* - * Enqueue the timer. Allow reprogramming of the event device + * Enqueue the timers on the new cpu. This does not + * reprogram the event device in case the timer + * expires before the earliest on this CPU, but we run + * hrtimer_interrupt after we migrated everything to + * sort out already expired timers and reprogram the + * event device. */ - enqueue_hrtimer(timer, new_base, 1); + enqueue_hrtimer(timer, new_base); -#ifdef CONFIG_HIGH_RES_TIMERS - /* - * Happens with high res enabled when the timer was - * already expired and the callback mode is - * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The - * enqueue code does not move them to the soft irq - * pending list for performance/latency reasons, but - * in the migration state, we need to do that - * otherwise we end up with a stale timer. - */ - if (timer->state == HRTIMER_STATE_MIGRATE) { - timer->state = HRTIMER_STATE_PENDING; - list_add_tail(&timer->cb_entry, - &new_base->cpu_base->cb_pending); - raise = 1; - } -#endif /* Clear the migration state bit */ timer->state &= ~HRTIMER_STATE_MIGRATE; } - return raise; } -#ifdef CONFIG_HIGH_RES_TIMERS -static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, - struct hrtimer_cpu_base *new_base) -{ - struct hrtimer *timer; - int raise = 0; - - while (!list_empty(&old_base->cb_pending)) { - timer = list_entry(old_base->cb_pending.next, - struct hrtimer, cb_entry); - - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0); - timer->base = &new_base->clock_base[timer->base->index]; - list_add_tail(&timer->cb_entry, &new_base->cb_pending); - raise = 1; - } - return raise; -} -#else -static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, - struct hrtimer_cpu_base *new_base) -{ - return 0; -} -#endif - -static void migrate_hrtimers(int cpu) +static void migrate_hrtimers(int scpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i, raise = 0; + int i; - BUG_ON(cpu_online(cpu)); - old_base = &per_cpu(hrtimer_bases, cpu); - new_base = &get_cpu_var(hrtimer_bases); + BUG_ON(cpu_online(scpu)); + tick_cancel_sched_timer(scpu); - tick_cancel_sched_timer(cpu); + local_irq_disable(); + old_base = &per_cpu(hrtimer_bases, scpu); + new_base = &__get_cpu_var(hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock_irq(&new_base->lock); + spin_lock(&new_base->lock); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - if (migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i], cpu)) - raise = 1; + migrate_hrtimer_list(&old_base->clock_base[i], + &new_base->clock_base[i]); } - if (migrate_hrtimer_pending(old_base, new_base)) - raise = 1; - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); - put_cpu_var(hrtimer_bases); + spin_unlock(&new_base->lock); - if (raise) - hrtimer_raise_softirq(); + /* Check, if we got expired work to do */ + __hrtimer_peek_ahead_timers(); + local_irq_enable(); } + #endif /* CONFIG_HOTPLUG_CPU */ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (long)hcpu; + int scpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - init_hrtimers_cpu(cpu); + init_hrtimers_cpu(scpu); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DYING: + case CPU_DYING_FROZEN: + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); + break; case CPU_DEAD: case CPU_DEAD_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); - migrate_hrtimers(cpu); + { + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); + migrate_hrtimers(scpu); break; + } #endif default: