X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fhrtimer.c;h=b8e4dce80a748dc06c94ed6cb43225903c7f99c1;hb=1a781a777b2f6ac46523fe92396215762ced624d;hp=5e7122d3f46f410fdf4617ef880bcf00695f4c39;hpb=13788ccc41ceea5893f9c747c59bc0b28f2416c2;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5e7122d..b8e4dce 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -59,6 +60,7 @@ ktime_t ktime_get(void) return timespec_to_ktime(now); } +EXPORT_SYMBOL_GPL(ktime_get); /** * ktime_get_real - get the real (wall-) time in ktime_t format @@ -135,35 +137,23 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) { ktime_t xtim, tomono; - struct timespec xts; + struct timespec xts, tom; unsigned long seq; do { seq = read_seqbegin(&xtime_lock); -#ifdef CONFIG_NO_HZ - getnstimeofday(&xts); -#else - xts = xtime; -#endif + xts = current_kernel_time(); + tom = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); xtim = timespec_to_ktime(xts); - tomono = timespec_to_ktime(wall_to_monotonic); + tomono = timespec_to_ktime(tom); base->clock_base[CLOCK_REALTIME].softirq_time = xtim; base->clock_base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono); } /* - * Helper function to check, whether the timer is running the callback - * function - */ -static inline int hrtimer_callback_running(struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_CALLBACK; -} - -/* * Functions and macros which are different for UP/SMP systems are kept in a * single place */ @@ -277,18 +267,43 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) return ktime_add(kt, tmp); } + +EXPORT_SYMBOL_GPL(ktime_add_ns); + +/** + * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable + * @kt: minuend + * @nsec: the scalar nsec value to subtract + * + * Returns the subtraction of @nsec from @kt in ktime_t format + */ +ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec) +{ + ktime_t tmp; + + if (likely(nsec < NSEC_PER_SEC)) { + tmp.tv64 = nsec; + } else { + unsigned long rem = do_div(nsec, NSEC_PER_SEC); + + tmp = ktime_set((long)nsec, rem); + } + + return ktime_sub(kt, tmp); +} + +EXPORT_SYMBOL_GPL(ktime_sub_ns); # endif /* !CONFIG_KTIME_SCALAR */ /* * Divide a ktime value by a nanosecond value */ -unsigned long ktime_divns(const ktime_t kt, s64 div) +u64 ktime_divns(const ktime_t kt, s64 div) { - u64 dclc, inc, dns; + u64 dclc; int sft = 0; - dclc = dns = ktime_to_ns(kt); - inc = div; + dclc = ktime_to_ns(kt); /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; @@ -297,10 +312,152 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) dclc >>= sft; do_div(dclc, (unsigned long) div); - return (unsigned long) dclc; + return dclc; } #endif /* BITS_PER_LONG >= 64 */ +/* + * Add two ktime values and do a safety check for overflow: + */ +ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) +{ + ktime_t res = ktime_add(lhs, rhs); + + /* + * We use KTIME_SEC_MAX here, the maximum timeout which we can + * return to user space in a timespec: + */ + if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64) + res = ktime_set(KTIME_SEC_MAX, 0); + + return res; +} + +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr hrtimer_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int hrtimer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_init(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state) +{ + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + WARN_ON_ONCE(1); + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int hrtimer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_free(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr hrtimer_debug_descr = { + .name = "hrtimer", + .fixup_init = hrtimer_fixup_init, + .fixup_activate = hrtimer_fixup_activate, + .fixup_free = hrtimer_fixup_free, +}; + +static inline void debug_hrtimer_init(struct hrtimer *timer) +{ + debug_object_init(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_activate(struct hrtimer *timer) +{ + debug_object_activate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) +{ + debug_object_deactivate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_free(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode); + +void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_object_init_on_stack(timer, &hrtimer_debug_descr); + __hrtimer_init(timer, clock_id, mode); +} + +void destroy_hrtimer_on_stack(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +#else +static inline void debug_hrtimer_init(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } +#endif + +/* + * Check, whether the timer is on the callback pending list + */ +static inline int hrtimer_cb_pending(const struct hrtimer *timer) +{ + return timer->state & HRTIMER_STATE_PENDING; +} + +/* + * Remove a timer from the callback pending list + */ +static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) +{ + list_del_init(&timer->cb_entry); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -385,16 +542,27 @@ static int hrtimer_reprogram(struct hrtimer *timer, ktime_t expires = ktime_sub(timer->expires, base->offset); int res; + WARN_ON_ONCE(timer->expires.tv64 < 0); + /* * When the callback is running, we do not reprogram the clock event * device. The timer callback is either running on a different CPU or - * the callback is executed in the hrtimer_interupt context. The + * the callback is executed in the hrtimer_interrupt context. The * reprogramming is handled either by the softirq, which called the * callback or at the end of the hrtimer_interrupt. */ if (hrtimer_callback_running(timer)) return 0; + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Nothing wrong + * about that, just avoid to call into the tick code, which + * has now objections against negative expiry values. + */ + if (expires.tv64 < 0) + return -ETIME; + if (expires.tv64 >= expires_next->tv64) return 0; @@ -454,23 +622,17 @@ static void retrigger_next_event(void *arg) void clock_was_set(void) { /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 0, 1); -} - -/* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_PENDING; + on_each_cpu(retrigger_next_event, NULL, 1); } /* - * Remove a timer from the callback pending list + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) +void hres_timers_resume(void) { - list_del_init(&timer->cb_entry); + /* Retrigger the CPU local events: */ + retrigger_next_event(NULL); } /* @@ -480,7 +642,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next.tv64 = KTIME_MAX; base->hres_active = 0; - INIT_LIST_HEAD(&base->cb_pending); } /* @@ -488,7 +649,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) */ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { - INIT_LIST_HEAD(&timer->cb_entry); } /* @@ -505,6 +665,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* Timer is expired, act upon the callback mode */ switch(timer->cb_mode) { case HRTIMER_CB_IRQSAFE_NO_RESTART: + debug_hrtimer_deactivate(timer); /* * We can call the callback from here. No restart * happens, so no danger of recursion @@ -519,6 +680,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, * the tick timer in the softirq ! The calling site * takes care of this. */ + debug_hrtimer_deactivate(timer); return 1; case HRTIMER_CB_IRQSAFE: case HRTIMER_CB_SOFTIRQ: @@ -528,7 +690,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, list_add_tail(&timer->cb_entry, &base->cpu_base->cb_pending); timer->state = HRTIMER_STATE_PENDING; - raise_softirq(HRTIMER_SOFTIRQ); return 1; default: BUG(); @@ -542,7 +703,8 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, */ static int hrtimer_switch_to_hres(void) { - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + int cpu = smp_processor_id(); + struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; if (base->hres_active) @@ -552,6 +714,8 @@ static int hrtimer_switch_to_hres(void) if (tick_init_highres()) { local_irq_restore(flags); + printk(KERN_WARNING "Could not switch to high resolution " + "mode on CPU %d\n", cpu); return 0; } base->hres_active = 1; @@ -563,11 +727,16 @@ static int hrtimer_switch_to_hres(void) /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); local_irq_restore(flags); - printk(KERN_INFO "Switched to high resolution mode on CPU %d\n", + printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n", smp_processor_id()); return 1; } +static inline void hrtimer_raise_softirq(void) +{ + raise_softirq(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -579,10 +748,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, { return 0; } -static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; } -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static inline int hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + return 0; +} +static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -599,7 +772,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) #endif /* - * Counterpart to lock_timer_base above: + * Counterpart to lock_hrtimer_base above: */ static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) @@ -616,10 +789,9 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) * Forward the timer expiry so it will expire in the future. * Returns the number of overruns. */ -unsigned long -hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) +u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { - unsigned long orun = 1; + u64 orun = 1; ktime_t delta; delta = ktime_sub(now, timer->expires); @@ -643,16 +815,11 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) */ orun++; } - timer->expires = ktime_add(timer->expires, interval); - /* - * Make sure, that the result did not wrap with a very large - * interval. - */ - if (timer->expires.tv64 < 0) - timer->expires = ktime_set(KTIME_SEC_MAX, 0); + timer->expires = ktime_add_safe(timer->expires, interval); return orun; } +EXPORT_SYMBOL_GPL(hrtimer_forward); /* * enqueue_hrtimer - internal function to (re)start a timer @@ -666,6 +833,9 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct rb_node **link = &base->active.rb_node; struct rb_node *parent = NULL; struct hrtimer *entry; + int leftmost = 1; + + debug_hrtimer_activate(timer); /* * Find the right place in the rbtree: @@ -677,18 +847,19 @@ static void enqueue_hrtimer(struct hrtimer *timer, * We dont care about collisions. Nodes with * the same expiry time stay together. */ - if (timer->expires.tv64 < entry->expires.tv64) + if (timer->expires.tv64 < entry->expires.tv64) { link = &(*link)->rb_left; - else + } else { link = &(*link)->rb_right; + leftmost = 0; + } } /* * Insert the timer to the rbtree and check whether it * replaces the first pending timer */ - if (!base->first || timer->expires.tv64 < - rb_entry(base->first, struct hrtimer, node)->expires.tv64) { + if (leftmost) { /* * Reprogram the clock event device. When the timer is already * expired hrtimer_enqueue_reprogram has either called the @@ -762,6 +933,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ + debug_hrtimer_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -786,7 +958,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret; + int ret, raise; base = lock_hrtimer_base(timer, &flags); @@ -797,7 +969,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) new_base = switch_hrtimer_base(timer, base); if (mode == HRTIMER_MODE_REL) { - tim = ktime_add(tim, new_base->get_time()); + tim = ktime_add_safe(tim, new_base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures * to signal that they simply return xtime in @@ -806,17 +978,41 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) * timeouts. This will go away with the GTOD framework. */ #ifdef CONFIG_TIME_LOW_RES - tim = ktime_add(tim, base->resolution); + tim = ktime_add_safe(tim, base->resolution); #endif } + timer->expires = tim; timer_stats_hrtimer_set_start_info(timer); - enqueue_hrtimer(timer, new_base, base == new_base); + /* + * Only allow reprogramming if the new base is on this CPU. + * (it might still be on another CPU if the timer was pending) + */ + enqueue_hrtimer(timer, new_base, + new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); + + /* + * The timer may be expired and moved to the cb_pending + * list. We can not raise the softirq with base lock held due + * to a possible deadlock with runqueue lock. + */ + raise = timer->state == HRTIMER_STATE_PENDING; + + /* + * We use preempt_disable to prevent this task from migrating after + * setting up the softirq and raising it. Otherwise, if me migrate + * we will raise the softirq on the wrong CPU. + */ + preempt_disable(); unlock_hrtimer_base(timer, &flags); + if (raise) + hrtimer_raise_softirq(); + preempt_enable(); + return ret; } EXPORT_SYMBOL_GPL(hrtimer_start); @@ -887,7 +1083,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) +#ifdef CONFIG_NO_HZ /** * hrtimer_get_next_event - get the time until next expiry event * @@ -927,14 +1123,8 @@ ktime_t hrtimer_get_next_event(void) } #endif -/** - * hrtimer_init - initialize a timer to the given clock - * @timer: the timer to be initialized - * @clock_id: the clock to be used - * @mode: timer mode abs/rel - */ -void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) { struct hrtimer_cpu_base *cpu_base; @@ -946,6 +1136,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; + INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS @@ -954,6 +1145,19 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, memset(timer->start_comm, 0, TASK_COMM_LEN); #endif } + +/** + * hrtimer_init - initialize a timer to the given clock + * @timer: the timer to be initialized + * @clock_id: the clock to be used + * @mode: timer mode abs/rel + */ +void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + __hrtimer_init(timer, clock_id, mode); +} EXPORT_SYMBOL_GPL(hrtimer_init); /** @@ -975,6 +1179,98 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); +static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) +{ + spin_lock_irq(&cpu_base->lock); + + while (!list_empty(&cpu_base->cb_pending)) { + enum hrtimer_restart (*fn)(struct hrtimer *); + struct hrtimer *timer; + int restart; + + timer = list_entry(cpu_base->cb_pending.next, + struct hrtimer, cb_entry); + + debug_hrtimer_deactivate(timer); + timer_stats_account_hrtimer(timer); + + fn = timer->function; + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); + spin_unlock_irq(&cpu_base->lock); + + restart = fn(timer); + + spin_lock_irq(&cpu_base->lock); + + timer->state &= ~HRTIMER_STATE_CALLBACK; + if (restart == HRTIMER_RESTART) { + BUG_ON(hrtimer_active(timer)); + /* + * Enqueue the timer, allow reprogramming of the event + * device + */ + enqueue_hrtimer(timer, timer->base, 1); + } else if (hrtimer_active(timer)) { + /* + * If the timer was rearmed on another CPU, reprogram + * the event device. + */ + struct hrtimer_clock_base *base = timer->base; + + if (base->first == &timer->node && + hrtimer_reprogram(timer, base)) { + /* + * Timer is expired. Thus move it from tree to + * pending list again. + */ + __remove_hrtimer(timer, base, + HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + } + } + } + spin_unlock_irq(&cpu_base->lock); +} + +static void __run_hrtimer(struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + struct hrtimer_cpu_base *cpu_base = base->cpu_base; + enum hrtimer_restart (*fn)(struct hrtimer *); + int restart; + + debug_hrtimer_deactivate(timer); + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); + timer_stats_account_hrtimer(timer); + + fn = timer->function; + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { + /* + * Used for scheduler timers, avoid lock inversion with + * rq->lock and tasklist_lock. + * + * These timers are required to deal with enqueue expiry + * themselves and are not allowed to migrate. + */ + spin_unlock(&cpu_base->lock); + restart = fn(timer); + spin_lock(&cpu_base->lock); + } else + restart = fn(timer); + + /* + * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid + * reprogramming of the event hardware. This happens at the end of this + * function anyway. + */ + if (restart != HRTIMER_NORESTART) { + BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); + enqueue_hrtimer(timer, base, 0); + } + timer->state &= ~HRTIMER_STATE_CALLBACK; +} + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1032,21 +1328,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) continue; } - __remove_hrtimer(timer, base, - HRTIMER_STATE_CALLBACK, 0); - timer_stats_account_hrtimer(timer); - - /* - * Note: We clear the CALLBACK bit after - * enqueue_hrtimer to avoid reprogramming of - * the event hardware. This happens at the end - * of this function anyway. - */ - if (timer->function(timer) != HRTIMER_NORESTART) { - BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); - enqueue_hrtimer(timer, base, 0); - } - timer->state &= ~HRTIMER_STATE_CALLBACK; + __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); base++; @@ -1067,109 +1349,21 @@ void hrtimer_interrupt(struct clock_event_device *dev) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); - - timer_stats_account_hrtimer(timer); - - fn = timer->function; - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - if (timer->base->first == &timer->node) - hrtimer_reprogram(timer, timer->base); - } - } - spin_unlock_irq(&cpu_base->lock); + run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); } #endif /* CONFIG_HIGH_RES_TIMERS */ /* - * Expire the per base hrtimer-queue: - */ -static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, - int index) -{ - struct rb_node *node; - struct hrtimer_clock_base *base = &cpu_base->clock_base[index]; - - if (!base->first) - return; - - if (base->get_softirq_time) - base->softirq_time = base->get_softirq_time(); - - spin_lock_irq(&cpu_base->lock); - - while ((node = base->first)) { - struct hrtimer *timer; - enum hrtimer_restart (*fn)(struct hrtimer *); - int restart; - - timer = rb_entry(node, struct hrtimer, node); - if (base->softirq_time.tv64 <= timer->expires.tv64) - break; - -#ifdef CONFIG_HIGH_RES_TIMERS - WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ); -#endif - timer_stats_account_hrtimer(timer); - - fn = timer->function; - __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart != HRTIMER_NORESTART) { - BUG_ON(hrtimer_active(timer)); - enqueue_hrtimer(timer, base, 0); - } - } - spin_unlock_irq(&cpu_base->lock); -} - -/* * Called from timer softirq every jiffy, expire hrtimers: * * For HRT its the fall back code to run the softirq in the timer * softirq context in case the hrtimer initialization failed or has * not been done yet. */ -void hrtimer_run_queues(void) +void hrtimer_run_pending(void) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - int i; if (hrtimer_hres_active()) return; @@ -1183,13 +1377,58 @@ void hrtimer_run_queues(void) * deadlock vs. xtime_lock. */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) - if (hrtimer_switch_to_hres()) - return; + hrtimer_switch_to_hres(); - hrtimer_get_softirq_time(cpu_base); + run_hrtimer_pending(cpu_base); +} - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) - run_hrtimer_queue(cpu_base, i); +/* + * Called from hardirq context every jiffy + */ +void hrtimer_run_queues(void) +{ + struct rb_node *node; + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_clock_base *base; + int index, gettime = 1; + + if (hrtimer_hres_active()) + return; + + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { + base = &cpu_base->clock_base[index]; + + if (!base->first) + continue; + + if (base->get_softirq_time) + base->softirq_time = base->get_softirq_time(); + else if (gettime) { + hrtimer_get_softirq_time(cpu_base); + gettime = 0; + } + + spin_lock(&cpu_base->lock); + + while ((node = base->first)) { + struct hrtimer *timer; + + timer = rb_entry(node, struct hrtimer, node); + if (base->softirq_time.tv64 <= timer->expires.tv64) + break; + + if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { + __remove_hrtimer(timer, base, + HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + continue; + } + + __run_hrtimer(timer); + } + spin_unlock(&cpu_base->lock); + } } /* @@ -1213,7 +1452,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) sl->timer.function = hrtimer_wakeup; sl->task = task; #ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART; + sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; #endif } @@ -1224,6 +1463,8 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod do { set_current_state(TASK_INTERRUPTIBLE); hrtimer_start(&t->timer, t->timer.expires, mode); + if (!hrtimer_active(&t->timer)) + t->task = NULL; if (likely(t->task)) schedule(); @@ -1233,38 +1474,52 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod } while (t->task && !signal_pending(current)); + __set_current_state(TASK_RUNNING); + return t->task == NULL; } +static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) +{ + struct timespec rmt; + ktime_t rem; + + rem = ktime_sub(timer->expires, timer->base->get_time()); + if (rem.tv64 <= 0) + return 0; + rmt = ktime_to_timespec(rem); + + if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) + return -EFAULT; + + return 1; +} + long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; - struct timespec __user *rmtp; - struct timespec tu; - ktime_t time; - - restart->fn = do_no_restart_syscall; + struct timespec __user *rmtp; + int ret = 0; - hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); - t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; + hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + HRTIMER_MODE_ABS); + t.timer.expires.tv64 = restart->nanosleep.expires; if (do_nanosleep(&t, HRTIMER_MODE_ABS)) - return 0; + goto out; - rmtp = (struct timespec __user *) restart->arg1; + rmtp = restart->nanosleep.rmtp; if (rmtp) { - time = ktime_sub(t.timer.expires, t.timer.base->get_time()); - if (time.tv64 <= 0) - return 0; - tu = ktime_to_timespec(time); - if (copy_to_user(rmtp, &tu, sizeof(tu))) - return -EFAULT; + ret = update_rmtp(&t.timer, rmtp); + if (ret <= 0) + goto out; } - restart->fn = hrtimer_nanosleep_restart; - /* The other values in restart are already filled in */ - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, @@ -1272,35 +1527,35 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, { struct restart_block *restart; struct hrtimer_sleeper t; - struct timespec tu; - ktime_t rem; + int ret = 0; - hrtimer_init(&t.timer, clockid, mode); + hrtimer_init_on_stack(&t.timer, clockid, mode); t.timer.expires = timespec_to_ktime(*rqtp); if (do_nanosleep(&t, mode)) - return 0; + goto out; /* Absolute timers do not update the rmtp value and restart: */ - if (mode == HRTIMER_MODE_ABS) - return -ERESTARTNOHAND; + if (mode == HRTIMER_MODE_ABS) { + ret = -ERESTARTNOHAND; + goto out; + } if (rmtp) { - rem = ktime_sub(t.timer.expires, t.timer.base->get_time()); - if (rem.tv64 <= 0) - return 0; - tu = ktime_to_timespec(rem); - if (copy_to_user(rmtp, &tu, sizeof(tu))) - return -EFAULT; + ret = update_rmtp(&t.timer, rmtp); + if (ret <= 0) + goto out; } restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->arg0 = (unsigned long) t.timer.base->index; - restart->arg1 = (unsigned long) rmtp; - restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF; - restart->arg3 = t.timer.expires.tv64 >> 32; + restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.rmtp = rmtp; + restart->nanosleep.expires = t.timer.expires.tv64; - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } asmlinkage long @@ -1320,17 +1575,17 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) /* * Functions related to boot-time initialization: */ -static void __devinit init_hrtimers_cpu(int cpu) +static void __cpuinit init_hrtimers_cpu(int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; spin_lock_init(&cpu_base->lock); - lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; + INIT_LIST_HEAD(&cpu_base->cb_pending); hrtimer_init_hres(cpu_base); } @@ -1345,6 +1600,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); timer->base = new_base; /* @@ -1366,16 +1622,16 @@ static void migrate_hrtimers(int cpu) tick_cancel_sched_timer(cpu); local_irq_disable(); - double_spin_lock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_lock(&new_base->lock); + spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - double_spin_unlock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); local_irq_enable(); put_cpu_var(hrtimer_bases); } @@ -1384,16 +1640,18 @@ static void migrate_hrtimers(int cpu) static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - long cpu = (long)hcpu; + unsigned int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: init_hrtimers_cpu(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); migrate_hrtimers(cpu); break; @@ -1416,7 +1674,7 @@ void __init hrtimers_init(void) (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); #ifdef CONFIG_HIGH_RES_TIMERS - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); #endif }