X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fhrtimer.c;h=cb8a15c1958318c52c116d08e33003ad2ea0cc98;hb=3aea09dc9106407d8bc18e593fbffda9ad632844;hp=782137dc755f27658d42103832ee472d0815e9f6;hpb=7bb67439bf6bd3782f07f1d7be1e63406453d5de;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 782137d..cb8a15c 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -32,7 +32,6 @@ */ #include -#include #include #include #include @@ -442,22 +441,6 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif -/* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_PENDING; -} - -/* - * Remove a timer from the callback pending list - */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) -{ - list_del_init(&timer->cb_entry); -} - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -517,7 +500,14 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) if (!base->first) continue; timer = rb_entry(base->first, struct hrtimer, node); - expires = ktime_sub(timer->expires, base->offset); + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + /* + * clock_was_set() has changed base->offset so the + * result might be negative. Fix it up to prevent a + * false positive in clockevents_program_event() + */ + if (expires.tv64 < 0) + expires.tv64 = 0; if (expires.tv64 < cpu_base->expires_next.tv64) cpu_base->expires_next = expires; } @@ -539,10 +529,10 @@ static int hrtimer_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; - ktime_t expires = ktime_sub(timer->expires, base->offset); + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); int res; - WARN_ON_ONCE(timer->expires.tv64 < 0); + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); /* * When the callback is running, we do not reprogram the clock event @@ -631,7 +621,9 @@ void clock_was_set(void) */ void hres_timers_resume(void) { - /* Retrigger the CPU local events: */ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hres_timers_resume() called with IRQs enabled!"); + retrigger_next_event(NULL); } @@ -651,6 +643,7 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } + /* * When High resolution timers are active, try to reprogram. Note, that in case * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry @@ -658,43 +651,20 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) * and expiry check is done in the hrtimer_interrupt or in the softirq. */ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + int wakeup) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { + if (wakeup) { + spin_unlock(&base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + spin_lock(&base->cpu_base->lock); + } else + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); - /* Timer is expired, act upon the callback mode */ - switch(timer->cb_mode) { - case HRTIMER_CB_IRQSAFE_NO_RESTART: - debug_hrtimer_deactivate(timer); - /* - * We can call the callback from here. No restart - * happens, so no danger of recursion - */ - BUG_ON(timer->function(timer) != HRTIMER_NORESTART); - return 1; - case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: - /* - * This is solely for the sched tick emulation with - * dynamic tick support to ensure that we do not - * restart the tick right on the edge and end up with - * the tick timer in the softirq ! The calling site - * takes care of this. - */ - debug_hrtimer_deactivate(timer); - return 1; - case HRTIMER_CB_IRQSAFE: - case HRTIMER_CB_SOFTIRQ: - /* - * Move everything else into the softirq pending list ! - */ - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - timer->state = HRTIMER_STATE_PENDING; - return 1; - default: - BUG(); - } + return 1; } + return 0; } @@ -732,11 +702,6 @@ static int hrtimer_switch_to_hres(void) return 1; } -static inline void hrtimer_raise_softirq(void) -{ - raise_softirq(HRTIMER_SOFTIRQ); -} - #else static inline int hrtimer_hres_active(void) { return 0; } @@ -744,18 +709,13 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + int wakeup) { return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } -static inline int hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - return 0; -} -static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -794,7 +754,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) u64 orun = 1; ktime_t delta; - delta = ktime_sub(now, timer->expires); + delta = ktime_sub(now, hrtimer_get_expires(timer)); if (delta.tv64 < 0) return 0; @@ -806,8 +766,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) s64 incr = ktime_to_ns(interval); orun = ktime_divns(delta, incr); - timer->expires = ktime_add_ns(timer->expires, incr * orun); - if (timer->expires.tv64 > now.tv64) + hrtimer_add_expires_ns(timer, incr * orun); + if (hrtimer_get_expires_tv64(timer) > now.tv64) return orun; /* * This (and the ktime_add() below) is the @@ -815,7 +775,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) */ orun++; } - timer->expires = ktime_add_safe(timer->expires, interval); + hrtimer_add_expires(timer, interval); return orun; } @@ -826,9 +786,11 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); * * The timer is inserted in expiry order. Insertion into the * red black tree is O(log(n)). Must hold the base lock. + * + * Returns 1 when the new timer is the leftmost timer in the tree. */ -static void enqueue_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base, int reprogram) +static int enqueue_hrtimer(struct hrtimer *timer, + struct hrtimer_clock_base *base) { struct rb_node **link = &base->active.rb_node; struct rb_node *parent = NULL; @@ -847,7 +809,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * We dont care about collisions. Nodes with * the same expiry time stay together. */ - if (timer->expires.tv64 < entry->expires.tv64) { + if (hrtimer_get_expires_tv64(timer) < + hrtimer_get_expires_tv64(entry)) { link = &(*link)->rb_left; } else { link = &(*link)->rb_right; @@ -859,20 +822,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * Insert the timer to the rbtree and check whether it * replaces the first pending timer */ - if (leftmost) { - /* - * Reprogram the clock event device. When the timer is already - * expired hrtimer_enqueue_reprogram has either called the - * callback or added it to the pending list and raised the - * softirq. - * - * This is a NOP for !HIGHRES - */ - if (reprogram && hrtimer_enqueue_reprogram(timer, base)) - return; - + if (leftmost) base->first = &timer->node; - } rb_link_node(&timer->node, parent, link); rb_insert_color(&timer->node, &base->active); @@ -881,6 +832,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * state of a possibly running callback. */ timer->state |= HRTIMER_STATE_ENQUEUED; + + return leftmost; } /* @@ -897,10 +850,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, unsigned long newstate, int reprogram) { - /* High res. callback list. NOP for !HIGHRES */ - if (hrtimer_cb_pending(timer)) - hrtimer_remove_cb_pending(timer); - else { + if (timer->state & HRTIMER_STATE_ENQUEUED) { /* * Remove the timer from the rbtree and replace the * first entry pointer if necessary. @@ -943,22 +893,13 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) return 0; } -/** - * hrtimer_start - (re)start an relative timer on the current CPU - * @timer: the timer to be added - * @tim: expiry time - * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) - * - * Returns: - * 0 on success - * 1 when the timer was active - */ -int -hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, const enum hrtimer_mode mode, + int wakeup) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret, raise; + int ret, leftmost; base = lock_hrtimer_base(timer, &flags); @@ -982,41 +923,62 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #endif } - timer->expires = tim; + hrtimer_set_expires_range_ns(timer, tim, delta_ns); timer_stats_hrtimer_set_start_info(timer); + leftmost = enqueue_hrtimer(timer, new_base); + /* * Only allow reprogramming if the new base is on this CPU. * (it might still be on another CPU if the timer was pending) + * + * XXX send_remote_softirq() ? */ - enqueue_hrtimer(timer, new_base, - new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); - - /* - * The timer may be expired and moved to the cb_pending - * list. We can not raise the softirq with base lock held due - * to a possible deadlock with runqueue lock. - */ - raise = timer->state == HRTIMER_STATE_PENDING; - - /* - * We use preempt_disable to prevent this task from migrating after - * setting up the softirq and raising it. Otherwise, if me migrate - * we will raise the softirq on the wrong CPU. - */ - preempt_disable(); + if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) + hrtimer_enqueue_reprogram(timer, new_base, wakeup); unlock_hrtimer_base(timer, &flags); - if (raise) - hrtimer_raise_softirq(); - preempt_enable(); - return ret; } + +/** + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @delta_ns: "slack" range for the timer + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, const enum hrtimer_mode mode) +{ + return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1); +} +EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); + +/** + * hrtimer_start - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + return __hrtimer_start_range_ns(timer, tim, 0, mode, 1); +} EXPORT_SYMBOL_GPL(hrtimer_start); + /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop @@ -1076,7 +1038,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) ktime_t rem; base = lock_hrtimer_base(timer, &flags); - rem = ktime_sub(timer->expires, base->get_time()); + rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); return rem; @@ -1108,7 +1070,7 @@ ktime_t hrtimer_get_next_event(void) continue; timer = rb_entry(base->first, struct hrtimer, node); - delta.tv64 = timer->expires.tv64; + delta.tv64 = hrtimer_get_expires_tv64(timer); delta = ktime_sub(delta, base->get_time()); if (delta.tv64 < mindelta.tv64) mindelta.tv64 = delta.tv64; @@ -1179,60 +1141,6 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) -{ - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); - - debug_hrtimer_deactivate(timer); - timer_stats_account_hrtimer(timer); - - fn = timer->function; - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - struct hrtimer_clock_base *base = timer->base; - - if (base->first == &timer->node && - hrtimer_reprogram(timer, base)) { - /* - * Timer is expired. Thus move it from tree to - * pending list again. - */ - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - } - } - } - spin_unlock_irq(&cpu_base->lock); -} - static void __run_hrtimer(struct hrtimer *timer) { struct hrtimer_clock_base *base = timer->base; @@ -1240,39 +1148,59 @@ static void __run_hrtimer(struct hrtimer *timer) enum hrtimer_restart (*fn)(struct hrtimer *); int restart; + WARN_ON(!irqs_disabled()); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); - fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { - /* - * Used for scheduler timers, avoid lock inversion with - * rq->lock and tasklist_lock. - * - * These timers are required to deal with enqueue expiry - * themselves and are not allowed to migrate. - */ - spin_unlock(&cpu_base->lock); - restart = fn(timer); - spin_lock(&cpu_base->lock); - } else - restart = fn(timer); /* - * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid - * reprogramming of the event hardware. This happens at the end of this - * function anyway. + * Because we run timers from hardirq context, there is no chance + * they get migrated to another cpu, therefore its safe to unlock + * the timer base. + */ + spin_unlock(&cpu_base->lock); + restart = fn(timer); + spin_lock(&cpu_base->lock); + + /* + * Note: We clear the CALLBACK bit after enqueue_hrtimer and + * we do not reprogramm the event hardware. Happens either in + * hrtimer_start_range_ns() or in hrtimer_interrupt() */ if (restart != HRTIMER_NORESTART) { BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); - enqueue_hrtimer(timer, base, 0); + enqueue_hrtimer(timer, base); } timer->state &= ~HRTIMER_STATE_CALLBACK; } #ifdef CONFIG_HIGH_RES_TIMERS +static int force_clock_reprogram; + +/* + * After 5 iteration's attempts, we consider that hrtimer_interrupt() + * is hanging, which could happen with something that slows the interrupt + * such as the tracing. Then we force the clock reprogramming for each future + * hrtimer interrupts to avoid infinite loops and use the min_delta_ns + * threshold that we will overwrite. + * The next tick event will be scheduled to 3 times we currently spend on + * hrtimer_interrupt(). This gives a good compromise, the cpus will spend + * 1/4 of their time to process the hrtimer interrupts. This is enough to + * let it running without serious starvation. + */ + +static inline void +hrtimer_interrupt_hanging(struct clock_event_device *dev, + ktime_t try_time) +{ + force_clock_reprogram = 1; + dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; + printk(KERN_WARNING "hrtimer: interrupt too slow, " + "forcing clock min delta to %lu ns\n", dev->min_delta_ns); +} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1282,13 +1210,18 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; - int i, raise = 0; + int nr_retries = 0; + int i; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event.tv64 = KTIME_MAX; retry: + /* 5 retries is enough to notice a hang */ + if (!(++nr_retries % 5)) + hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); + now = ktime_get(); expires_next.tv64 = KTIME_MAX; @@ -1308,26 +1241,29 @@ void hrtimer_interrupt(struct clock_event_device *dev) timer = rb_entry(node, struct hrtimer, node); - if (basenow.tv64 < timer->expires.tv64) { + /* + * The immediate goal for using the softexpires is + * minimizing wakeups, not running timers at the + * earliest interrupt after their soft expiration. + * This allows us to avoid using a Priority Search + * Tree, which can answer a stabbing querry for + * overlapping intervals and instead use the simple + * BST we already have. + * We don't add extra wakeups by delaying timers that + * are right-of a not yet expired timer, because that + * timer will have to trigger a wakeup anyway. + */ + + if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { ktime_t expires; - expires = ktime_sub(timer->expires, + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires.tv64 < expires_next.tv64) expires_next = expires; break; } - /* Move softirq callbacks to the pending list */ - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - raise = 1; - continue; - } - __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); @@ -1338,21 +1274,55 @@ void hrtimer_interrupt(struct clock_event_device *dev) /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, 0)) + if (tick_program_event(expires_next, force_clock_reprogram)) goto retry; } +} + +/* + * local version of hrtimer_peek_ahead_timers() called with interrupts + * disabled. + */ +static void __hrtimer_peek_ahead_timers(void) +{ + struct tick_device *td; + + if (!hrtimer_hres_active()) + return; + + td = &__get_cpu_var(tick_cpu_device); + if (td && td->evtdev) + hrtimer_interrupt(td->evtdev); +} + +/** + * hrtimer_peek_ahead_timers -- run soft-expired timers now + * + * hrtimer_peek_ahead_timers will peek at the timer queue of + * the current cpu and check if there are any timers for which + * the soft expires time has passed. If any such timers exist, + * they are run immediately and then removed from the timer queue. + * + */ +void hrtimer_peek_ahead_timers(void) +{ + unsigned long flags; - /* Raise softirq ? */ - if (raise) - raise_softirq(HRTIMER_SOFTIRQ); + local_irq_save(flags); + __hrtimer_peek_ahead_timers(); + local_irq_restore(flags); } static void run_hrtimer_softirq(struct softirq_action *h) { - run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); + hrtimer_peek_ahead_timers(); } -#endif /* CONFIG_HIGH_RES_TIMERS */ +#else /* CONFIG_HIGH_RES_TIMERS */ + +static inline void __hrtimer_peek_ahead_timers(void) { } + +#endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from timer softirq every jiffy, expire hrtimers: @@ -1363,8 +1333,6 @@ static void run_hrtimer_softirq(struct softirq_action *h) */ void hrtimer_run_pending(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - if (hrtimer_hres_active()) return; @@ -1378,8 +1346,6 @@ void hrtimer_run_pending(void) */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) hrtimer_switch_to_hres(); - - run_hrtimer_pending(cpu_base); } /* @@ -1401,9 +1367,7 @@ void hrtimer_run_queues(void) if (!base->first) continue; - if (base->get_softirq_time) - base->softirq_time = base->get_softirq_time(); - else if (gettime) { + if (gettime) { hrtimer_get_softirq_time(cpu_base); gettime = 0; } @@ -1414,17 +1378,10 @@ void hrtimer_run_queues(void) struct hrtimer *timer; timer = rb_entry(node, struct hrtimer, node); - if (base->softirq_time.tv64 <= timer->expires.tv64) + if (base->softirq_time.tv64 <= + hrtimer_get_expires_tv64(timer)) break; - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - continue; - } - __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); @@ -1451,9 +1408,6 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; sl->task = task; -#ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; -#endif } static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) @@ -1462,7 +1416,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod do { set_current_state(TASK_INTERRUPTIBLE); - hrtimer_start(&t->timer, t->timer.expires, mode); + hrtimer_start_expires(&t->timer, mode); if (!hrtimer_active(&t->timer)) t->task = NULL; @@ -1484,7 +1438,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) struct timespec rmt; ktime_t rem; - rem = ktime_sub(timer->expires, timer->base->get_time()); + rem = hrtimer_expires_remaining(timer); if (rem.tv64 <= 0) return 0; rmt = ktime_to_timespec(rem); @@ -1503,7 +1457,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); - t.timer.expires.tv64 = restart->nanosleep.expires; + hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); if (do_nanosleep(&t, HRTIMER_MODE_ABS)) goto out; @@ -1528,9 +1482,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; + unsigned long slack; + + slack = current->timer_slack_ns; + if (rt_task(current)) + slack = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - t.timer.expires = timespec_to_ktime(*rqtp); + hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); if (do_nanosleep(&t, mode)) goto out; @@ -1550,7 +1509,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.index = t.timer.base->index; restart->nanosleep.rmtp = rmtp; - restart->nanosleep.expires = t.timer.expires.tv64; + restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); ret = -ERESTART_RESTARTBLOCK; out: @@ -1558,8 +1517,8 @@ out: return ret; } -asmlinkage long -sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec tu; @@ -1585,7 +1544,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; - INIT_LIST_HEAD(&cpu_base->cb_pending); hrtimer_init_hres(cpu_base); } @@ -1601,27 +1559,44 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_hrtimer_deactivate(timer); - __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); + + /* + * Mark it as STATE_MIGRATE not INACTIVE otherwise the + * timer could be seen as !active and just vanish away + * under us on another CPU + */ + __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); timer->base = new_base; /* - * Enqueue the timer. Allow reprogramming of the event device + * Enqueue the timers on the new cpu. This does not + * reprogram the event device in case the timer + * expires before the earliest on this CPU, but we run + * hrtimer_interrupt after we migrated everything to + * sort out already expired timers and reprogram the + * event device. */ - enqueue_hrtimer(timer, new_base, 1); + enqueue_hrtimer(timer, new_base); + + /* Clear the migration state bit */ + timer->state &= ~HRTIMER_STATE_MIGRATE; } } -static void migrate_hrtimers(int cpu) +static void migrate_hrtimers(int scpu) { struct hrtimer_cpu_base *old_base, *new_base; int i; - BUG_ON(cpu_online(cpu)); - old_base = &per_cpu(hrtimer_bases, cpu); - new_base = &get_cpu_var(hrtimer_bases); - - tick_cancel_sched_timer(cpu); + BUG_ON(cpu_online(scpu)); + tick_cancel_sched_timer(scpu); local_irq_disable(); + old_base = &per_cpu(hrtimer_bases, scpu); + new_base = &__get_cpu_var(hrtimer_bases); + /* + * The caller is globally serialized and nobody else + * takes two locks at once, deadlock is not possible. + */ spin_lock(&new_base->lock); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); @@ -1632,29 +1607,38 @@ static void migrate_hrtimers(int cpu) spin_unlock(&old_base->lock); spin_unlock(&new_base->lock); + + /* Check, if we got expired work to do */ + __hrtimer_peek_ahead_timers(); local_irq_enable(); - put_cpu_var(hrtimer_bases); } + #endif /* CONFIG_HOTPLUG_CPU */ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (long)hcpu; + int scpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - init_hrtimers_cpu(cpu); + init_hrtimers_cpu(scpu); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DYING: + case CPU_DYING_FROZEN: + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); + break; case CPU_DEAD: case CPU_DEAD_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); - migrate_hrtimers(cpu); + { + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); + migrate_hrtimers(scpu); break; + } #endif default: @@ -1679,14 +1663,20 @@ void __init hrtimers_init(void) } /** - * schedule_hrtimeout - sleep until timeout + * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel give the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to @@ -1700,7 +1690,7 @@ void __init hrtimers_init(void) * * Returns 0 when the timer has expired otherwise -EINTR */ -int __sched schedule_hrtimeout(ktime_t *expires, +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode) { struct hrtimer_sleeper t; @@ -1724,11 +1714,11 @@ int __sched schedule_hrtimeout(ktime_t *expires, } hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); - t.timer.expires = *expires; + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); - hrtimer_start(&t.timer, t.timer.expires, mode); + hrtimer_start_expires(&t.timer, mode); if (!hrtimer_active(&t.timer)) t.task = NULL; @@ -1742,4 +1732,33 @@ int __sched schedule_hrtimeout(ktime_t *expires, return !t.task ? 0 : -EINTR; } +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); + +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + return schedule_hrtimeout_range(expires, 0, mode); +} EXPORT_SYMBOL_GPL(schedule_hrtimeout);