#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
#include <asm/uaccess.h>
}
}
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ return preferred_cpu;
+ }
+#endif
+ return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires;
+
+ if (!new_base->cpu_base->hres_active)
+ return 0;
+
+ expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+ return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+ return 0;
+#endif
+}
+
/*
* Switch the timer base to the current CPU when possible.
*/
static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+ int pinned)
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
+ int this_cpu = smp_processor_id();
+ int cpu = hrtimer_get_target(this_cpu, pinned);
- new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+ new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[base->index];
if (base != new_base) {
/*
- * We are trying to schedule the timer on the local CPU.
+ * We are trying to move timer to new_base.
* However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq
timer->base = NULL;
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
+
+ if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+ cpu = this_cpu;
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
+ }
timer->base = new_base;
}
return new_base;
return base;
}
-# define switch_hrtimer_base(t, b) (b)
+# define switch_hrtimer_base(t, b, p) (b)
#endif /* !CONFIG_SMP */
return res;
}
+EXPORT_SYMBOL_GPL(ktime_add_safe);
+
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
static struct debug_obj_descr hrtimer_debug_descr;
continue;
timer = rb_entry(base->first, struct hrtimer, node);
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+ /*
+ * clock_was_set() has changed base->offset so the
+ * result might be negative. Fix it up to prevent a
+ * false positive in clockevents_program_event()
+ */
+ if (expires.tv64 < 0)
+ expires.tv64 = 0;
if (expires.tv64 < cpu_base->expires_next.tv64)
cpu_base->expires_next = expires;
}
*/
void hres_timers_resume(void)
{
- /* Retrigger the CPU local events: */
+ WARN_ONCE(!irqs_disabled(),
+ KERN_INFO "hres_timers_resume() called with IRQs enabled!");
+
retrigger_next_event(NULL);
}
* and expiry check is done in the hrtimer_interrupt or in the softirq.
*/
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
+ struct hrtimer_clock_base *base,
+ int wakeup)
{
if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
- spin_unlock(&base->cpu_base->lock);
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
- spin_lock(&base->cpu_base->lock);
+ if (wakeup) {
+ spin_unlock(&base->cpu_base->lock);
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ spin_lock(&base->cpu_base->lock);
+ } else
+ __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
return 1;
}
+
return 0;
}
static inline int hrtimer_switch_to_hres(void) { return 0; }
static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
+ struct hrtimer_clock_base *base,
+ int wakeup)
{
return 0;
}
return 0;
}
-/**
- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
- * @timer: the timer to be added
- * @tim: expiry time
- * @delta_ns: "slack" range for the timer
- * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
- *
- * Returns:
- * 0 on success
- * 1 when the timer was active
- */
-int
-hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
- const enum hrtimer_mode mode)
+int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+ unsigned long delta_ns, const enum hrtimer_mode mode,
+ int wakeup)
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
ret = remove_hrtimer(timer, base);
/* Switch the timer base, if necessary: */
- new_base = switch_hrtimer_base(timer, base);
+ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
- if (mode == HRTIMER_MODE_REL) {
+ if (mode & HRTIMER_MODE_REL) {
tim = ktime_add_safe(tim, new_base->get_time());
/*
* CONFIG_TIME_LOW_RES is a temporary way for architectures
* XXX send_remote_softirq() ?
*/
if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
- hrtimer_enqueue_reprogram(timer, new_base);
+ hrtimer_enqueue_reprogram(timer, new_base, wakeup);
unlock_hrtimer_base(timer, &flags);
return ret;
}
+
+/**
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
+ * @timer: the timer to be added
+ * @tim: expiry time
+ * @delta_ns: "slack" range for the timer
+ * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ *
+ * Returns:
+ * 0 on success
+ * 1 when the timer was active
+ */
+int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+ unsigned long delta_ns, const enum hrtimer_mode mode)
+{
+ return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
+}
EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
/**
int
hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
{
- return hrtimer_start_range_ns(timer, tim, 0, mode);
+ return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
}
EXPORT_SYMBOL_GPL(hrtimer_start);
#ifdef CONFIG_HIGH_RES_TIMERS
+static int force_clock_reprogram;
+
+/*
+ * After 5 iteration's attempts, we consider that hrtimer_interrupt()
+ * is hanging, which could happen with something that slows the interrupt
+ * such as the tracing. Then we force the clock reprogramming for each future
+ * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
+ * threshold that we will overwrite.
+ * The next tick event will be scheduled to 3 times we currently spend on
+ * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
+ * 1/4 of their time to process the hrtimer interrupts. This is enough to
+ * let it running without serious starvation.
+ */
+
+static inline void
+hrtimer_interrupt_hanging(struct clock_event_device *dev,
+ ktime_t try_time)
+{
+ force_clock_reprogram = 1;
+ dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
+ printk(KERN_WARNING "hrtimer: interrupt too slow, "
+ "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+}
/*
* High resolution timer interrupt
* Called with interrupts disabled
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
struct hrtimer_clock_base *base;
ktime_t expires_next, now;
+ int nr_retries = 0;
int i;
BUG_ON(!cpu_base->hres_active);
dev->next_event.tv64 = KTIME_MAX;
retry:
+ /* 5 retries is enough to notice a hang */
+ if (!(++nr_retries % 5))
+ hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+
now = ktime_get();
expires_next.tv64 = KTIME_MAX;
+ spin_lock(&cpu_base->lock);
+ /*
+ * We set expires_next to KTIME_MAX here with cpu_base->lock
+ * held to prevent that a timer is enqueued in our queue via
+ * the migration code. This does not affect enqueueing of
+ * timers which run their callback and need to be requeued on
+ * this CPU.
+ */
+ cpu_base->expires_next.tv64 = KTIME_MAX;
+
base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow;
struct rb_node *node;
- spin_lock(&cpu_base->lock);
-
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
__run_hrtimer(timer);
}
- spin_unlock(&cpu_base->lock);
base++;
}
+ /*
+ * Store the new expiry value so the migration code can verify
+ * against it.
+ */
cpu_base->expires_next = expires_next;
+ spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
- if (tick_program_event(expires_next, 0))
+ if (tick_program_event(expires_next, force_clock_reprogram))
goto retry;
}
}
hrtimer_peek_ahead_timers();
}
-#endif /* CONFIG_HIGH_RES_TIMERS */
+#else /* CONFIG_HIGH_RES_TIMERS */
+
+static inline void __hrtimer_peek_ahead_timers(void) { }
+
+#endif /* !CONFIG_HIGH_RES_TIMERS */
/*
* Called from timer softirq every jiffy, expire hrtimers:
return ret;
}
-asmlinkage long
-sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
+SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
+ struct timespec __user *, rmtp)
{
struct timespec tu;
break;
#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+ break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
{