*
* Create a semi-stable clock from a mixture of other events, including:
* - gtod
- * - jiffies
* - sched_clock()
* - explicit idle events
*
* We use gtod as the base and apply the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window. This window
- * is set up using jiffies.
+ * making it monotonic and keeping it within an expected window.
*
* Furthermore, explicit sleep and wakeup hooks allow us to account for time
* that is otherwise invisible (TSC gets stopped).
*
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 1 jiffies difference).
+ * consistent between cpus (never more than 2 jiffies difference).
*/
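+/*
+ * Concretely (see sched_clock_local() below), for a raw delta d since
+ * the last tick the filter computes:
+ *
+ *   clock' = clamp(tick_gtod + d, max(tick_gtod, clock), tick_gtod + TICK_NSEC)
+ *
+ * so the clock never goes backwards and stays within roughly one tick
+ * of the gtod base.
+ */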
-#include <linux/sched.h>
-#include <linux/percpu.h>
#include <linux/spinlock.h>
-#include <linux/ktime.h>
+#include <linux/hardirq.h>
#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * This is the default implementation.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+ return (unsigned long long)(jiffies - INITIAL_JIFFIES)
+ * (NSEC_PER_SEC / HZ);
+}
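+
+/*
+ * E.g. with HZ=1000 the fallback above advances in NSEC_PER_SEC/HZ =
+ * 1,000,000 ns steps, i.e. one-jiffy (1 ms) resolution.
+ */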
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static __read_mostly int sched_clock_running;
-#define MULTI_SHIFT 15
-/* Max is double, Min is 1/2 */
-#define MAX_MULTI (2LL << MULTI_SHIFT)
-#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
struct sched_clock_data {
- /*
- * Raw spinlock - this is a special case: this might be called
- * from within instrumentation code so we dont want to do any
- * instrumentation ourselves.
- */
- raw_spinlock_t lock;
-
- unsigned long tick_jiffies;
- u64 prev_raw;
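+	/*
+	 * tick_raw and tick_gtod are the sched_clock() and GTOD samples
+	 * taken at the last tick; clock is the filtered, monotonic
+	 * per-cpu result.
+	 */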
u64 tick_raw;
u64 tick_gtod;
u64 clock;
- s64 multi;
-#ifdef CONFIG_NO_HZ
- int check_max;
-#endif
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);

static inline struct sched_clock_data *this_scd(void)
{
	return &__get_cpu_var(sched_clock_data);
}

static inline struct sched_clock_data *cpu_sdc(int cpu)
{
	return &per_cpu(sched_clock_data, cpu);
}
-static __read_mostly int sched_clock_running;
-
void sched_clock_init(void)
{
u64 ktime_now = ktime_to_ns(ktime_get());
- unsigned long now_jiffies = jiffies;
int cpu;
for_each_possible_cpu(cpu) {
struct sched_clock_data *scd = cpu_sdc(cpu);
- scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
- scd->tick_jiffies = now_jiffies;
- scd->prev_raw = 0;
scd->tick_raw = 0;
scd->tick_gtod = ktime_now;
scd->clock = ktime_now;
- scd->multi = 1 << MULTI_SHIFT;
-#ifdef CONFIG_NO_HZ
- scd->check_max = 1;
-#endif
}
sched_clock_running = 1;
}
-#ifdef CONFIG_NO_HZ
/*
- * The dynamic ticks makes the delta jiffies inaccurate. This
- * prevents us from checking the maximum time update.
- * Disable the maximum check during stopped ticks.
+ * min, max except they take wrapping into account
*/
-void sched_clock_tick_stop(int cpu)
-{
- struct sched_clock_data *scd = cpu_sdc(cpu);
-
- scd->check_max = 0;
-}
-void sched_clock_tick_start(int cpu)
+static inline u64 wrap_min(u64 x, u64 y)
{
- struct sched_clock_data *scd = cpu_sdc(cpu);
-
- scd->check_max = 1;
+ return (s64)(x - y) < 0 ? x : y;
}
-static int check_max(struct sched_clock_data *scd)
-{
- return scd->check_max;
-}
-#else
-static int check_max(struct sched_clock_data *scd)
+static inline u64 wrap_max(u64 x, u64 y)
{
- return 1;
+ return (s64)(x - y) > 0 ? x : y;
}
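+
+/*
+ * The signed comparison makes these safe across u64 wrap-around, e.g.
+ * wrap_max(2, ULLONG_MAX) == 2: (s64)(2 - ULLONG_MAX) == 3 > 0, so 2
+ * is treated as the later time even though it is numerically smaller.
+ */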
-#endif /* CONFIG_NO_HZ */
/*
* update the percpu scd from the raw @now value
*
* - filter out backward motion
- * - use jiffies to generate a min,max window to clip the raw values
+ * - use the GTOD tick value to create a window to filter crazy TSC values
*/
-static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *time)
+static u64 sched_clock_local(struct sched_clock_data *scd)
{
- unsigned long now_jiffies = jiffies;
- long delta_jiffies = now_jiffies - scd->tick_jiffies;
- u64 clock = scd->clock;
- u64 min_clock, max_clock;
- s64 delta = now - scd->prev_raw;
+ u64 now, clock, old_clock, min_clock, max_clock;
+ s64 delta;
- WARN_ON_ONCE(!irqs_disabled());
-
- /*
- * At schedule tick the clock can be just under the gtod. We don't
- * want to push it too prematurely.
- */
- min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
- if (min_clock > TICK_NSEC)
- min_clock -= TICK_NSEC / 2;
+again:
+ now = sched_clock();
+ delta = now - scd->tick_raw;
+ if (unlikely(delta < 0))
+ delta = 0;
- if (unlikely(delta < 0)) {
- clock++;
- goto out;
- }
+ old_clock = scd->clock;
/*
- * The clock must stay within a jiffie of the gtod.
- * But since we may be at the start of a jiffy or the end of one
- * we add another jiffy buffer.
+ * scd->clock = clamp(scd->tick_gtod + delta,
+ * max(scd->tick_gtod, scd->clock),
+ * scd->tick_gtod + TICK_NSEC);
*/
- max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
- delta *= scd->multi;
- delta >>= MULTI_SHIFT;
+ clock = scd->tick_gtod + delta;
+ min_clock = wrap_max(scd->tick_gtod, old_clock);
+ max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
- if (unlikely(clock + delta > max_clock) && check_max(scd)) {
- if (clock < max_clock)
- clock = max_clock;
- else
- clock++;
- } else {
- clock += delta;
- }
+ clock = wrap_max(clock, min_clock);
+ clock = wrap_min(clock, max_clock);
- out:
- if (unlikely(clock < min_clock))
- clock = min_clock;
+ if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
+ goto again;
- if (time)
- *time = clock;
- else {
- scd->prev_raw = now;
- scd->clock = clock;
- }
+ return clock;
}
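+
+/*
+ * Note the lock-free update above: scd->clock only moves forward via
+ * cmpxchg64(), so a racing update (e.g. from instrumentation or NMI
+ * context reading the clock concurrently) just makes us retry with a
+ * fresh sched_clock() sample instead of spinning on a raw lock.
+ */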
-static void lock_double_clock(struct sched_clock_data *data1,
- struct sched_clock_data *data2)
+static u64 sched_clock_remote(struct sched_clock_data *scd)
{
- if (data1 < data2) {
- __raw_spin_lock(&data1->lock);
- __raw_spin_lock(&data2->lock);
+ struct sched_clock_data *my_scd = this_scd();
+ u64 this_clock, remote_clock;
+ u64 *ptr, old_val, val;
+
+ sched_clock_local(my_scd);
+again:
+ this_clock = my_scd->clock;
+ remote_clock = scd->clock;
+
+ /*
+	 * Use the opportunity that we have both clock values in hand to
+	 * couple the two clocks: we take the larger time as the latest
+	 * time for both runqueues. (this creates monotonic movement)
+ */
+ if (likely((s64)(remote_clock - this_clock) < 0)) {
+ ptr = &scd->clock;
+ old_val = remote_clock;
+ val = this_clock;
} else {
- __raw_spin_lock(&data2->lock);
- __raw_spin_lock(&data1->lock);
+ /*
+ * Should be rare, but possible:
+ */
+ ptr = &my_scd->clock;
+ old_val = this_clock;
+ val = remote_clock;
}
+
+ if (cmpxchg64(ptr, old_val, val) != old_val)
+ goto again;
+
+ return val;
}
u64 sched_clock_cpu(int cpu)
{
- struct sched_clock_data *scd = cpu_sdc(cpu);
- u64 now, clock;
-
- if (unlikely(!sched_clock_running))
- return 0ull;
+ struct sched_clock_data *scd;
+ u64 clock;
WARN_ON_ONCE(!irqs_disabled());
- now = sched_clock();
-
- if (cpu != raw_smp_processor_id()) {
- /*
- * in order to update a remote cpu's clock based on our
- * unstable raw time rebase it against:
- * tick_raw (offset between raw counters)
- * tick_gotd (tick offset between cpus)
- */
- struct sched_clock_data *my_scd = this_scd();
-
- lock_double_clock(scd, my_scd);
-
- now -= my_scd->tick_raw;
- now += scd->tick_raw;
-
- now += my_scd->tick_gtod;
- now -= scd->tick_gtod;
- __raw_spin_unlock(&my_scd->lock);
+ if (sched_clock_stable)
+ return sched_clock();
- __update_sched_clock(scd, now, &clock);
+ if (unlikely(!sched_clock_running))
+ return 0ull;
- __raw_spin_unlock(&scd->lock);
+ scd = cpu_sdc(cpu);
- } else {
- __raw_spin_lock(&scd->lock);
- __update_sched_clock(scd, now, NULL);
- clock = scd->clock;
- __raw_spin_unlock(&scd->lock);
- }
+ if (cpu != smp_processor_id())
+ clock = sched_clock_remote(scd);
+ else
+ clock = sched_clock_local(scd);
return clock;
}
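+
+/*
+ * sched_clock_cpu() must be called with IRQs disabled (hence the
+ * WARN_ON_ONCE() above); cpu_clock() below wraps it in
+ * local_irq_save()/local_irq_restore() for general use.
+ */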
void sched_clock_tick(void)
{
- struct sched_clock_data *scd = this_scd();
- unsigned long now_jiffies = jiffies;
- s64 mult, delta_gtod, delta_raw;
+ struct sched_clock_data *scd;
u64 now, now_gtod;
+ if (sched_clock_stable)
+ return;
+
if (unlikely(!sched_clock_running))
return;
WARN_ON_ONCE(!irqs_disabled());
+ scd = this_scd();
now_gtod = ktime_to_ns(ktime_get());
now = sched_clock();
- __raw_spin_lock(&scd->lock);
- __update_sched_clock(scd, now, NULL);
- /*
- * update tick_gtod after __update_sched_clock() because that will
- * already observe 1 new jiffy; adding a new tick_gtod to that would
- * increase the clock 2 jiffies.
- */
- delta_gtod = now_gtod - scd->tick_gtod;
- delta_raw = now - scd->tick_raw;
-
- if ((long)delta_raw > 0) {
- mult = delta_gtod << MULTI_SHIFT;
- do_div(mult, delta_raw);
- scd->multi = mult;
- if (scd->multi > MAX_MULTI)
- scd->multi = MAX_MULTI;
- else if (scd->multi < MIN_MULTI)
- scd->multi = MIN_MULTI;
- } else
- scd->multi = 1 << MULTI_SHIFT;
-
scd->tick_raw = now;
scd->tick_gtod = now_gtod;
- scd->tick_jiffies = now_jiffies;
- __raw_spin_unlock(&scd->lock);
+ sched_clock_local(scd);
}
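+
+/*
+ * Note: tick_raw and tick_gtod are updated before calling
+ * sched_clock_local() so the filter window is re-centred on the fresh
+ * GTOD sample; subsequent deltas are measured from this tick.
+ */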
/*
 * We just idled delta nanoseconds (called with irqs disabled):
 */
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
- struct sched_clock_data *scd = this_scd();
- u64 now = sched_clock();
-
- /*
- * Override the previous timestamp and ignore all
- * sched_clock() deltas that occured while we idled,
- * and use the PM-provided delta_ns to advance the
- * rq clock:
- */
- __raw_spin_lock(&scd->lock);
- scd->prev_raw = now;
- scd->clock += delta_ns;
- scd->multi = 1 << MULTI_SHIFT;
- __raw_spin_unlock(&scd->lock);
+ if (timekeeping_suspended)
+ return;
+ sched_clock_tick();
touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
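+
+/*
+ * The timekeeping_suspended check above matters because
+ * sched_clock_tick() calls ktime_get(), which must not be used while
+ * timekeeping is suspended (e.g. during suspend/resume).
+ */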
-#endif
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
- return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
unsigned long long cpu_clock(int cpu)
{
	unsigned long long clock;
	unsigned long flags;

	local_irq_save(flags);
	clock = sched_clock_cpu(cpu);
	local_irq_restore(flags);

	return clock;
}
+
+#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+
+void sched_clock_init(void)
+{
+ sched_clock_running = 1;
+}
+
+u64 sched_clock_cpu(int cpu)
+{
+ if (unlikely(!sched_clock_running))
+ return 0;
+
+ return sched_clock();
+}
+
+
+{
+ return sched_clock_cpu(cpu);
+}
+
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+
EXPORT_SYMBOL_GPL(cpu_clock);