timekeeping: Fix timezone update
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 03cbeb3..caf8d4d 100644
@@ -13,6 +13,7 @@
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/sysdev.h>
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
@@ -164,19 +165,12 @@ struct timespec raw_time;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
-       xtime_cache = xtime;
-       timespec_add_ns(&xtime_cache, nsec);
-}
-
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
        xtime.tv_sec += leapsecond;
        wall_to_monotonic.tv_sec -= leapsecond;
-       update_vsyscall(&xtime, timekeeper.clock);
+       update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
 
 #ifdef CONFIG_GENERIC_TIME
@@ -331,12 +325,10 @@ int do_settimeofday(struct timespec *tv)
 
        xtime = *tv;
 
-       update_xtime_cache(0);
-
        timekeeper.ntp_error = 0;
        ntp_clear();
 
-       update_vsyscall(&xtime, timekeeper.clock);
+       update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 
        write_sequnlock_irqrestore(&xtime_lock, flags);
 
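With this change update_vsyscall() also receives the NTP-adjusted multiplier, so arch vsyscall code can mirror exactly the mult value the timekeeping core is using rather than the raw clock->mult. A minimal sketch of an arch stub under the new signature (the body is hypothetical; a real implementation copies this state into the vDSO data page):

void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
		     u32 mult)
{
	/* Hypothetical: copy wall_time, clock->cycle_last, mult and
	 * clock->shift into the arch's vsyscall data page here. */
}
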
@@ -487,6 +479,17 @@ int timekeeping_valid_for_hres(void)
 }
 
 /**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+       return timekeeper.clock->max_idle_ns;
+}
+
+/**
  * read_persistent_clock -  Return time from the persistent clock.
  *
  * Weak dummy function for arches that do not yet support it.
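As the kerneldoc above requires, timekeeping_max_deferment() is only safe under the xtime_lock read seqlock, since a concurrent clocksource change would otherwise be missed. A minimal caller sketch (the helper name is illustrative; the expected in-tree user is the NO_HZ tick code):

static u64 read_max_deferment(void)
{
	unsigned long seq;
	u64 max_ns;

	do {
		seq = read_seqbegin(&xtime_lock);
		max_ns = timekeeping_max_deferment();
	} while (read_seqretry(&xtime_lock, seq));

	return max_ns;
}
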
@@ -547,7 +550,6 @@ void __init timekeeping_init(void)
        }
        set_normalized_timespec(&wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
-       update_xtime_cache(0);
        total_sleep_time.tv_sec = 0;
        total_sleep_time.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -581,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
                wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
                total_sleep_time = timespec_add_safe(total_sleep_time, ts);
        }
-       update_xtime_cache(0);
        /* re-base the last cycle value */
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
@@ -610,6 +611,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
        write_sequnlock_irqrestore(&xtime_lock, flags);
 
        clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+       clocksource_suspend();
 
        return 0;
 }
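clocksource_suspend() gives each registered clocksource a chance to quiesce its hardware before the system sleeps. A driver opts in through the ->suspend() hook; a hypothetical sketch (all my_timer_* names and the hardware accessors are illustrative, not real APIs):

static cycle_t my_timer_read(struct clocksource *cs)
{
	return (cycle_t)my_timer_hw_count();	/* assumed hardware accessor */
}

static void my_timer_suspend(struct clocksource *cs)
{
	my_timer_hw_stop();			/* assumed hardware accessor */
}

static struct clocksource my_timer_cs = {
	.name		= "my_timer",
	.rating		= 200,
	.read		= my_timer_read,
	.mask		= CLOCKSOURCE_MASK(32),
	.suspend	= my_timer_suspend,
};
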
@@ -721,6 +723,51 @@ static void timekeeping_adjust(s64 offset)
                                timekeeper.ntp_error_shift;
 }
 
+/**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ *
+ * This function accumulates a shifted interval of cycles into
+ * a shifted interval of nanoseconds, allowing for an O(log)
+ * accumulation loop.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+       u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+
+       /* If the offset is smaller than a shifted interval, do nothing */
+       if (offset < timekeeper.cycle_interval << shift)
+               return offset;
+
+       /* Accumulate one shifted interval */
+       offset -= timekeeper.cycle_interval << shift;
+       timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+       timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+       while (timekeeper.xtime_nsec >= nsecps) {
+               timekeeper.xtime_nsec -= nsecps;
+               xtime.tv_sec++;
+               second_overflow();
+       }
+
+       /* Accumulate into raw time */
+       raw_time.tv_nsec += timekeeper.raw_interval << shift;
+       while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+               raw_time.tv_nsec -= NSEC_PER_SEC;
+               raw_time.tv_sec++;
+       }
+
+       /* Accumulate error between NTP and clock interval */
+       timekeeper.ntp_error += tick_length << shift;
+       timekeeper.ntp_error -= timekeeper.xtime_interval <<
+                               (timekeeper.ntp_error_shift + shift);
+
+       return offset;
+}
+
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
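The doubling strategy is easiest to see outside the kernel. A toy userspace model of the accumulation loop (illustrative only; simplified names, no NTP error or raw time handling, and values chosen so offset starts above interval):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t offset = 1000000;	/* pending cycles after a long NO_HZ sleep */
	uint64_t interval = 999;	/* cycles per tick */
	int shift = 63 - __builtin_clzll(offset / interval);

	while (offset >= interval) {
		while (offset < interval << shift)
			shift--;		/* chunk no longer fits, halve it */
		offset -= interval << shift;	/* consume 2^shift ticks at once */
		printf("consumed %u ticks, %llu cycles left\n",
		       1u << shift, (unsigned long long)offset);
	}
	return 0;
}

Each pass consumes the largest power-of-two multiple of the tick interval that still fits, so even an offset worth millions of ticks drains in a few dozen iterations.
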
@@ -730,7 +777,7 @@ void update_wall_time(void)
 {
        struct clocksource *clock;
        cycle_t offset;
-       u64 nsecs;
+       int shift = 0, maxshift;
 
        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
@@ -744,33 +791,23 @@ void update_wall_time(void)
 #endif
        timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
 
-       /* normally this loop will run just once, however in the
-        * case of lost or late ticks, it will accumulate correctly.
+       /*
+        * With NO_HZ we may have to accumulate many cycle_intervals
+        * (think "ticks") worth of time at once. To do this efficiently,
+        * we calculate the largest doubling multiple of cycle_intervals
+        * that is smaller than the offset. We then accumulate that
+        * chunk in one go, and then try to consume the next smaller
+        * doubled multiple.
         */
+       shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+       shift = max(0, shift);
+       /* Bound shift to one less than what overflows tick_length */
+       maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+       shift = min(shift, maxshift);
        while (offset >= timekeeper.cycle_interval) {
-               u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
-               /* accumulate one interval */
-               offset -= timekeeper.cycle_interval;
-               clock->cycle_last += timekeeper.cycle_interval;
-
-               timekeeper.xtime_nsec += timekeeper.xtime_interval;
-               if (timekeeper.xtime_nsec >= nsecps) {
-                       timekeeper.xtime_nsec -= nsecps;
-                       xtime.tv_sec++;
-                       second_overflow();
-               }
-
-               raw_time.tv_nsec += timekeeper.raw_interval;
-               if (raw_time.tv_nsec >= NSEC_PER_SEC) {
-                       raw_time.tv_nsec -= NSEC_PER_SEC;
-                       raw_time.tv_sec++;
-               }
-
-               /* accumulate error between NTP and clock interval */
-               timekeeper.ntp_error += tick_length;
-               timekeeper.ntp_error -= timekeeper.xtime_interval <<
-                                       timekeeper.ntp_error_shift;
+               offset = logarithmic_accumulation(offset, shift);
+               if (offset < timekeeper.cycle_interval << shift)
+                       shift--;
        }
 
        /* correct the clock when NTP error is too big */
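The maxshift bound can be sanity-checked in isolation: tick_length is a 64-bit shifted value, so shifting it left by more than one less than its free high bits would push significant bits out of the u64. A standalone sketch with made-up magnitudes:

#include <stdint.h>
#include <stdio.h>

static int ilog2_u64(uint64_t v)
{
	return 63 - __builtin_clzll(v);
}

int main(void)
{
	uint64_t tick_length = 1000000ULL << 32;	/* example shifted ns per tick */
	uint64_t offset = 1ULL << 40;			/* example pending cycles */
	uint64_t cycle_interval = 1000000;		/* example cycles per tick */

	int shift = ilog2_u64(offset) - ilog2_u64(cycle_interval);
	int maxshift = (64 - (ilog2_u64(tick_length) + 1)) - 1;

	if (shift < 0)
		shift = 0;
	if (shift > maxshift)
		shift = maxshift;	/* keep tick_length << shift within u64 */

	printf("shift=%d maxshift=%d\n", shift, maxshift);
	return 0;
}
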
@@ -798,7 +835,9 @@ void update_wall_time(void)
                timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
        }
 
-       /* store full nanoseconds into xtime after rounding it up and
+
+       /*
+        * Store full nanoseconds into xtime after rounding it up and
         * add the remainder to the error difference.
         */
        xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -806,11 +845,18 @@ void update_wall_time(void)
        timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                timekeeper.ntp_error_shift;
 
-       nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
-       update_xtime_cache(nsecs);
+       /*
+        * Finally, make sure that after the rounding
+        * xtime.tv_nsec isn't larger than NSEC_PER_SEC
+        */
+       if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
+               xtime.tv_nsec -= NSEC_PER_SEC;
+               xtime.tv_sec++;
+               second_overflow();
+       }
 
        /* check to see if there is a new clocksource to use */
-       update_vsyscall(&xtime, timekeeper.clock);
+       update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
 
 /**
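The extra overflow check exists because the remainder is rounded up by one full nanosecond, which can land tv_nsec exactly on NSEC_PER_SEC. A toy demonstration of that boundary case (not kernel code):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

int main(void)
{
	int shift = 8;
	/* shifted remainder just below one full second */
	int64_t xtime_nsec = ((int64_t)NSEC_PER_SEC << shift) - 1;
	long tv_nsec = (long)(xtime_nsec >> shift) + 1;	/* round up */

	if (tv_nsec >= NSEC_PER_SEC) {
		tv_nsec -= NSEC_PER_SEC;
		printf("folded into tv_sec, tv_nsec=%ld\n", tv_nsec);
	}
	return 0;
}
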
@@ -826,11 +872,14 @@ void update_wall_time(void)
  */
 void getboottime(struct timespec *ts)
 {
-       struct timespec boottime;
+       struct timespec boottime = {
+               .tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec,
+               .tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec
+       };
 
-       boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time);
        set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
 }
+EXPORT_SYMBOL_GPL(getboottime);
 
 /**
  * monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -840,16 +889,17 @@ void monotonic_to_bootbased(struct timespec *ts)
 {
        *ts = timespec_add_safe(*ts, total_sleep_time);
 }
+EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
 unsigned long get_seconds(void)
 {
-       return xtime_cache.tv_sec;
+       return xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-       return xtime_cache;
+       return xtime;
 }
 
 struct timespec current_kernel_time(void)
@@ -860,7 +910,7 @@ struct timespec current_kernel_time(void)
        do {
                seq = read_seqbegin(&xtime_lock);
 
-               now = xtime_cache;
+               now = xtime;
        } while (read_seqretry(&xtime_lock, seq));
 
        return now;
@@ -875,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
        do {
                seq = read_seqbegin(&xtime_lock);
 
-               now = xtime_cache;
+               now = xtime;
                mono = wall_to_monotonic;
        } while (read_seqretry(&xtime_lock, seq));