diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 943a9bf..703acde 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -18,7 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/mc146818rtc.h>
-#include <linux/irq.h>
 #include <linux/time.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
@@ -25,6 +24,10 @@
 #include <linux/sysdev.h>
 #include <linux/bcd.h>
 #include <linux/kallsyms.h>
+#include <linux/acpi.h>
+#ifdef CONFIG_ACPI
+#include <acpi/achware.h>      /* for PM timer frequency */
+#endif
 #include <asm/8253pit.h>
 #include <asm/pgtable.h>
 #include <asm/vsyscall.h>
@@ -60,6 +63,7 @@ static int notsc __initdata = 0;
 unsigned int cpu_khz;                                  /* TSC clocks / usec, not used here */
 static unsigned long hpet_period;                      /* fsecs / HPET clock */
 unsigned long hpet_tick;                               /* HPET clocks / interrupt */
+static int hpet_use_timer;
 unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;                         /* command line option */
 unsigned long long monotonic_base;
@@ -101,7 +105,9 @@ static inline unsigned int do_gettimeoffset_tsc(void)
 
 static inline unsigned int do_gettimeoffset_hpet(void)
 {
-       return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
+       /* cap counter read to one tick to avoid inconsistencies */
+       unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
+       return (min(counter,hpet_tick) * vxtime.quot) >> 32;
 }
 
 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
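
The do_gettimeoffset_hpet() change above clamps the counter delta to one
hpet_tick, so a late counter read cannot report more than one interrupt
period's worth of microseconds. A rough user-space sketch of the same
32.32 fixed-point conversion, using made-up example values (a 14.31818 MHz
HPET and HZ=1000) rather than anything read from hardware:

#include <stdio.h>

int main(void)
{
	/* Example values only: ~14.31818 MHz HPET, HZ = 1000. */
	unsigned long vxtime_hz = 14318180;		/* HPET clocks per second    */
	unsigned long hpet_tick = vxtime_hz / 1000;	/* HPET clocks per interrupt */
	unsigned long long quot = (1000000ULL << 32) / vxtime_hz; /* usec/clock, 32.32 */

	/* Pretend the counter advanced 20000 clocks past vxtime.last, i.e.
	 * the next timer interrupt is already overdue. */
	unsigned long counter = 20000;
	unsigned long capped  = counter < hpet_tick ? counter : hpet_tick;

	printf("uncapped offset: %llu usec\n", (counter * quot) >> 32);
	printf("capped offset:   %llu usec\n", (capped * quot) >> 32);
	return 0;
}
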
@@ -169,10 +175,7 @@ int do_settimeofday(struct timespec *tv)
        set_normalized_timespec(&xtime, sec, nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 
-       time_adjust = 0;                /* stop active adjtime() */
-       time_status |= STA_UNSYNC;
-       time_maxerror = NTP_PHASE_LIMIT;
-       time_esterror = NTP_PHASE_LIMIT;
+       ntp_clear();
 
        write_sequnlock_irq(&xtime_lock);
        clock_was_set();
@@ -297,7 +300,7 @@ unsigned long long monotonic_clock(void)
 
                        last_offset = vxtime.last;
                        base = monotonic_base;
-                       this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+                       this_offset = hpet_readl(HPET_COUNTER);
 
                } while (read_seqretry(&xtime_lock, seq));
                offset = (this_offset - last_offset);
@@ -373,7 +376,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 
        write_seqlock(&xtime_lock);
 
-       if (vxtime.hpet_address) {
+       if (vxtime.hpet_address)
+               offset = hpet_readl(HPET_COUNTER);
+
+       if (hpet_use_timer) {
+               /* if we're using the hpet timer functionality,
+                * we can more accurately know the counter value
+                * when the timer interrupt occurred.
+                */
                offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
                delay = hpet_readl(HPET_COUNTER) - offset;
        } else {
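
With the legacy-replacement timer active, the counter value at the moment
the interrupt was raised is known exactly (the previous comparator value,
HPET_T0_CMP - hpet_tick), and subtracting it from a fresh counter read
gives the interrupt service delay. A standalone sketch of that arithmetic,
with invented register values:

#include <stdio.h>

int main(void)
{
	/* Invented example values, all in HPET clocks. */
	unsigned long hpet_tick = 14318;		   /* clocks per timer interrupt   */
	unsigned long t0_cmp    = 1000UL * hpet_tick;	   /* comparator: next firing time */
	unsigned long counter   = t0_cmp - hpet_tick + 37; /* counter read in the handler  */

	/* The interrupt fired when the counter hit the previous comparator value. */
	unsigned long offset = t0_cmp - hpet_tick;
	unsigned long delay  = counter - offset;	/* handler latency, HPET clocks */

	printf("offset = %lu, delay = %lu HPET clocks\n", offset, delay);
	return 0;
}
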
@@ -396,6 +406,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
                        (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
 
                vxtime.last = offset;
+#ifdef CONFIG_X86_PM_TIMER
+       } else if (vxtime.mode == VXTIME_PMTMR) {
+               lost = pmtimer_mark_offset();
+#endif
        } else {
                offset = (((tsc - vxtime.last_tsc) *
                           vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
@@ -453,7 +467,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  * off) isn't likely to go away much sooner anyway.
  */
 
-       if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
+       if (ntp_synced() && xtime.tv_sec > rtc_update &&
                abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
                set_rtc_mmss(xtime.tv_sec);
                rtc_update = xtime.tv_sec + 660;
@@ -795,17 +809,18 @@ static int hpet_timer_stop_set_go(unsigned long tick)
  * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
  * and period also hpet_tick.
  */
-
-       hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+       if (hpet_use_timer) {
+               hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
                    HPET_TN_32BIT, HPET_T0_CFG);
-       hpet_writel(hpet_tick, HPET_T0_CMP);
-       hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
-
+               hpet_writel(hpet_tick, HPET_T0_CMP);
+               hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
+               cfg |= HPET_CFG_LEGACY;
+       }
 /*
  * Go!
  */
 
-       cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+       cfg |= HPET_CFG_ENABLE;
        hpet_writel(cfg, HPET_CFG);
 
        return 0;
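
Note the reordering above: timer 0 is programmed and HPET_CFG_LEGACY is set
only when the legacy-replacement interrupt is actually going to be used,
while the main counter is enabled unconditionally. A compile-and-run sketch
of that ordering, with hpet_writel() stubbed to print and the register
names and bit values reduced to placeholders (not the real MMIO layout):

#include <stdio.h>

/* Stub: the real hpet_writel() stores to HPET MMIO registers. */
static void hpet_writel(unsigned long value, const char *reg)
{
	printf("write 0x%lx -> %s\n", value, reg);
}

/* Placeholder bit values for illustration only. */
#define TN_SETUP	0x1	/* ENABLE | PERIODIC | SETVAL | 32BIT */
#define CFG_ENABLE	0x2
#define CFG_LEGACY	0x4

int main(void)
{
	int hpet_use_timer = 1;			/* pretend HPET_ID_LEGSUP was set */
	unsigned long hpet_tick = 14318;	/* example clocks per interrupt   */
	unsigned long cfg = 0;

	if (hpet_use_timer) {
		hpet_writel(TN_SETUP, "HPET_T0_CFG");
		hpet_writel(hpet_tick, "HPET_T0_CMP");
		hpet_writel(hpet_tick, "HPET_T0_CMP");	/* written twice, as in the patch */
		cfg |= CFG_LEGACY;
	}
	cfg |= CFG_ENABLE;
	hpet_writel(cfg, "HPET_CFG");		/* go */
	return 0;
}
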
@@ -826,8 +841,7 @@ static int hpet_init(void)
 
        id = hpet_readl(HPET_ID);
 
-       if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
-           !(id & HPET_ID_LEGSUP))
+       if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
                return -1;
 
        hpet_period = hpet_readl(HPET_PERIOD);
@@ -837,6 +851,8 @@ static int hpet_init(void)
        hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
                hpet_period;
 
+       hpet_use_timer = (id & HPET_ID_LEGSUP);
+
        return hpet_timer_stop_set_go(hpet_tick);
 }
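
hpet_init() above derives hpet_tick (HPET clocks per timer interrupt) from
HPET_PERIOD, which reports femtoseconds per HPET clock. A quick worked
example of that formula, assuming a typical ~14.31818 MHz HPET
(hpet_period of roughly 69841279 fs) and HZ=1000; both values are
assumptions, not taken from the patch:

#include <stdio.h>

int main(void)
{
	unsigned long hpet_period   = 69841279;		/* fs per HPET clock (assumed)   */
	unsigned long usec_per_tick = 1000000 / 1000;	/* USEC_PER_SEC / HZ, HZ assumed */

	/* Same rounding as the patch: 10^9 fs per usec, times usec per tick,
	 * divided by fs per HPET clock, rounded to nearest. */
	unsigned long long hpet_tick =
		(1000000000ULL * usec_per_tick + hpet_period / 2) / hpet_period;

	printf("hpet_tick = %llu HPET clocks per interrupt\n", hpet_tick); /* ~14318 */
	return 0;
}
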
 
@@ -893,11 +909,20 @@ void __init time_init(void)
        set_normalized_timespec(&wall_to_monotonic,
                                -xtime.tv_sec, -xtime.tv_nsec);
 
-       if (!hpet_init()) {
+       if (!hpet_init())
                 vxtime_hz = (1000000000000000L + hpet_period / 2) /
                        hpet_period;
+
+       if (hpet_use_timer) {
                cpu_khz = hpet_calibrate_tsc();
                timename = "HPET";
+#ifdef CONFIG_X86_PM_TIMER
+       } else if (pmtmr_ioport) {
+               vxtime_hz = PM_TIMER_FREQUENCY;
+               timename = "PM";
+               pit_init();
+               cpu_khz = pit_calibrate_tsc();
+#endif
        } else {
                pit_init();
                cpu_khz = pit_calibrate_tsc();
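
The selection order in time_init() is thus: the HPET legacy timer if
usable, otherwise the ACPI PM timer if the kernel has CONFIG_X86_PM_TIMER
and one was found, otherwise the plain PIT. A small sketch of that ladder
(the helper name is made up, and the 3579545 Hz rate is an assumption
about PM_TIMER_FREQUENCY):

#include <stdio.h>

/* hpet_legacy and pmtmr stand in for hpet_use_timer and pmtmr_ioport. */
static const char *timer_choice(int hpet_legacy, int pmtmr)
{
	if (hpet_legacy)
		return "HPET: cpu_khz = hpet_calibrate_tsc()";
	if (pmtmr)
		return "PM: vxtime_hz = 3579545, cpu_khz = pit_calibrate_tsc()";
	return "PIT: cpu_khz = pit_calibrate_tsc()";
}

int main(void)
{
	printf("%s\n", timer_choice(1, 0));
	printf("%s\n", timer_choice(0, 1));
	printf("%s\n", timer_choice(0, 0));
	return 0;
}
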
@@ -911,7 +936,6 @@ void __init time_init(void)
        vxtime.mode = VXTIME_TSC;
        vxtime.quot = (1000000L << 32) / vxtime_hz;
        vxtime.tsc_quot = (1000L << 32) / cpu_khz;
-       vxtime.hz = vxtime_hz;
        rdtscll_sync(&vxtime.last_tsc);
        setup_irq(0, &irq0);
 
@@ -923,37 +947,49 @@ void __init time_init(void)
 }
 
 /*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+static __init int unsynchronized_tsc(void)
+{
+#ifdef CONFIG_SMP
+       if (oem_force_hpet_timer())
+               return 1;
+       /* Intel systems are normally all synchronized. Exceptions
+          are handled in the OEM check above. */
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               return 0;
+#endif
+       /* Assume multi socket systems are not synchronized */
+       return num_online_cpus() > 1;
+}
+
+/*
  * Decide after all CPUs are booted what mode gettimeofday should use.
  */
 void __init time_init_gtod(void)
 {
        char *timetype;
 
-       /*
-        * AMD systems with more than one CPU don't have fully synchronized
-        * TSCs. Always use HPET gettimeofday for these, although it is slower.
-        * Intel SMP systems usually have synchronized TSCs, so use always
-        * the TSC.
-        *
-        * Exceptions:
-        * IBM Summit2 checked by oem_force_hpet_timer().
-        * AMD dual core may also not need HPET. Check me.
-        *
-        * Can be turned off with "notsc".
-        */
-       if (num_online_cpus() > 1 &&
-           boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-               notsc = 1;
-       /* Some systems will want to disable TSC and use HPET. */
-       if (oem_force_hpet_timer())
+       if (unsynchronized_tsc())
                notsc = 1;
        if (vxtime.hpet_address && notsc) {
-               timetype = "HPET";
+               timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
                vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
                vxtime.mode = VXTIME_HPET;
                do_gettimeoffset = do_gettimeoffset_hpet;
+#ifdef CONFIG_X86_PM_TIMER
+       /* Using PM for gettimeofday is quite slow, but we have no other
+          choice because the TSC is too unreliable on some systems. */
+       } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
+               timetype = "PM";
+               do_gettimeoffset = do_gettimeoffset_pm;
+               vxtime.mode = VXTIME_PMTMR;
+               sysctl_vsyscall = 0;
+               printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
+#endif
        } else {
-               timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
+               timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
                vxtime.mode = VXTIME_TSC;
        }
 
@@ -965,7 +1001,7 @@ __setup("report_lost_ticks", time_setup);
 static long clock_cmos_diff;
 static unsigned long sleep_start;
 
-static int timer_suspend(struct sys_device *dev, u32 state)
+static int timer_suspend(struct sys_device *dev, pm_message_t state)
 {
        /*
         * Estimate time zone so that set_time can update the clock
@@ -997,6 +1033,7 @@ static int timer_resume(struct sys_device *dev)
        write_sequnlock_irqrestore(&xtime_lock,flags);
        jiffies += sleep_length;
        wall_jiffies += sleep_length;
+       touch_softlockup_watchdog();
        return 0;
 }