/*
 *  linux/arch/x86-64/kernel/time.c
 *
 *  "High Precision Event Timer" based timekeeping.
 *
 *  Copyright (c) 1991,1992,1995  Linus Torvalds
 *  Copyright (c) 1994  Alan Modra
 *  Copyright (c) 1995  Markus Kuhn
 *  Copyright (c) 1996  Ingo Molnar
 *  Copyright (c) 1998  Andrea Arcangeli
 *  Copyright (c) 2002,2006  Vojtech Pavlik
 *  Copyright (c) 2003  Andi Kleen
 *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/time.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/kallsyms.h>
#include <linux/acpi.h>
#ifdef CONFIG_ACPI
#include <acpi/achware.h>       /* for PM timer frequency */
#include <acpi/acpi_bus.h>
#endif
#include <asm/8253pit.h>
#include <asm/pgtable.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#include <asm/proto.h>
#include <asm/hpet.h>
#include <asm/sections.h>
#include <linux/cpufreq.h>
#include <linux/hpet.h>
#include <asm/apic.h>
#ifdef CONFIG_CPU_FREQ
static void cpufreq_delayed_get(void);
#endif

extern void i8254_timer_resume(void);
extern int using_apic_timer;
static char *timename = NULL;

DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
DEFINE_SPINLOCK(i8253_lock);

int nohpet __initdata = 0;
static int notsc __initdata = 0;

#define USEC_PER_TICK (USEC_PER_SEC / HZ)
#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
#define NS_SCALE        10 /* 2^10, carefully chosen */
#define US_SCALE        32 /* 2^32, arbitrarily chosen */
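
/*
 * Worked example of the fixed-point scheme (illustrative figures, a
 * hypothetical 2 GHz CPU assumed): tsc_quot below is computed as
 *
 *      tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz
 *               = (1000 << 32) / 2000000 ~= 2147483
 *
 * i.e. 0.5 usec per cycle in 32.32 fixed point, so a cycle delta is
 * converted with one multiply and one shift,
 *
 *      usec = (cycles * tsc_quot) >> US_SCALE,
 *
 * and no 64-bit division is needed on the timekeeping fast path.
 */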
unsigned int cpu_khz;                           /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
unsigned long hpet_address;
static unsigned long hpet_period;               /* fsecs / HPET clock */
unsigned long hpet_tick;                        /* HPET clocks / interrupt */
int hpet_use_timer;                             /* Use counter of hpet for time keeping, otherwise PIT */
unsigned long vxtime_hz = PIT_TICK_RATE;
int report_lost_ticks;                          /* command line option */
unsigned long long monotonic_base;

struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */

volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;
/*
 * do_gettimeoffset() returns microseconds since last timer interrupt was
 * triggered by hardware. A memory read of HPET is slower than a register read
 * of TSC, but much more reliable. It's also synchronized to the timer
 * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
 * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
 * This is not a problem, because jiffies hasn't been updated either. They are
 * bound together by xtime_lock.
 */
static inline unsigned int do_gettimeoffset_tsc(void)
{
        unsigned long t;
        unsigned long x;

        t = get_cycles_sync();
        if (t < vxtime.last_tsc)
                t = vxtime.last_tsc; /* hack */
        x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
        return x;
}

static inline unsigned int do_gettimeoffset_hpet(void)
{
        /* cap counter read to one tick to avoid inconsistencies */
        unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
        return (min(counter, hpet_tick) * vxtime.quot) >> US_SCALE;
}

unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
/*
 * This version of gettimeofday() has microsecond resolution and better than
 * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
 * MHz) HPET timer.
 */
void do_gettimeofday(struct timeval *tv)
{
        unsigned long seq;
        unsigned int sec, usec;

        do {
                seq = read_seqbegin(&xtime_lock);

                sec = xtime.tv_sec;
                usec = xtime.tv_nsec / NSEC_PER_USEC;

                /* i386 does some correction here to keep the clock
                   monotonic even when ntpd is fixing drift.
                   But those corrections didn't work for me; the clock is
                   non-monotonic with ntp anyway.
                   I dropped all corrections now until a real solution can
                   be found. Note when you fix it here you need to do the same
                   in arch/x86_64/kernel/vsyscall.c and export all needed
                   variables in vmlinux.lds. -AK */
                usec += do_gettimeoffset();

        } while (read_seqretry(&xtime_lock, seq));

        tv->tv_sec = sec + usec / USEC_PER_SEC;
        tv->tv_usec = usec % USEC_PER_SEC;
}

EXPORT_SYMBOL(do_gettimeofday);
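
/*
 * Usage sketch (illustrative only, not part of this file): a caller
 * that wants a wall-clock timestamp would do
 *
 *      struct timeval tv;
 *      do_gettimeofday(&tv);
 *      printk("now: %ld.%06ld\n", tv.tv_sec, tv.tv_usec);
 *
 * while monotonic_clock() further down is the better choice when only
 * elapsed time matters, since it is immune to settimeofday() jumps.
 */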
/*
 * settimeofday() first undoes the correction that gettimeofday would do
 * on the time, and then saves it. This is ugly, but has been like this for
 * a long time already.
 */
int do_settimeofday(struct timespec *tv)
{
        time_t wtm_sec, sec = tv->tv_sec;
        long wtm_nsec, nsec = tv->tv_nsec;

        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;

        write_seqlock_irq(&xtime_lock);

        nsec -= do_gettimeoffset() * NSEC_PER_USEC;

        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);

        set_normalized_timespec(&xtime, sec, nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

        ntp_clear();

        write_sequnlock_irq(&xtime_lock);
        clock_was_set();
        return 0;
}

EXPORT_SYMBOL(do_settimeofday);
unsigned long profile_pc(struct pt_regs *regs)
{
        unsigned long pc = instruction_pointer(regs);

        /* Assume the lock function has either no stack frame or a copy
           of eflags from PUSHF.
           Eflags always has bits 22 and up cleared unlike kernel addresses. */
        if (!user_mode(regs) && in_lock_functions(pc)) {
                unsigned long *sp = (unsigned long *)regs->rsp;
                if (sp[0] >> 22)
                        return sp[0];
                if (sp[1] >> 22)
                        return sp[1];
        }
        return pc;
}
EXPORT_SYMBOL(profile_pc);
/*
 * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
 * ms after the second nowtime has started, because when nowtime is written
 * into the registers of the CMOS clock, it will jump to the next second
 * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
 * sheet for details.
 */
static void set_rtc_mmss(unsigned long nowtime)
{
        int real_seconds, real_minutes, cmos_minutes;
        unsigned char control, freq_select;

        /*
         * IRQs are disabled when we're called from the timer interrupt,
         * no need for spin_lock_irqsave()
         */

        spin_lock(&rtc_lock);

        /*
         * Tell the clock it's being set and stop it.
         */

        control = CMOS_READ(RTC_CONTROL);
        CMOS_WRITE(control | RTC_SET, RTC_CONTROL);

        freq_select = CMOS_READ(RTC_FREQ_SELECT);
        CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);

        cmos_minutes = CMOS_READ(RTC_MINUTES);
        BCD_TO_BIN(cmos_minutes);

        /*
         * since we're only adjusting minutes and seconds, don't interfere with hour
         * overflow. This avoids messing with unknown time zones but requires your RTC
         * not to be off by more than 15 minutes. Since we're calling it only when
         * our clock is externally synchronized using NTP, this shouldn't be a problem.
         */

        real_seconds = nowtime % 60;
        real_minutes = nowtime / 60;
        if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
                real_minutes += 30;             /* correct for half hour time zone */
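        /*
         * Worked example of the rounding above (illustrative values):
         * with cmos_minutes = 42 and real_minutes = 5 the difference is
         * 37, and (37 + 15) / 30 = 1 is odd, so the RTC is presumed to
         * sit on a half-hour-offset zone and 30 is added; a difference
         * near 0 or 60 gives an even quotient and no adjustment.
         */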
        if (abs(real_minutes - cmos_minutes) >= 30) {
                printk(KERN_WARNING "time.c: can't update CMOS clock "
                       "from %d to %d\n", cmos_minutes, real_minutes);
        } else {
                BIN_TO_BCD(real_seconds);
                BIN_TO_BCD(real_minutes);
                CMOS_WRITE(real_seconds, RTC_SECONDS);
                CMOS_WRITE(real_minutes, RTC_MINUTES);
        }

        /*
         * The following flags have to be released exactly in this order, otherwise the
         * DS12887 (popular MC146818A clone with integrated battery and quartz) will
         * not reset the oscillator and will not update precisely 500 ms later. You
         * won't find this mentioned in the Dallas Semiconductor data sheets, but who
         * believes data sheets anyway ... -- Markus Kuhn
         */

        CMOS_WRITE(control, RTC_CONTROL);
        CMOS_WRITE(freq_select, RTC_FREQ_SELECT);

        spin_unlock(&rtc_lock);
}
/* monotonic_clock(): returns # of nanoseconds passed since time_init()
 *              Note: This function is required to return accurate
 *              time even in the absence of multiple timer ticks.
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc);
unsigned long long monotonic_clock(void)
{
        unsigned long seq;
        u32 last_offset, this_offset, offset;
        unsigned long long base;

        if (vxtime.mode == VXTIME_HPET) {
                do {
                        seq = read_seqbegin(&xtime_lock);

                        last_offset = vxtime.last;
                        base = monotonic_base;
                        this_offset = hpet_readl(HPET_COUNTER);
                } while (read_seqretry(&xtime_lock, seq));
                offset = (this_offset - last_offset);
                offset *= NSEC_PER_TICK / hpet_tick;
        } else {
                do {
                        seq = read_seqbegin(&xtime_lock);

                        last_offset = vxtime.last_tsc;
                        base = monotonic_base;
                } while (read_seqretry(&xtime_lock, seq));
                this_offset = get_cycles_sync();
                offset = cycles_2_ns(this_offset - last_offset);
        }
        return base + offset;
}
EXPORT_SYMBOL(monotonic_clock);
static noinline void handle_lost_ticks(int lost)
{
        static long lost_count;
        static int warned;

        if (report_lost_ticks) {
                printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
                print_symbol("rip %s)\n", get_irq_regs()->rip);
        }

        if (lost_count == 1000 && !warned) {
                printk(KERN_WARNING "warning: many lost ticks.\n"
                       KERN_WARNING "Your time source seems to be unstable or "
                                "some driver is hogging interrupts\n");
                print_symbol("rip %s\n", get_irq_regs()->rip);
                if (vxtime.mode == VXTIME_TSC && hpet_address) {
                        printk(KERN_WARNING "Falling back to HPET\n");
                        if (hpet_use_timer)
                                vxtime.last = hpet_readl(HPET_T0_CMP) -
                                                        hpet_tick;
                        else
                                vxtime.last = hpet_readl(HPET_COUNTER);
                        vxtime.mode = VXTIME_HPET;
                        vxtime.hpet_address = hpet_address;
                        do_gettimeoffset = do_gettimeoffset_hpet;
                }
                /* else should fall back to PIT, but code missing. */
                warned = 1;
        } else
                lost_count++;

#ifdef CONFIG_CPU_FREQ
        /* In some cases the CPU can change frequency without us noticing.
           Give cpufreq a chance to catch up. */
        if ((lost_count+1) % 25 == 0)
                cpufreq_delayed_get();
#endif
}
void main_timer_handler(void)
{
        static unsigned long rtc_update = 0;
        unsigned long tsc;
        int delay = 0, offset = 0, lost = 0;

        /*
         * Here we are in the timer irq handler. We have irqs locally disabled (so we
         * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
         * on the other CPU, so we need a lock. We also need to lock the vsyscall
         * variables, because both do_timer() and us change them -arca+vojtech
         */

        write_seqlock(&xtime_lock);

        if (vxtime.hpet_address)
                offset = hpet_readl(HPET_COUNTER);

        if (hpet_use_timer) {
                /* if we're using the hpet timer functionality,
                 * we can more accurately know the counter value
                 * when the timer interrupt occurred.
                 */
                offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
                delay = hpet_readl(HPET_COUNTER) - offset;
        } else if (!pmtmr_ioport) {
                spin_lock(&i8253_lock);
                outb_p(0x00, 0x43);
                delay = inb_p(0x40);
                delay |= inb(0x40) << 8;
                spin_unlock(&i8253_lock);
                delay = LATCH - 1 - delay;
        }

        tsc = get_cycles_sync();

        if (vxtime.mode == VXTIME_HPET) {
                if (offset - vxtime.last > hpet_tick) {
                        lost = (offset - vxtime.last) / hpet_tick - 1;
                }

                monotonic_base +=
                        (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;

                vxtime.last = offset;
#ifdef CONFIG_X86_PM_TIMER
        } else if (vxtime.mode == VXTIME_PMTMR) {
                lost = pmtimer_mark_offset();
#endif
        } else {
                offset = (((tsc - vxtime.last_tsc) *
                           vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;

                if (offset < 0)
                        offset = 0;

                if (offset > USEC_PER_TICK) {
                        lost = offset / USEC_PER_TICK;
                        offset %= USEC_PER_TICK;
                }

                monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);

                vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;

                if ((((tsc - vxtime.last_tsc) *
                      vxtime.tsc_quot) >> US_SCALE) < offset)
                        vxtime.last_tsc = tsc -
                                (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
        }

        if (lost > 0)
                handle_lost_ticks(lost);
        else
                lost = 0;

        /*
         * Do the timer stuff.
         */

        do_timer(lost + 1);
#ifndef CONFIG_SMP
        update_process_times(user_mode(get_irq_regs()));
#endif

        /*
         * In the SMP case we use the local APIC timer interrupt to do the profiling,
         * except when we simulate SMP mode on a uniprocessor system, in that case we
         * have to call the local interrupt handler.
         */

        if (!using_apic_timer)
                smp_local_timer_interrupt();

        /*
         * If we have an externally synchronized Linux clock, then update CMOS clock
         * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
         * closest to exactly 500 ms before the next second. If the update fails, we
         * don't care, as it'll be updated on the next turn, and the problem (time way
         * off) isn't likely to go away much sooner anyway.
         */

        if (ntp_synced() && xtime.tv_sec > rtc_update &&
            abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
                set_rtc_mmss(xtime.tv_sec);
                rtc_update = xtime.tv_sec + 660;
        }

        write_sequnlock(&xtime_lock);
}
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
        if (apic_runs_main_timer > 1)
                return IRQ_HANDLED;
        main_timer_handler();
        if (using_apic_timer)
                smp_send_timer_broadcast_ipi();
        return IRQ_HANDLED;
}
static unsigned int cyc2ns_scale __read_mostly;

static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
        cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
        return (cyc * cyc2ns_scale) >> NS_SCALE;
}
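
/*
 * Worked example (illustrative, cpu_khz = 2000000 assumed):
 * cyc2ns_scale = (1000000 << 10) / 2000000 = 512, so
 * cycles_2_ns(cyc) = (cyc * 512) >> 10 = cyc / 2, i.e. 0.5 ns per
 * cycle on the assumed 2 GHz part. The small NS_SCALE of 10 keeps
 * the multiply well within 64 bits even for large cycle deltas.
 */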
unsigned long long sched_clock(void)
{
        unsigned long a = 0;

#if 0
        /* Don't do a HPET read here. Using TSC always is much faster
           and HPET may not be mapped yet when the scheduler first runs.
           Disadvantage is a small drift between CPUs in some configurations,
           but that should be tolerable. */
        if (__vxtime.mode == VXTIME_HPET)
                return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
#endif

        /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
           which means it is not completely exact and may not be monotonic between
           CPUs. But the errors should be too small to matter for scheduling
           purposes. */

        rdtscll(a);
        return cycles_2_ns(a);
}
static unsigned long get_cmos_time(void)
{
        unsigned int year, mon, day, hour, min, sec;
        unsigned long flags;
        unsigned century = 0;

        spin_lock_irqsave(&rtc_lock, flags);

        do {
                sec = CMOS_READ(RTC_SECONDS);
                min = CMOS_READ(RTC_MINUTES);
                hour = CMOS_READ(RTC_HOURS);
                day = CMOS_READ(RTC_DAY_OF_MONTH);
                mon = CMOS_READ(RTC_MONTH);
                year = CMOS_READ(RTC_YEAR);
#ifdef CONFIG_ACPI
                if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
                                        acpi_gbl_FADT.century)
                        century = CMOS_READ(acpi_gbl_FADT.century);
#endif
        } while (sec != CMOS_READ(RTC_SECONDS));

        spin_unlock_irqrestore(&rtc_lock, flags);

        /*
         * We know that x86-64 always uses BCD format, no need to check the
         * config register.
         */

        BCD_TO_BIN(sec);
        BCD_TO_BIN(min);
        BCD_TO_BIN(hour);
        BCD_TO_BIN(day);
        BCD_TO_BIN(mon);
        BCD_TO_BIN(year);

        if (century) {
                BCD_TO_BIN(century);
                year += century * 100;
                printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
        } else {
                /*
                 * x86-64 systems have only existed since 2002.
                 * This will work up to Dec 31, 2100.
                 */
                year += 2000;
        }

        return mktime(year, mon, day, hour, min, sec);
}
#ifdef CONFIG_CPU_FREQ

/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
   changes.

   RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
   not that important because current Opteron setups do not support
   scaling on SMP anyway.

   Should fix up last_tsc too. Currently gettimeofday in the
   first tick after the change will be slightly wrong. */
#include <linux/workqueue.h>

static unsigned int cpufreq_delayed_issched = 0;
static unsigned int cpufreq_init = 0;
static struct work_struct cpufreq_delayed_get_work;

static void handle_cpufreq_delayed_get(struct work_struct *v)
{
        unsigned int cpu;
        for_each_online_cpu(cpu) {
                cpufreq_get(cpu);
        }
        cpufreq_delayed_issched = 0;
}

/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
 * to verify the CPU frequency the timing core thinks the CPU is running
 * at is still correct.
 */
static void cpufreq_delayed_get(void)
{
        static int warned;
        if (cpufreq_init && !cpufreq_delayed_issched) {
                cpufreq_delayed_issched = 1;
                if (!warned) {
                        warned = 1;
                        printk(KERN_DEBUG
                "Losing some ticks... checking if CPU frequency changed.\n");
                }
                schedule_work(&cpufreq_delayed_get_work);
        }
}
static unsigned int  ref_freq = 0;
static unsigned long loops_per_jiffy_ref = 0;

static unsigned long cpu_khz_ref = 0;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                 void *data)
{
        struct cpufreq_freqs *freq = data;
        unsigned long *lpj, dummy;

        if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
                return 0;

        lpj = &dummy;
        if (!(freq->flags & CPUFREQ_CONST_LOOPS))
#ifdef CONFIG_SMP
                lpj = &cpu_data[freq->cpu].loops_per_jiffy;
#else
                lpj = &boot_cpu_data.loops_per_jiffy;
#endif

        if (!ref_freq) {
                ref_freq = freq->old;
                loops_per_jiffy_ref = *lpj;
                cpu_khz_ref = cpu_khz;
        }
        if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
            (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
            (val == CPUFREQ_RESUMECHANGE)) {
                *lpj =
                cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

                cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
                if (!(freq->flags & CPUFREQ_CONST_LOOPS))
                        vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
        }

        set_cyc2ns_scale(cpu_khz_ref);

        return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
        .notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_tsc(void)
{
        INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
        if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
                                       CPUFREQ_TRANSITION_NOTIFIER))
                cpufreq_init = 1;
        return 0;
}

core_initcall(cpufreq_tsc);

#endif
/*
 * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
 * it to the HPET timer of known frequency.
 */

#define TICK_COUNT 100000000
#define TICK_MIN   5000
#define MAX_READ_RETRIES 5

/*
 * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
 * occurs between the reads of the hpet & TSC.
 */
static void __init read_hpet_tsc(int *hpet, int *tsc)
{
        int tsc1, tsc2, hpet1, retries = 0;
        static int msg;

        do {
                tsc1 = get_cycles_sync();
                hpet1 = hpet_readl(HPET_COUNTER);
                tsc2 = get_cycles_sync();
        } while (tsc2 - tsc1 > TICK_MIN && retries++ < MAX_READ_RETRIES);
        if (retries >= MAX_READ_RETRIES && !msg++)
                printk(KERN_WARNING
                       "hpet.c: exceeded max retries to read HPET & TSC\n");
        *hpet = hpet1;
        *tsc = tsc2;
}
static unsigned int __init hpet_calibrate_tsc(void)
{
        int tsc_start, hpet_start;
        int tsc_now, hpet_now;
        unsigned long flags;

        local_irq_save(flags);

        read_hpet_tsc(&hpet_start, &tsc_start);

        do {
                local_irq_disable();
                read_hpet_tsc(&hpet_now, &tsc_now);
                local_irq_restore(flags);
        } while ((tsc_now - tsc_start) < TICK_COUNT &&
                 (hpet_now - hpet_start) < TICK_COUNT);

        return (tsc_now - tsc_start) * 1000000000L
                / ((hpet_now - hpet_start) * hpet_period / 1000);
}
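
/*
 * Unit check for the expression above (illustrative): hpet_period is
 * in femtoseconds per HPET clock, so (hpet_now - hpet_start) *
 * hpet_period / 1000 is the elapsed time in picoseconds, and TSC
 * cycles * 1e9 divided by picoseconds yields cycles per millisecond,
 * i.e. exactly the cpu_khz value this function is expected to return.
 */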
/*
 * pit_calibrate_tsc() uses the speaker output (channel 2) of
 * the PIT. This is better than using the timer interrupt output,
 * because we can read the value of the speaker with just one inb(),
 * where we need three i/o operations for the interrupt channel.
 * We count how many ticks the TSC does in 50 ms.
 */

static unsigned int __init pit_calibrate_tsc(void)
{
        unsigned long start, end;
        unsigned long flags;

        spin_lock_irqsave(&i8253_lock, flags);

        outb((inb(0x61) & ~0x02) | 0x01, 0x61);

        outb(0xb0, 0x43);
        outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
        outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
        start = get_cycles_sync();
        while ((inb(0x61) & 0x20) == 0);
        end = get_cycles_sync();

        spin_unlock_irqrestore(&i8253_lock, flags);

        return (end - start) / 50;
}
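
/*
 * Worked example (illustrative, a hypothetical 2 GHz part assumed):
 * the gate is open for 50 ms, so end - start ~= 100000000 cycles and
 * (end - start) / 50 = 2000000, i.e. cycles per millisecond, which is
 * exactly the cpu_khz value the callers expect.
 */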
#ifdef CONFIG_HPET
static __init int late_hpet_init(void)
{
        struct hpet_data hd;
        unsigned int ntimer;

        if (!hpet_address)
                return 0;

        memset(&hd, 0, sizeof(hd));

        ntimer = hpet_readl(HPET_ID);
        ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
        ntimer++;

        /*
         * Register with driver.
         * Timer0 and Timer1 are used by the platform.
         */
        hd.hd_phys_address = hpet_address;
        hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
        hd.hd_nirqs = ntimer;
        hd.hd_flags = HPET_DATA_PLATFORM;
        hpet_reserve_timer(&hd, 0);
#ifdef CONFIG_HPET_EMULATE_RTC
        hpet_reserve_timer(&hd, 1);
#endif
        hd.hd_irq[0] = HPET_LEGACY_8254;
        hd.hd_irq[1] = HPET_LEGACY_RTC;
        if (ntimer > 2) {
                struct hpet *hpet;
                struct hpet_timer *timer;
                int i;

                hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
                timer = &hpet->hpet_timers[2];
                for (i = 2; i < ntimer; timer++, i++)
                        hd.hd_irq[i] = (timer->hpet_config &
                                        Tn_INT_ROUTE_CNF_MASK) >>
                                Tn_INT_ROUTE_CNF_SHIFT;
        }

        hpet_alloc(&hd);
        return 0;
}
fs_initcall(late_hpet_init);
#endif
static int hpet_timer_stop_set_go(unsigned long tick)
{
        unsigned int cfg;

        /*
         * Stop the timers and reset the main counter.
         */

        cfg = hpet_readl(HPET_CFG);
        cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
        hpet_writel(cfg, HPET_CFG);
        hpet_writel(0, HPET_COUNTER);
        hpet_writel(0, HPET_COUNTER + 4);

        /*
         * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
         * and period also hpet_tick.
         */
        if (hpet_use_timer) {
                hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
                            HPET_TN_32BIT, HPET_T0_CFG);
                hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
                hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
                cfg |= HPET_CFG_LEGACY;
        }

        /*
         * Go!
         */

        cfg |= HPET_CFG_ENABLE;
        hpet_writel(cfg, HPET_CFG);

        return 0;
}
static int hpet_init(void)
{
        unsigned int id;

        if (!hpet_address)
                return -1;
        set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
        __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);

        /*
         * Read the period, compute tick and quotient.
         */

        id = hpet_readl(HPET_ID);

        if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
                return -1;

        hpet_period = hpet_readl(HPET_PERIOD);
        if (hpet_period < 100000 || hpet_period > 100000000)
                return -1;

        hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;

        hpet_use_timer = (id & HPET_ID_LEGSUP);

        return hpet_timer_stop_set_go(hpet_tick);
}
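
/*
 * Worked example for the hpet_tick computation (illustrative, HZ = 250
 * assumed): a 14.31818 MHz HPET has hpet_period ~= 69841279 fs per
 * clock, and FSEC_PER_TICK = 1e15 / 250 = 4e12 fs, so hpet_tick =
 * (4e12 + 69841279/2) / 69841279 ~= 57272 HPET clocks per timer
 * interrupt; adding half the period first rounds to nearest.
 */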
static int hpet_reenable(void)
{
        return hpet_timer_stop_set_go(hpet_tick);
}
#define PIT_MODE 0x43
#define PIT_CH0  0x40

static void __init __pit_init(int val, u8 mode)
{
        unsigned long flags;

        spin_lock_irqsave(&i8253_lock, flags);
        outb_p(mode, PIT_MODE);
        outb_p(val & 0xff, PIT_CH0);    /* LSB */
        outb_p(val >> 8, PIT_CH0);      /* MSB */
        spin_unlock_irqrestore(&i8253_lock, flags);
}

void __init pit_init(void)
{
        __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
}

void __init pit_stop_interrupt(void)
{
        __pit_init(0, 0x30); /* mode 0 */
}
void __init stop_timer_interrupt(void)
{
        char *name;
        if (hpet_address) {
                name = "HPET";
                hpet_timer_stop_set_go(0);
        } else {
                name = "PIT";
                pit_stop_interrupt();
        }
        printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
}
int __init time_setup(char *str)
{
        report_lost_ticks = 1;
        return 1;
}

static struct irqaction irq0 = {
        timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
};
void __init time_init(void)
{
        if (nohpet)
                hpet_address = 0;
        xtime.tv_sec = get_cmos_time();
        xtime.tv_nsec = 0;

        set_normalized_timespec(&wall_to_monotonic,
                                -xtime.tv_sec, -xtime.tv_nsec);

        if (!hpet_init())
                vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
        else
                vxtime.hpet_address = 0;

        if (hpet_use_timer) {
                /* set tick_nsec to use the proper rate for HPET */
                tick_nsec = TICK_NSEC_HPET;
                cpu_khz = hpet_calibrate_tsc();
                timename = "HPET";
#ifdef CONFIG_X86_PM_TIMER
        } else if (pmtmr_ioport && !hpet_address) {
                vxtime_hz = PM_TIMER_FREQUENCY;
                timename = "PM";
                pit_init();
                cpu_khz = pit_calibrate_tsc();
#endif
        } else {
                pit_init();
                cpu_khz = pit_calibrate_tsc();
                timename = "PIT";
        }

        vxtime.mode = VXTIME_TSC;
        vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
        vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
        vxtime.last_tsc = get_cycles_sync();
        set_cyc2ns_scale(cpu_khz);
        setup_irq(0, &irq0);
}
static int tsc_unstable = 0;

void mark_tsc_unstable(void)
{
        tsc_unstable = 1;
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
__cpuinit int unsynchronized_tsc(void)
{
        if (tsc_unstable)
                return 1;

#ifdef CONFIG_SMP
        if (apic_is_clustered_box())
                return 1;
#endif
        /* Most intel systems have synchronized TSCs except for
           multi node systems */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
#ifdef CONFIG_ACPI
                /* But TSC doesn't tick in C3 so don't use it there */
                if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
                        return 1;
#endif
                return 0;
        }

        /* Assume multi socket systems are not synchronized */
        return num_present_cpus() > 1;
}
/*
 * Decide what mode gettimeofday should use.
 */
void time_init_gtod(void)
{
        char *timetype;

        if (unsynchronized_tsc())
                notsc = 1;

        if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
                vgetcpu_mode = VGETCPU_RDTSCP;
        else
                vgetcpu_mode = VGETCPU_LSL;

        if (hpet_address && notsc) {
                timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
                if (hpet_use_timer)
                        vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
                else
                        vxtime.last = hpet_readl(HPET_COUNTER);
                vxtime.mode = VXTIME_HPET;
                vxtime.hpet_address = hpet_address;
                do_gettimeoffset = do_gettimeoffset_hpet;
#ifdef CONFIG_X86_PM_TIMER
        /* Using PM for gettimeofday is quite slow, but we have no other
           choice because the TSC is too unreliable on some systems. */
        } else if (pmtmr_ioport && !hpet_address && notsc) {
                timetype = "PM";
                do_gettimeoffset = do_gettimeoffset_pm;
                vxtime.mode = VXTIME_PMTMR;
                sysctl_vsyscall = 0;
                printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
#endif
        } else {
                timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
                vxtime.mode = VXTIME_TSC;
        }

        printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
               vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
        printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
               cpu_khz / 1000, cpu_khz % 1000);
        vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
        vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
        vxtime.last_tsc = get_cycles_sync();

        set_cyc2ns_scale(cpu_khz);
}
__setup("report_lost_ticks", time_setup);

static long clock_cmos_diff;
static unsigned long sleep_start;
/*
 * sysfs support for the timer.
 */

static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
        /*
         * Estimate time zone so that set_time can update the clock
         * accordingly.
         */
        long cmos_time = get_cmos_time();

        clock_cmos_diff = -cmos_time;
        clock_cmos_diff += get_seconds();
        sleep_start = cmos_time;
        return 0;
}
static int timer_resume(struct sys_device *dev)
{
        unsigned long flags;
        unsigned long sec;
        unsigned long ctime = get_cmos_time();
        long sleep_length = (ctime - sleep_start) * HZ;

        if (sleep_length < 0) {
                printk(KERN_WARNING "Time skew detected in timer resume!\n");
                /* The time after the resume must not be earlier than the time
                 * before the suspend or some nasty things will happen
                 */
                sleep_length = 0;
                ctime = sleep_start;
        }
        if (hpet_address)
                hpet_reenable();
        else
                i8254_timer_resume();

        sec = ctime + clock_cmos_diff;
        write_seqlock_irqsave(&xtime_lock, flags);
        xtime.tv_sec = sec;
        xtime.tv_nsec = 0;
        if (vxtime.mode == VXTIME_HPET) {
                if (hpet_use_timer)
                        vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
                else
                        vxtime.last = hpet_readl(HPET_COUNTER);
#ifdef CONFIG_X86_PM_TIMER
        } else if (vxtime.mode == VXTIME_PMTMR) {
                pmtimer_resume();
#endif
        } else
                vxtime.last_tsc = get_cycles_sync();
        write_sequnlock_irqrestore(&xtime_lock, flags);
        jiffies += sleep_length;
        monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
        touch_softlockup_watchdog();
        return 0;
}
static struct sysdev_class timer_sysclass = {
        .resume = timer_resume,
        .suspend = timer_suspend,
        set_kset_name("timer"),
};

/* XXX this driverfs stuff should probably go elsewhere later -john */
static struct sys_device device_timer = {
        .id = 0,
        .cls = &timer_sysclass,
};

static int time_init_device(void)
{
        int error = sysdev_class_register(&timer_sysclass);
        if (!error)
                error = sysdev_register(&device_timer);
        return error;
}

device_initcall(time_init_device);
#ifdef CONFIG_HPET_EMULATE_RTC
/* HPET in LegacyReplacement Mode eats up the RTC interrupt line. When HPET
 * is enabled, we support RTC interrupt functionality in software.
 * RTC has 3 kinds of interrupts:
 * 1) Update Interrupt - generate an interrupt, every sec, when the RTC
 *    clock is updated
 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
 *    2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
 * (1) and (2) above are implemented using polling at a frequency of
 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
 * overhead. (DEFAULT_RTC_INT_FREQ)
 * For (3), we use interrupts at 64 Hz or the user specified periodic
 * frequency, whichever is higher.
 */
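
/*
 * Worked example (illustrative): with a user PIE_freq of 16 Hz the
 * timer still fires at the 64 Hz default and hpet_rtc_interrupt()
 * below raises RTC_PF on every 4th tick (hpet_rtc_int_freq / PIE_freq
 * = 4); with PIE_freq = 1024 Hz the timer itself is reprogrammed to
 * 1024 Hz and every tick raises RTC_PF.
 */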
#include <linux/rtc.h>

#define DEFAULT_RTC_INT_FREQ    64
#define RTC_NUM_INTS            1

static unsigned long UIE_on;
static unsigned long prev_update_sec;

static unsigned long AIE_on;
static struct rtc_time alarm_time;

static unsigned long PIE_on;
static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
static unsigned long PIE_count;

static unsigned long hpet_rtc_int_freq;         /* RTC interrupt frequency */
static unsigned int hpet_t1_cmp;                /* cached comparator register */

int is_hpet_enabled(void)
{
        return hpet_address != 0;
}
/*
 * Timer 1 is used for RTC; we do not use the periodic interrupt feature,
 * even if HPET supports periodic interrupts on Timer 1.
 * The reason is that to set up a periodic interrupt in HPET, we would need
 * to stop the main counter, and doing that every time someone disables or
 * enables the RTC would have an adverse effect on the main kernel timer
 * running on Timer 0.
 * So, for the time being, simulate the periodic interrupt in software.
 *
 * hpet_rtc_timer_init() is called for the first time and during subsequent
 * interrupts reinit happens through hpet_rtc_timer_reinit().
 */
int hpet_rtc_timer_init(void)
{
        unsigned int cfg, cnt;
        unsigned long flags;

        if (!is_hpet_enabled())
                return 0;
        /*
         * Set the counter 1 and enable the interrupts.
         */
        if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
                hpet_rtc_int_freq = PIE_freq;
        else
                hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;

        local_irq_save(flags);

        cnt = hpet_readl(HPET_COUNTER);
        cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
        hpet_writel(cnt, HPET_T1_CMP);
        hpet_t1_cmp = cnt;

        cfg = hpet_readl(HPET_T1_CFG);
        cfg &= ~HPET_TN_PERIODIC;
        cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
        hpet_writel(cfg, HPET_T1_CFG);

        local_irq_restore(flags);

        return 1;
}
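
/*
 * Worked example for the comparator arithmetic above (illustrative):
 * hpet_tick * HZ is the number of HPET clocks per second, so with a
 * 14.31818 MHz HPET (~14318180 clocks/s) and the 64 Hz default rate
 * the comparator is pushed ~223721 clocks ahead per RTC interrupt.
 */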
static void hpet_rtc_timer_reinit(void)
{
        unsigned int cfg, cnt, ticks_per_int, lost_ints;

        if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
                cfg = hpet_readl(HPET_T1_CFG);
                cfg &= ~HPET_TN_ENABLE;
                hpet_writel(cfg, HPET_T1_CFG);
                return;
        }

        if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
                hpet_rtc_int_freq = PIE_freq;
        else
                hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;

        /* It is more accurate to use the comparator value than current count.*/
        ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
        hpet_t1_cmp += ticks_per_int;
        hpet_writel(hpet_t1_cmp, HPET_T1_CMP);

        /*
         * If the interrupt handler was delayed too long, the write above tries
         * to schedule the next interrupt in the past and the hardware would
         * not interrupt until the counter had wrapped around.
         * So we have to check that the comparator wasn't set to a past time.
         */
        cnt = hpet_readl(HPET_COUNTER);
        if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
                lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
                /* Make sure that, even with the time needed to execute
                 * this code, the next scheduled interrupt has been moved
                 * back to the future: */
                lost_ints++;

                hpet_t1_cmp += lost_ints * ticks_per_int;
                hpet_writel(hpet_t1_cmp, HPET_T1_CMP);

                if (PIE_on)
                        PIE_count += lost_ints;

                if (printk_ratelimit())
                        printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
                               hpet_rtc_int_freq);
        }
}
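
/*
 * Worked example of the lost-interrupt recovery above (illustrative):
 * if the handler was delayed until the counter sits 2.5 *
 * ticks_per_int past the stale comparator, lost_ints = 2 + 1 = 3, the
 * safety increment makes it 4, and the comparator ends up 1.5 *
 * ticks_per_int in the future, so the next interrupt is guaranteed to
 * be ahead of the counter again.
 */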
/*
 * The functions below are called from the rtc driver.
 * Return 0 if HPET is not being used.
 * Otherwise do the necessary changes and return 1.
 */
int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
{
        if (!is_hpet_enabled())
                return 0;

        if (bit_mask & RTC_UIE)
                UIE_on = 0;
        if (bit_mask & RTC_PIE)
                PIE_on = 0;
        if (bit_mask & RTC_AIE)
                AIE_on = 0;

        return 1;
}

int hpet_set_rtc_irq_bit(unsigned long bit_mask)
{
        int timer_init_reqd = 0;

        if (!is_hpet_enabled())
                return 0;

        if (!(PIE_on | AIE_on | UIE_on))
                timer_init_reqd = 1;

        if (bit_mask & RTC_UIE)
                UIE_on = 1;
        if (bit_mask & RTC_PIE) {
                PIE_on = 1;
                PIE_count = 0;
        }
        if (bit_mask & RTC_AIE)
                AIE_on = 1;

        if (timer_init_reqd)
                hpet_rtc_timer_init();

        return 1;
}
int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
{
        if (!is_hpet_enabled())
                return 0;
        alarm_time.tm_hour = hrs;
        alarm_time.tm_min = min;
        alarm_time.tm_sec = sec;
        return 1;
}

int hpet_set_periodic_freq(unsigned long freq)
{
        if (!is_hpet_enabled())
                return 0;
        PIE_freq = freq;
        PIE_count = 0;
        return 1;
}

int hpet_rtc_dropped_irq(void)
{
        if (!is_hpet_enabled())
                return 0;
        return 1;
}
irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        struct rtc_time curr_time;
        unsigned long rtc_int_flag = 0;
        int call_rtc_interrupt = 0;

        hpet_rtc_timer_reinit();

        if (UIE_on | AIE_on) {
                rtc_get_rtc_time(&curr_time);
        }
        if (UIE_on) {
                if (curr_time.tm_sec != prev_update_sec) {
                        /* Set update int info, call real rtc int routine */
                        call_rtc_interrupt = 1;
                        rtc_int_flag = RTC_UF;
                        prev_update_sec = curr_time.tm_sec;
                }
        }
        if (PIE_on) {
                PIE_count++;
                if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
                        /* Set periodic int info, call real rtc int routine */
                        call_rtc_interrupt = 1;
                        rtc_int_flag |= RTC_PF;
                        PIE_count = 0;
                }
        }
        if (AIE_on) {
                if ((curr_time.tm_sec == alarm_time.tm_sec) &&
                    (curr_time.tm_min == alarm_time.tm_min) &&
                    (curr_time.tm_hour == alarm_time.tm_hour)) {
                        /* Set alarm int info, call real rtc int routine */
                        call_rtc_interrupt = 1;
                        rtc_int_flag |= RTC_AF;
                }
        }
        if (call_rtc_interrupt) {
                rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
                rtc_interrupt(rtc_int_flag, dev_id);
        }
        return IRQ_HANDLED;
}
#endif /* CONFIG_HPET_EMULATE_RTC */

static int __init nohpet_setup(char *s)
{
        nohpet = 1;
        return 1;
}

__setup("nohpet", nohpet_setup);

int __init notsc_setup(char *s)
{
        notsc = 1;
        return 1;
}

__setup("notsc", notsc_setup);