I just found out that some precision is unnecessarily lost in the
arch/i386/kernel/timers/timer_tsc.c:set_cyc2ns_scale function. It uses a
cpu_mhz parameter when it could use a cpu_khz. In the specific case of an
Intel P4 running at 3001.171 Mhz, the truncation to 3001 Mhz leads to an
imprecision of 19 microseconds per second : this is very sad for a timer with
nearly nanosecond accuracy.
Fix the x86_64 architecture too.
Cc: george anzinger <george@mvista.com>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
*
* Then we use scaling math (suggested by george@mvista.com) to get:
*
* Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
* into a shift.
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
* into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better percision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+ cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
printk("Detected %u.%03u MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
}
printk("Detected %u.%03u MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
}
- set_cyc2ns_scale(cpu_khz/1000);
+ set_cyc2ns_scale(cpu_khz);
}
/* set this only when cpu_has_tsc */
timer_hpet.read_timer = read_timer_tsc;
}
/* set this only when cpu_has_tsc */
timer_hpet.read_timer = read_timer_tsc;
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
*
* Then we use scaling math (suggested by george@mvista.com) to get:
*
* Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better percision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+ cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
if (use_tsc) {
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
if (use_tsc) {
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz/1000);
+ set_cyc2ns_scale(cpu_khz);
printk("Detected %u.%03u MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
}
printk("Detected %u.%03u MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
}
- set_cyc2ns_scale(cpu_khz/1000);
+ set_cyc2ns_scale(cpu_khz);
static unsigned int cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static unsigned int cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+ cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
}
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
}
- set_cyc2ns_scale(cpu_khz_ref / 1000);
+ set_cyc2ns_scale(cpu_khz_ref);
rdtscll_sync(&vxtime.last_tsc);
setup_irq(0, &irq0);
rdtscll_sync(&vxtime.last_tsc);
setup_irq(0, &irq0);
- set_cyc2ns_scale(cpu_khz / 1000);
+ set_cyc2ns_scale(cpu_khz);
#ifndef CONFIG_SMP
time_init_gtod();
#ifndef CONFIG_SMP
time_init_gtod();