[PATCH] improve precision of idle time detection.
author Martin Schwidefsky <schwidefsky@de.ibm.com>
Wed, 31 Dec 2008 14:11:40 +0000 (15:11 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Wed, 31 Dec 2008 14:11:47 +0000 (15:11 +0100)
Increase the precision of the idle time calculation that is exported
to user space via /sys/devices/system/cpu/cpu<x>/idle_time_us.
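
For reference, a minimal user-space sketch (not part of this patch) that
samples the attribute; the CPU number and error handling are only
illustrative:

	#include <stdio.h>

	/* Hypothetical example: read the idle time (in microseconds) that
	 * show_idle_time() exports for cpu0. */
	int main(void)
	{
		unsigned long long idle_us;
		FILE *f = fopen("/sys/devices/system/cpu/cpu0/idle_time_us", "r");

		if (!f || fscanf(f, "%llu", &idle_us) != 1) {
			perror("idle_time_us");
			return 1;
		}
		fclose(f);
		printf("cpu0 idle time: %llu us\n", idle_us);
		return 0;
	}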

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/cpu.h
arch/s390/kernel/process.c
arch/s390/kernel/smp.c
arch/s390/kernel/vtime.c

diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9b..89456df 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
 
 struct s390_idle_data {
        spinlock_t lock;
-       unsigned int in_idle;
        unsigned long long idle_count;
        unsigned long long idle_enter;
        unsigned long long idle_time;
@@ -26,7 +25,7 @@ void s390_idle_leave(void);
 
 static inline void s390_idle_check(void)
 {
-       if ((&__get_cpu_var(s390_idle))->in_idle)
+       if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
                s390_idle_leave();
 }
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67..1e06436 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
 #include <linux/utsname.h>
 #include <linux/tick.h>
 #include <linux/elfcore.h>
+#include <linux/kernel_stat.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -79,30 +80,19 @@ DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
        .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
 };
 
-static int s390_idle_enter(void)
+void s390_idle_leave(void)
 {
        struct s390_idle_data *idle;
+       unsigned long long idle_time;
 
        idle = &__get_cpu_var(s390_idle);
+       idle_time = S390_lowcore.int_clock - idle->idle_enter;
        spin_lock(&idle->lock);
+       idle->idle_time += idle_time;
+       idle->idle_enter = 0ULL;
        idle->idle_count++;
-       idle->in_idle = 1;
-       idle->idle_enter = get_clock();
        spin_unlock(&idle->lock);
-       vtime_stop_cpu_timer();
-       return NOTIFY_OK;
-}
-
-void s390_idle_leave(void)
-{
-       struct s390_idle_data *idle;
-
        vtime_start_cpu_timer();
-       idle = &__get_cpu_var(s390_idle);
-       spin_lock(&idle->lock);
-       idle->idle_time += get_clock() - idle->idle_enter;
-       idle->in_idle = 0;
-       spin_unlock(&idle->lock);
 }
 
 extern void s390_handle_mcck(void);
@@ -111,16 +101,16 @@ extern void s390_handle_mcck(void);
  */
 static void default_idle(void)
 {
+       struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+       unsigned long addr;
+       psw_t psw;
+
        /* CPU is going idle. */
        local_irq_disable();
        if (need_resched()) {
                local_irq_enable();
                return;
        }
-       if (s390_idle_enter() == NOTIFY_BAD) {
-               local_irq_enable();
-               return;
-       }
 #ifdef CONFIG_HOTPLUG_CPU
        if (cpu_is_offline(smp_processor_id())) {
                preempt_enable_no_resched();
@@ -138,9 +128,42 @@ static void default_idle(void)
        trace_hardirqs_on();
        /* Don't trace preempt off for idle. */
        stop_critical_timings();
+       vtime_stop_cpu_timer();
+
+       /*
+        * The inline assembly is equivalent to
+        *      idle->idle_enter = get_clock();
+        *      __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+        *                         PSW_MASK_IO | PSW_MASK_EXT);
+        * The difference is that the inline assembly makes sure that
+        * the stck instruction is right before the lpsw instruction.
+        * This is done to increase the precision.
+        */
+
        /* Wait for external, I/O or machine check interrupt. */
-       __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-                       PSW_MASK_IO | PSW_MASK_EXT);
+       psw.mask = psw_kernel_bits|PSW_MASK_WAIT|PSW_MASK_IO|PSW_MASK_EXT;
+#ifndef __s390x__
+       asm volatile(
+               "       basr    %0,0\n"
+               "0:     ahi     %0,1f-0b\n"
+               "       st      %0,4(%2)\n"
+               "       stck    0(%3)\n"
+               "       lpsw    0(%2)\n"
+               "1:"
+               : "=&d" (addr), "=m" (idle->idle_enter)
+               : "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
+               : "memory", "cc");
+#else /* __s390x__ */
+       asm volatile(
+               "       larl    %0,1f\n"
+               "       stg     %0,8(%2)\n"
+               "       stck    0(%3)\n"
+               "       lpswe   0(%2)\n"
+               "1:"
+               : "=&d" (addr), "=m" (idle->idle_enter)
+               : "a" (&psw), "a" (&idle->idle_enter), "m" (psw)
+               : "memory", "cc");
+#endif /* __s390x__ */
        start_critical_timings();
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fc7854..3979a6f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -851,9 +851,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
        unsigned long long idle_count;
 
        idle = &per_cpu(s390_idle, dev->id);
-       spin_lock_irq(&idle->lock);
+       spin_lock(&idle->lock);
        idle_count = idle->idle_count;
-       spin_unlock_irq(&idle->lock);
+       if (idle->idle_enter)
+               idle_count++;
+       spin_unlock(&idle->lock);
        return sprintf(buf, "%llu\n", idle_count);
 }
 static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -862,18 +864,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
                                struct sysdev_attribute *attr, char *buf)
 {
        struct s390_idle_data *idle;
-       unsigned long long new_time;
+       unsigned long long now, idle_time, idle_enter;
 
        idle = &per_cpu(s390_idle, dev->id);
-       spin_lock_irq(&idle->lock);
-       if (idle->in_idle) {
-               new_time = get_clock();
-               idle->idle_time += new_time - idle->idle_enter;
-               idle->idle_enter = new_time;
-       }
-       new_time = idle->idle_time;
-       spin_unlock_irq(&idle->lock);
-       return sprintf(buf, "%llu\n", new_time >> 12);
+       spin_lock(&idle->lock);
+       now = get_clock();
+       idle_time = idle->idle_time;
+       idle_enter = idle->idle_enter;
+       if (idle_enter != 0ULL && idle_enter < now)
+               idle_time += now - idle_enter;
+       spin_unlock(&idle->lock);
+       return sprintf(buf, "%llu\n", idle_time >> 12);
 }
 static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
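
The "idle_time >> 12" above converts TOD clock units to microseconds: the
s390 TOD clock increments bit 51 once per microsecond, so 4096 (2^12)
clock units correspond to one microsecond. A stand-alone sketch of the
conversion (illustrative only, not part of this patch):

	#include <stdio.h>

	/* The s390 TOD clock ticks 4096 times per microsecond (bit 51 is
	 * the microsecond bit), hence the right shift by 12 in
	 * show_idle_time(). */
	static unsigned long long tod_to_us(unsigned long long tod)
	{
		return tod >> 12;
	}

	int main(void)
	{
		unsigned long long one_second = 4096ULL * 1000000;	/* TOD units */

		printf("%llu us\n", tod_to_us(one_second));	/* prints 1000000 */
		return 0;
	}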
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 1254a4d..25d21fe 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -112,6 +112,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
+       struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
        __u64 timer;
 
        asm volatile ("  STPT %0\n"  /* Store current cpu timer value */
@@ -121,7 +122,7 @@ static inline void set_vtimer(__u64 expires)
        S390_lowcore.last_update_timer = expires;
 
        /* store expire time for this CPU timer */
-       __get_cpu_var(virt_cpu_timer).to_expire = expires;
+       vq->to_expire = expires;
 }
 
 void vtime_start_cpu_timer(void)