[PATCH] x86_64: Add Intel architectural perfmon support to the NMI watchdog
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4e44d6e..5baa0c7 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
  *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
-#include <linux/irq.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/nmi.h>
 #include <linux/sysctl.h>
+#include <linux/kprobes.h>
 
 #include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
 #include <asm/nmi.h>
-#include <asm/msr.h>
 #include <asm/proto.h>
 #include <asm/kdebug.h>
-#include <asm/local.h>
+#include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
 
 /*
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -73,6 +66,9 @@ static unsigned int nmi_p4_cccr_val;
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
 #define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 
+#define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK   ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 #define MSR_P4_MISC_ENABLE     0x1A0
 #define MSR_P4_MISC_ENABLE_PERF_AVAIL  (1<<7)
 #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL        (1<<12)
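
Note: the ARCH_PERFMON_NMI_EVENT_* defines above come from
<asm/intel_arch_perfmon.h> and select the architectural "UnHalted Core
Cycles" event. For readers unfamiliar with the IA32_PERFEVTSELx layout,
this is roughly how the event-select word used later in the patch is
packed (an illustrative sketch based on the SDM field positions, not
code from this patch):

    /* illustrative sketch of the IA32_PERFEVTSELx layout (per the SDM) */
    #define EVTSEL_EVENT(e)	((e) & 0xffu)		/* bits 7:0  */
    #define EVTSEL_UMASK(u)	(((u) & 0xffu) << 8)	/* bits 15:8 */
    #define EVTSEL_USR		(1u << 16)	/* count in user mode    */
    #define EVTSEL_OS		(1u << 17)	/* count in kernel mode  */
    #define EVTSEL_INT		(1u << 20)	/* APIC intr on overflow */
    #define EVTSEL_EN		(1u << 22)	/* enable this counter   */

    /* "unhalted core cycles" is architectural event 0x3C, umask 0x00 */
    static inline unsigned int nmi_evntsel(void)
    {
            return EVTSEL_EVENT(0x3c) | EVTSEL_UMASK(0x00)
                 | EVTSEL_USR | EVTSEL_OS | EVTSEL_INT;
    }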
@@ -104,7 +100,10 @@ static __cpuinit inline int nmi_known_cpu(void)
        case X86_VENDOR_AMD:
                return boot_cpu_data.x86 == 15;
        case X86_VENDOR_INTEL:
-               return boot_cpu_data.x86 == 15;
+               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                       return 1;
+               else
+                       return (boot_cpu_data.x86 == 15);
        }
        return 0;
 }
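
Note: X86_FEATURE_ARCH_PERFMON corresponds to CPUID leaf 0xA reporting a
non-zero version ID in EAX[7:0], which makes the watchdog usable on any
family/model implementing architectural perfmon rather than only on
family 15. A minimal user-space sketch of the same detection
(illustrative only, not part of this patch):

    #include <stdio.h>
    #include <cpuid.h>	/* GCC's __get_cpuid() helper */

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* CPUID leaf 0xA: architectural performance monitoring */
            if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
                    return 1;

            /* EAX[7:0] is the perfmon version ID; 0 means unsupported */
            printf("arch perfmon version: %u\n", eax & 0xff);
            /* EBX is an *unavailability* mask: bit 0 clear means the
             * "unhalted core cycles" event is present */
            printf("unhalted core cycles event %s\n",
                   (ebx & 1) ? "missing" : "present");
            return 0;
    }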
@@ -128,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void)
 static __init void nmi_cpu_busy(void *data)
 {
        volatile int *endflag = data;
-       local_irq_enable();
+       local_irq_enable_in_hardirq();
        /* Intentionally don't use cpu_relax here. This is
           to make sure that the performance counter really ticks,
           even if there is a simulator or similar that catches the
@@ -152,23 +151,23 @@ int __init check_nmi_watchdog (void)
 
        printk(KERN_INFO "testing NMI watchdog ... ");
 
+#ifdef CONFIG_SMP
        if (nmi_watchdog == NMI_LOCAL_APIC)
                smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+#endif
 
        for (cpu = 0; cpu < NR_CPUS; cpu++)
-               counts[cpu] = cpu_pda[cpu].__nmi_count; 
+               counts[cpu] = cpu_pda(cpu)->__nmi_count;
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               if (!cpu_online(cpu))
-                       continue;
-               if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
+       for_each_online_cpu(cpu) {
+               if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
                        endflag = 1;
                        printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
                               cpu,
                               counts[cpu],
-                              cpu_pda[cpu].__nmi_count);
+                              cpu_pda(cpu)->__nmi_count);
                        nmi_active = 0;
                        lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
                        nmi_perfctr_msr = 0;
@@ -210,6 +209,8 @@ int __init setup_nmi_watchdog(char *str)
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
+static void disable_intel_arch_watchdog(void);
+
 static void disable_lapic_nmi_watchdog(void)
 {
        if (nmi_active <= 0)
@@ -222,6 +223,8 @@ static void disable_lapic_nmi_watchdog(void)
                if (boot_cpu_data.x86 == 15) {
                        wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
                        wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+               } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                       disable_intel_arch_watchdog();
                }
                break;
        }
@@ -234,6 +237,7 @@ static void enable_lapic_nmi_watchdog(void)
 {
        if (nmi_active < 0) {
                nmi_watchdog = NMI_LOCAL_APIC;
+               touch_nmi_watchdog();
                setup_apic_nmi_watchdog();
        }
 }
@@ -290,7 +294,7 @@ void enable_timer_nmi_watchdog(void)
 
 static int nmi_pm_active; /* nmi_active before suspend */
 
-static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
+static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
 {
        nmi_pm_active = nmi_active;
        disable_lapic_nmi_watchdog();
@@ -367,12 +371,59 @@ static void setup_k7_watchdog(void)
                | K7_NMI_EVENT;
 
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-       wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+       wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 }
 
+static void disable_intel_arch_watchdog(void)
+{
+       unsigned ebx;
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Unhalted Core Cycles Event or not.
+        * NOTE: Corresponding bit = 0 in ebx indicates event present.
+        */
+       ebx = cpuid_ebx(10);
+       if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+               wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+       unsigned int evntsel;
+       unsigned ebx;
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Unhalted Core Cycles Event or not.
+        * NOTE: Corresponding bit = 0 in ebx indicates event present.
+        */
+       ebx = cpuid_ebx(10);
+       if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+               return 0;
+
+       nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+       clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+       clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+       evntsel = ARCH_PERFMON_EVENTSEL_INT
+               | ARCH_PERFMON_EVENTSEL_OS
+               | ARCH_PERFMON_EVENTSEL_USR
+               | ARCH_PERFMON_NMI_EVENT_SEL
+               | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+       wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+       wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+       return 1;
+}
+
 
 static int setup_p4_watchdog(void)
 {
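
Note on the counter preload used in both setup paths: perfmon counters
count up, so writing the negative of the desired event count makes the
counter overflow (and deliver the NMI) after exactly cpu_khz * 1000 /
nmi_hz unhalted cycles, i.e. one watchdog period. The (u64) cast matters
because cpu_khz * 1000 no longer fits in 32 bits above roughly 4.3 GHz.
A worked example of the arithmetic (a sketch assuming a 2 GHz CPU and
nmi_hz = 1; both values are illustrative):

    #include <stdio.h>

    int main(void)
    {
            unsigned int cpu_khz = 2000000;	/* assumed: 2 GHz */
            unsigned int nmi_hz  = 1;		/* one NMI per second */

            /* cycles in one watchdog period */
            unsigned long long period =
                    (unsigned long long)cpu_khz * 1000 / nmi_hz;

            /* preloading -period makes the counter wrap to zero,
             * raising the overflow NMI, after 'period' cycles */
            printf("period  = %llu cycles\n", period);	/* 2000000000 */
            printf("preload = %#llx\n", -period);
            return 0;
    }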
@@ -408,8 +459,8 @@ static int setup_p4_watchdog(void)
 
        wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-       Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
-       wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+       Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz));
+       wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz));
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
        return 1;
@@ -426,10 +477,16 @@ void setup_apic_nmi_watchdog(void)
                setup_k7_watchdog();
                break;
        case X86_VENDOR_INTEL:
-               if (boot_cpu_data.x86 != 15)
-                       return;
-               if (!setup_p4_watchdog())
+               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                       if (!setup_intel_arch_watchdog())
+                               return;
+               } else if (boot_cpu_data.x86 == 15) {
+                       if (!setup_p4_watchdog())
+                               return;
+               } else {
                        return;
+               }
+
                break;
 
        default:
@@ -454,18 +511,22 @@ static DEFINE_PER_CPU(int, nmi_touch);
 
 void touch_nmi_watchdog (void)
 {
-       int i;
+       if (nmi_watchdog > 0) {
+               unsigned cpu;
 
-       /*
-        * Tell other CPUs to reset their alert counters. We cannot
-        * do it ourselves because the alert count increase is not
-        * atomic.
-        */
-       for (i = 0; i < NR_CPUS; i++)
-               per_cpu(nmi_touch, i) = 1;
+               /*
+                * Tell other CPUs to reset their alert counters. We cannot
+                * do it ourselves because the alert count increase is not
+                * atomic.
+                */
+               for_each_present_cpu (cpu)
+                       per_cpu(nmi_touch, cpu) = 1;
+       }
+
+       touch_softlockup_watchdog();
 }
 
-void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
+void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
        int sum;
        int touched = 0;
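
Note on the touch_nmi_watchdog() change above: long-running code paths
that legitimately stall a CPU (slow console output, debugger loops and
the like) call it to keep the watchdog from reporting a false lockup,
and it now feeds the softlockup watchdog too, so callers need only one
call. A hedged usage sketch (the device and loop are hypothetical):

    #include <linux/nmi.h>	/* touch_nmi_watchdog() */
    #include <linux/delay.h>
    #include <asm/io.h>

    /* hypothetical: poll a slow device with interrupts disabled */
    static void wait_for_hw_ready(void __iomem *status_reg)
    {
            while (!(readl(status_reg) & 1)) {
                    /* tell every CPU the stall is expected so the
                     * NMI watchdog does not declare a lockup */
                    touch_nmi_watchdog();
                    udelay(10);
            }
    }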
@@ -475,6 +536,12 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
                __get_cpu_var(nmi_touch) = 0;
                touched = 1;
        }
+#ifdef CONFIG_X86_MCE
+       /* Could check oops_in_progress here too, but it's safer
+          not to. */
+       if (atomic_read(&mce_entry) > 0)
+               touched = 1;
+#endif
        if (!touched && __get_cpu_var(last_irq_sum) == sum) {
                /*
                 * Ayiee, looks like this CPU is stuck ...
@@ -486,8 +553,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
                                                        == NOTIFY_STOP) {
                                local_set(&__get_cpu_var(alert_counter), 0);
                                return;
-                       } 
-                       die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
+                       }
+                       die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs);
                }
        } else {
                __get_cpu_var(last_irq_sum) = sum;
@@ -504,38 +571,48 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
                         */
                        wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
                        apic_write(APIC_LVTPC, APIC_DM_NMI);
-               }
-               wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+               } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+                       /*
+                        * For Intel based architectural perfmon
+                        * - LVTPC is masked on interrupt and must be
+                        *   unmasked by the LVTPC handler.
+                        */
+                       apic_write(APIC_LVTPC, APIC_DM_NMI);
+               }
+               wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
        }
 }
 
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu)
 {
        return 0;
 }
  
 static nmi_callback_t nmi_callback = dummy_nmi_callback;
  
-asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
        int cpu = safe_smp_processor_id();
 
        nmi_enter();
        add_pda(__nmi_count,1);
-       if (!nmi_callback(regs, cpu))
+       if (!rcu_dereference(nmi_callback)(regs, cpu))
                default_do_nmi(regs);
        nmi_exit();
 }
 
 void set_nmi_callback(nmi_callback_t callback)
 {
-       nmi_callback = callback;
+       vmalloc_sync_all();
+       rcu_assign_pointer(nmi_callback, callback);
 }
+EXPORT_SYMBOL_GPL(set_nmi_callback);
 
 void unset_nmi_callback(void)
 {
        nmi_callback = dummy_nmi_callback;
 }
+EXPORT_SYMBOL_GPL(unset_nmi_callback);
 
 #ifdef CONFIG_SYSCTL
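
Note on the callback changes above: rcu_assign_pointer()/rcu_dereference()
order the pointer update against its use from NMI context on other CPUs,
and vmalloc_sync_all() maps a module-resident handler into all page
tables up front, since taking the lazy vmalloc fault from NMI context is
unsafe. A minimal sketch of a module using this interface (module name
and counter are illustrative; a real user would also wait for in-flight
NMIs before unloading):

    #include <linux/module.h>
    #include <linux/kernel.h>
    #include <asm/atomic.h>
    #include <asm/nmi.h>	/* set_nmi_callback(), nmi_callback_t */

    static atomic_t my_nmi_hits = ATOMIC_INIT(0);

    /* runs in NMI context: no locks, no sleeping, keep it short */
    static int my_nmi_handler(struct pt_regs *regs, int cpu)
    {
            atomic_inc(&my_nmi_hits);
            return 0;	/* 0 = not handled, fall through to default_do_nmi() */
    }

    static int __init my_init(void)
    {
            set_nmi_callback(my_nmi_handler);
            return 0;
    }

    static void __exit my_exit(void)
    {
            unset_nmi_callback();
            printk(KERN_INFO "saw %d NMIs\n", atomic_read(&my_nmi_hits));
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");	/* the exports are EXPORT_SYMBOL_GPL */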