x86, cacheinfo: Remove NUMA dependency, fix for AMD Fam10h rev D1
[safe/jmp/linux-2.6] / arch / x86 / kernel / cpu / perfctr-watchdog.c
index 2e9bef6..898df97 100644 (file)
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
+#include <linux/kprobes.h>
+
 #include <asm/apic.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_event.h>
 
 struct nmi_watchdog_ctlblk {
        unsigned int cccr_msr;
@@ -66,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
-               return (msr - MSR_K7_PERFCTR0);
+               return msr - MSR_K7_PERFCTR0;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-                       return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+                       return msr - MSR_ARCH_PERFMON_PERFCTR0;
 
                switch (boot_cpu_data.x86) {
                case 6:
-                       return (msr - MSR_P6_PERFCTR0);
+                       return msr - MSR_P6_PERFCTR0;
                case 15:
-                       return (msr - MSR_P4_BPU_PERFCTR0);
+                       return msr - MSR_P4_BPU_PERFCTR0;
                }
        }
        return 0;
@@ -90,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
        /* returns the bit offset of the event selection register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
-               return (msr - MSR_K7_EVNTSEL0);
+               return msr - MSR_K7_EVNTSEL0;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-                       return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+                       return msr - MSR_ARCH_PERFMON_EVENTSEL0;
 
                switch (boot_cpu_data.x86) {
                case 6:
-                       return (msr - MSR_P6_EVNTSEL0);
+                       return msr - MSR_P6_EVNTSEL0;
                case 15:
-                       return (msr - MSR_P4_BSU_ESCR0);
+                       return msr - MSR_P4_BSU_ESCR0;
                }
        }
        return 0;
@@ -111,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 {
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       return (!test_bit(counter, perfctr_nmi_owner));
+       return !test_bit(counter, perfctr_nmi_owner);
 }
 
 /* checks the an msr for availability */
@@ -122,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       return (!test_bit(counter, perfctr_nmi_owner));
+       return !test_bit(counter, perfctr_nmi_owner);
 }
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
 
@@ -189,7 +191,7 @@ void disable_lapic_nmi_watchdog(void)
        if (atomic_read(&nmi_active) <= 0)
                return;
 
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 
        if (wd_ops)
                wd_ops->unreserve();
@@ -213,7 +215,7 @@ void enable_lapic_nmi_watchdog(void)
                return;
        }
 
-       on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+       on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
        touch_nmi_watchdog();
 }
 
@@ -235,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
         */
        counter_val = (u64)cpu_khz * 1000;
        do_div(counter_val, retval);
-       if (counter_val > 0x7fffffffULL) {
+       if (counter_val > 0x7fffffffULL) {
                u64 count = (u64)cpu_khz * 1000;
                do_div(count, 0x7fffffffUL);
                retval = count + 1;
@@ -249,8 +251,8 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
        u64 count = (u64)cpu_khz * 1000;
 
        do_div(count, nmi_hz);
-       if(descr)
-               Dprintk("setting %s to -0x%08Lx\n", descr, count);
+       if (descr)
+               pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsrl(perfctr_msr, 0 - count);
 }
 
@@ -260,8 +262,8 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
        u64 count = (u64)cpu_khz * 1000;
 
        do_div(count, nmi_hz);
-       if(descr)
-               Dprintk("setting %s to -0x%08Lx\n", descr, count);
+       if (descr)
+               pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsr(perfctr_msr, (u32)(-count), 0);
 }
 
@@ -294,14 +296,20 @@ static int setup_k7_watchdog(unsigned nmi_hz)
 
        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
-       write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= K7_EVNTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
+       write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
 
+       /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= K7_EVNTSEL_ENABLE;
+       wrmsr(evntsel_msr, evntsel, 0);
+
        return 1;
 }
 
@@ -330,7 +338,8 @@ static void single_msr_unreserve(void)
        release_perfctr_nmi(wd_ops->perfctr);
 }
 
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -378,18 +387,24 @@ static int setup_p6_watchdog(unsigned nmi_hz)
        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= P6_EVNTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
+       write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
 
+       /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= P6_EVNTSEL0_ENABLE;
+       wrmsr(evntsel_msr, evntsel, 0);
+
        return 1;
 }
 
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        /*
         * P6 based Pentium M need to re-unmask
@@ -400,7 +415,7 @@ static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
        apic_write(APIC_LVTPC, APIC_DM_NMI);
 
        /* P6/ARCH_PERFMON has 32 bit counter write */
-       write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
+       write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
 }
 
 static const struct wd_ops p6_wd_ops = {
@@ -432,6 +447,27 @@ static const struct wd_ops p6_wd_ops = {
 #define P4_CCCR_ENABLE         (1 << 12)
 #define P4_CCCR_OVF            (1 << 31)
 
+#define P4_CONTROLS 18
+static unsigned int p4_controls[18] = {
+       MSR_P4_BPU_CCCR0,
+       MSR_P4_BPU_CCCR1,
+       MSR_P4_BPU_CCCR2,
+       MSR_P4_BPU_CCCR3,
+       MSR_P4_MS_CCCR0,
+       MSR_P4_MS_CCCR1,
+       MSR_P4_MS_CCCR2,
+       MSR_P4_MS_CCCR3,
+       MSR_P4_FLAME_CCCR0,
+       MSR_P4_FLAME_CCCR1,
+       MSR_P4_FLAME_CCCR2,
+       MSR_P4_FLAME_CCCR3,
+       MSR_P4_IQ_CCCR0,
+       MSR_P4_IQ_CCCR1,
+       MSR_P4_IQ_CCCR2,
+       MSR_P4_IQ_CCCR3,
+       MSR_P4_IQ_CCCR4,
+       MSR_P4_IQ_CCCR5,
+};
 /*
  * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
  * CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -454,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz)
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;
 
-               ebx = cpuid_ebx(1);
-               apicid = (ebx >> 24) & 0xff;
-               ht_num = apicid & 1;
+               ebx = cpuid_ebx(1);
+               apicid = (ebx >> 24) & 0xff;
+               ht_num = apicid & 1;
        } else
 #endif
                ht_num = 0;
@@ -473,16 +509,42 @@ static int setup_p4_watchdog(unsigned nmi_hz)
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+
+               /*
+                * If we're on the kdump kernel or other situation, we may
+                * still have other performance counter registers set to
+                * interrupt and they'll keep interrupting forever because
+                * of the P4_CCCR_OVF quirk. So we need to ACK all the
+                * pending interrupts and disable all the registers here,
+                * before reenabling the NMI delivery. Refer to p4_rearm()
+                * about the P4_CCCR_OVF quirk.
+                */
+               if (reset_devices) {
+                       unsigned int low, high;
+                       int i;
+
+                       for (i = 0; i < P4_CONTROLS; i++) {
+                               rdmsr(p4_controls[i], low, high);
+                               low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
+                               wrmsr(p4_controls[i], low, high);
+                       }
+               }
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;
-               cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+
+               /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
+               if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
+                       cccr_val = P4_CCCR_OVF_PMI0;
+               else
+                       cccr_val = P4_CCCR_OVF_PMI1;
+               cccr_val |= P4_CCCR_ESCR_SELECT(4);
        }
 
        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
-               | P4_ESCR_OS
+               | P4_ESCR_OS
                | P4_ESCR_USR;
 
        cccr_val |= P4_CCCR_THRESHOLD(15)
@@ -493,12 +555,17 @@ static int setup_p4_watchdog(unsigned nmi_hz)
        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       cccr_val |= P4_CCCR_ENABLE;
-       wrmsr(cccr_msr, cccr_val, 0);
+
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       cccr_val |= P4_CCCR_ENABLE;
+       wrmsr(cccr_msr, cccr_val, 0);
        return 1;
 }
 
@@ -541,11 +608,11 @@ static void p4_unreserve(void)
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 }
 
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        unsigned dummy;
        /*
-        * P4 quirks:
+        * P4 quirks:
         * - An overflown perfctr will assert its interrupt
         *   until the OVF flag in its CCCR is cleared.
         * - LVTPC is masked on interrupt and must be
@@ -595,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
-       if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+       if ((eax.split.mask_length <
+                       (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return 0;
 
@@ -614,13 +682,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
 
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsr(evntsel_msr, evntsel, 0);
        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
        return 1;
 }
@@ -640,16 +712,20 @@ static void probe_nmi_watchdog(void)
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
-                   boot_cpu_data.x86 != 16)
+                   boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
                        return;
                wd_ops = &k7_wd_ops;
                break;
        case X86_VENDOR_INTEL:
-               /*
-                * Work around Core Duo (Yonah) errata AE49 where perfctr1
-                * doesn't have a working enable bit.
+               /* Work around where perfctr1 doesn't have a working enable
+                * bit as described in the following errata:
+                * AE49 Core Duo and Intel Core Solo 65 nm
+                * AN49 Intel Pentium Dual-Core
+                * AF49 Dual-Core Intel Xeon Processor LV
                 */
-               if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
+               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
+                   ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
+                    boot_cpu_data.x86_mask == 4))) {
                        intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
                        intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
                }
@@ -716,7 +792,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
        return hz;
 }
 
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
 {
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;
@@ -728,8 +804,3 @@ int lapic_wd_event(unsigned nmi_hz)
        wd_ops->rearm(wd, nmi_hz);
        return 1;
 }
-
-int lapic_watchdog_ok(void)
-{
-       return wd_ops != NULL;
-}