oprofile/x86: implement LFSR pseudo-random number generator for IBS
[safe/jmp/linux-2.6] / arch / x86 / oprofile / op_model_amd.c
index fdbed3a..97c84eb 100644 (file)
@@ -22,6 +22,9 @@
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
 #define MSR_AMD_EVENTSEL_RESERVED      ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 
 static unsigned long reset_value[NUM_VIRT_COUNTERS];
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-DECLARE_PER_CPU(int, switch_index);
-#endif
-
-#ifdef CONFIG_OPROFILE_IBS
 
 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN              (1ULL<<57)
@@ -62,7 +60,7 @@ DECLARE_PER_CPU(int, switch_index);
 #define IBS_FETCH_SIZE                 6
 #define IBS_OP_SIZE                    12
 
-static int has_ibs;    /* AMD Family10h and later */
+static u32 ibs_caps;
 
 struct op_ibs_config {
        unsigned long op_enabled;
@@ -75,6 +73,77 @@ struct op_ibs_config {
 
 static struct op_ibs_config ibs_config;
 
+/*
+ * IBS cpuid feature detection
+ */
+
+#define IBS_CPUID_FEATURES      0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL                 (1LL<<0)
+#define IBS_CAPS_OPCNT                 (1LL<<4)
+
+static u32 get_ibs_caps(void)
+{
+       u32 ibs_caps;
+       unsigned int max_level;
+
+       if (!boot_cpu_has(X86_FEATURE_IBS))
+               return 0;
+
+       /* check IBS cpuid feature flags */
+       max_level = cpuid_eax(0x80000000);
+       if (max_level < IBS_CPUID_FEATURES)
+               return IBS_CAPS_AVAIL;
+
+       ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
+       if (!(ibs_caps & IBS_CAPS_AVAIL))
+               /* bit 0 clear: cpuid flags not valid, report basic IBS only */
+               return IBS_CAPS_AVAIL;
+
+       return ibs_caps;
+}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
+{
+       int i;
+
+       for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
+               int hw_counter = op_x86_virt_to_phys(i);
+               if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+                       msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
+               else
+                       msrs->multiplex[i].addr = 0;
+       }
+}
+
+static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
+                              struct op_msrs const * const msrs)
+{
+       u64 val;
+       int i;
+
+       /* enable active counters */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               int virt = op_x86_phys_to_virt(i);
+               if (!counter_config[virt].enabled)
+                       continue;
+               rdmsrl(msrs->controls[i].addr, val);
+               val &= model->reserved;
+               val |= op_x86_get_ctrl(model, &counter_config[virt]);
+               wrmsrl(msrs->controls[i].addr, val);
+       }
+}
+
+#else
+
+static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
+
 #endif
 
 /* functions for op_amd_spec */
@@ -97,15 +166,7 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
                        msrs->controls[i].addr = 0;
        }
 
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-       for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
-               int hw_counter = i % NUM_CONTROLS;
-               if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
-                       msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
-               else
-                       msrs->multiplex[i].addr = 0;
-       }
-#endif
+       op_mux_fill_in_addresses(msrs);
 }
 
 static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -116,11 +177,10 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
        /* setup reset_value */
        for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
-               if (counter_config[i].enabled) {
+               if (counter_config[i].enabled)
                        reset_value[i] = counter_config[i].count;
-               } else {
+               else
                        reset_value[i] = 0;
-               }
        }
 
        /* clear all counters */
@@ -141,60 +201,55 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
        /* enable active counters */
        for (i = 0; i < NUM_COUNTERS; ++i) {
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-               int offset = i + __get_cpu_var(switch_index);
-#else
-               int offset = i;
-#endif
-               if (counter_config[offset].enabled && msrs->counters[i].addr) {
-                       /* setup counter registers */
-                       wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]);
-
-                       /* setup control registers */
-                       rdmsrl(msrs->controls[i].addr, val);
-                       val &= model->reserved;
-                       val |= op_x86_get_ctrl(model, &counter_config[offset]);
-                       wrmsrl(msrs->controls[i].addr, val);
-               }
-       }
-}
-
-
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+               int virt = op_x86_phys_to_virt(i);
+               if (!counter_config[virt].enabled)
+                       continue;
+               if (!msrs->counters[i].addr)
+                       continue;
 
-static void op_amd_switch_ctrl(struct op_x86_model_spec const *model,
-                              struct op_msrs const * const msrs)
-{
-       u64 val;
-       int i;
+               /* setup counter registers */
+               wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
 
-       /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
-               int offset = i + __get_cpu_var(switch_index);
-               if (counter_config[offset].enabled) {
-                       /* setup control registers */
-                       rdmsrl(msrs->controls[i].addr, val);
-                       val &= model->reserved;
-                       val |= op_x86_get_ctrl(model, &counter_config[offset]);
-                       wrmsrl(msrs->controls[i].addr, val);
-               }
+               /* setup control registers */
+               rdmsrl(msrs->controls[i].addr, val);
+               val &= model->reserved;
+               val |= op_x86_get_ctrl(model, &counter_config[virt]);
+               wrmsrl(msrs->controls[i].addr, val);
        }
 }
 
-#endif
+/*
+ * 16-bit Linear Feedback Shift Register (LFSR)
+ *
+ *                       16   14   13    11
+ * Feedback polynomial = X  + X  + X  +  X  + 1
+ */
+static unsigned int lfsr_random(void)
+{
+       static unsigned int lfsr_value = 0xF00D;
+       unsigned int bit;
 
+       /* Compute next bit to shift in */
+       bit = ((lfsr_value >> 0) ^
+              (lfsr_value >> 2) ^
+              (lfsr_value >> 3) ^
+              (lfsr_value >> 5)) & 0x0001;
 
-#ifdef CONFIG_OPROFILE_IBS
+       /* Advance to next register value */
+       lfsr_value = (lfsr_value >> 1) | (bit << 15);
 
-static inline int
+       return lfsr_value;
+}
+
+static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
                  struct op_msrs const * const msrs)
 {
        u64 val, ctl;
        struct op_entry entry;
 
-       if (!has_ibs)
-               return 0;
+       if (!ibs_caps)
+               return;
 
        if (ibs_config.fetch_enabled) {
                rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
@@ -240,23 +295,26 @@ op_amd_handle_ibs(struct pt_regs * const regs,
                        wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
                }
        }
-
-       return 1;
 }
 
 static inline void op_amd_start_ibs(void)
 {
        u64 val;
-       if (has_ibs && ibs_config.fetch_enabled) {
+
+       if (!ibs_caps)
+               return;
+
+       if (ibs_config.fetch_enabled) {
                val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
                val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
                val |= IBS_FETCH_ENABLE;
                wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
        }
 
-       if (has_ibs && ibs_config.op_enabled) {
+       if (ibs_config.op_enabled) {
                val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
-               val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+               if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
+                       val |= IBS_OP_CNT_CTL;
                val |= IBS_OP_ENABLE;
                wrmsrl(MSR_AMD64_IBSOPCTL, val);
        }
@@ -264,27 +322,18 @@ static inline void op_amd_start_ibs(void)
 
 static void op_amd_stop_ibs(void)
 {
-       if (has_ibs && ibs_config.fetch_enabled)
+       if (!ibs_caps)
+               return;
+
+       if (ibs_config.fetch_enabled)
                /* clear max count and enable */
                wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
 
-       if (has_ibs && ibs_config.op_enabled)
+       if (ibs_config.op_enabled)
                /* clear max count and enable */
                wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
-#else
-
-static inline int op_amd_handle_ibs(struct pt_regs * const regs,
-                                   struct op_msrs const * const msrs)
-{
-       return 0;
-}
-static inline void op_amd_start_ibs(void) { }
-static inline void op_amd_stop_ibs(void) { }
-
-#endif
-
 static int op_amd_check_ctrs(struct pt_regs * const regs,
                             struct op_msrs const * const msrs)
 {
@@ -292,19 +341,15 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
        int i;
 
        for (i = 0; i < NUM_COUNTERS; ++i) {
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-               int offset = i + __get_cpu_var(switch_index);
-#else
-               int offset = i;
-#endif
-               if (!reset_value[offset])
+               int virt = op_x86_phys_to_virt(i);
+               if (!reset_value[virt])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
                /* bit is clear if overflowed: */
                if (val & OP_CTR_OVERFLOW)
                        continue;
-               oprofile_add_sample(regs, offset);
-               wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]);
+               oprofile_add_sample(regs, virt);
+               wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
        }
 
        op_amd_handle_ibs(regs, msrs);
@@ -319,16 +364,11 @@ static void op_amd_start(struct op_msrs const * const msrs)
        int i;
 
        for (i = 0; i < NUM_COUNTERS; ++i) {
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-               int offset = i + __get_cpu_var(switch_index);
-#else
-               int offset = i;
-#endif
-               if (reset_value[offset]) {
-                       rdmsrl(msrs->controls[i].addr, val);
-                       val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-                       wrmsrl(msrs->controls[i].addr, val);
-               }
+               if (!reset_value[op_x86_phys_to_virt(i)])
+                       continue;
+               rdmsrl(msrs->controls[i].addr, val);
+               val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+               wrmsrl(msrs->controls[i].addr, val);
        }
 
        op_amd_start_ibs();
@@ -344,11 +384,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
         * pm callback
         */
        for (i = 0; i < NUM_COUNTERS; ++i) {
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-               if (!reset_value[i + per_cpu(switch_index, smp_processor_id())])
-#else
-               if (!reset_value[i])
-#endif
+               if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -366,14 +402,12 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
                if (msrs->counters[i].addr)
                        release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
        }
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < NUM_CONTROLS; ++i) {
                if (msrs->controls[i].addr)
                        release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
        }
 }
 
-#ifdef CONFIG_OPROFILE_IBS
-
 static u8 ibs_eilvt_off;
 
 static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -422,45 +456,36 @@ static int init_ibs_nmi(void)
                return 1;
        }
 
-#ifdef CONFIG_NUMA
-       /* Sanity check */
-       /* Works only for 64bit with proper numa implementation. */
-       if (nodes != num_possible_nodes()) {
-               printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
-                       "found: %d, expected %d",
-                       nodes, num_possible_nodes());
-               return 1;
-       }
-#endif
        return 0;
 }
 
 /* uninitialize the APIC for the IBS interrupts if needed */
 static void clear_ibs_nmi(void)
 {
-       if (has_ibs)
+       if (ibs_caps)
                on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
 }
 
 /* initialize the APIC for the IBS interrupts if available */
 static void ibs_init(void)
 {
-       has_ibs = boot_cpu_has(X86_FEATURE_IBS);
+       ibs_caps = get_ibs_caps();
 
-       if (!has_ibs)
+       if (!ibs_caps)
                return;
 
        if (init_ibs_nmi()) {
-               has_ibs = 0;
+               ibs_caps = 0;
                return;
        }
 
-       printk(KERN_INFO "oprofile: AMD IBS detected\n");
+       printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
+              (unsigned)ibs_caps);
 }
 
 static void ibs_exit(void)
 {
-       if (!has_ibs)
+       if (!ibs_caps)
                return;
 
        clear_ibs_nmi();
@@ -480,7 +505,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        if (ret)
                return ret;
 
-       if (!has_ibs)
+       if (!ibs_caps)
                return ret;
 
        /* model specific files */
@@ -490,7 +515,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        ibs_config.fetch_enabled = 0;
        ibs_config.max_cnt_op = 250000;
        ibs_config.op_enabled = 0;
-       ibs_config.dispatched_ops = 1;
+       ibs_config.dispatched_ops = 0;
 
        dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
        oprofilefs_create_ulong(sb, dir, "enable",
@@ -505,8 +530,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
                                &ibs_config.op_enabled);
        oprofilefs_create_ulong(sb, dir, "max_count",
                                &ibs_config.max_cnt_op);
-       oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-                               &ibs_config.dispatched_ops);
+       if (ibs_caps & IBS_CAPS_OPCNT)
+               oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+                                       &ibs_config.dispatched_ops);
 
        return 0;
 }
@@ -524,24 +550,10 @@ static void op_amd_exit(void)
        ibs_exit();
 }
 
-#else
-
-/* no IBS support */
-
-static int op_amd_init(struct oprofile_operations *ops)
-{
-       return 0;
-}
-
-static void op_amd_exit(void) {}
-
-#endif /* CONFIG_OPROFILE_IBS */
-
-struct op_x86_model_spec const op_amd_spec = {
+struct op_x86_model_spec op_amd_spec = {
        .num_counters           = NUM_COUNTERS,
        .num_controls           = NUM_CONTROLS,
        .num_virt_counters      = NUM_VIRT_COUNTERS,
-       .num_virt_controls      = NUM_VIRT_CONTROLS,
        .reserved               = MSR_AMD_EVENTSEL_RESERVED,
        .event_mask             = OP_EVENT_MASK,
        .init                   = op_amd_init,
@@ -553,6 +565,6 @@ struct op_x86_model_spec const op_amd_spec = {
        .stop                   = &op_amd_stop,
        .shutdown               = &op_amd_shutdown,
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-       .switch_ctrl            = &op_amd_switch_ctrl,
+       .switch_ctrl            = &op_mux_switch_ctrl,
 #endif
 };