sh: Populate initial secondary CPU info from boot_cpu_data.
[safe/jmp/linux-2.6] / arch / powerpc / kernel / power6-pmu.c
index fce1fc2..ca399ba 100644 (file)
@@ -9,8 +9,10 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
+#include <linux/string.h>
 #include <asm/reg.h>
+#include <asm/cputable.h>
 
 /*
  * Bits in event code for POWER6
@@ -41,9 +43,9 @@
 #define MMCR1_NESTSEL_SH       45
 #define MMCR1_NESTSEL_MSK      0x7
 #define MMCR1_NESTSEL(m)       (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
-#define MMCR1_PMC1_LLA         ((u64)1 << 44)
-#define MMCR1_PMC1_LLA_VALUE   ((u64)1 << 39)
-#define MMCR1_PMC1_ADDR_SEL    ((u64)1 << 35)
+#define MMCR1_PMC1_LLA         (1ul << 44)
+#define MMCR1_PMC1_LLA_VALUE   (1ul << 39)
+#define MMCR1_PMC1_ADDR_SEL    (1ul << 35)
 #define MMCR1_PMC1SEL_SH       24
 #define MMCR1_PMCSEL_SH(n)     (MMCR1_PMC1SEL_SH - (n) * 8)
 #define MMCR1_PMCSEL_MSK       0xff
@@ -134,7 +136,7 @@ static u32 marked_bus_events[16] = {
  * Returns 1 if event counts things relating to marked instructions
  * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  */
-static int power6_marked_instr_event(unsigned int event)
+static int power6_marked_instr_event(u64 event)
 {
        int pmc, psel, ptype;
        int bit, byte, unit;
@@ -172,17 +174,17 @@ static int power6_marked_instr_event(unsigned int event)
 /*
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
-static int p6_compute_mmcr(unsigned int event[], int n_ev,
-                          unsigned int hwc[], u64 mmcr[])
+static int p6_compute_mmcr(u64 event[], int n_ev,
+                          unsigned int hwc[], unsigned long mmcr[])
 {
-       u64 mmcr1 = 0;
-       u64 mmcra = 0;
+       unsigned long mmcr1 = 0;
+       unsigned long mmcra = 0;
        int i;
        unsigned int pmc, ev, b, u, s, psel;
        unsigned int ttmset = 0;
        unsigned int pmc_inuse = 0;
 
-       if (n_ev > 4)
+       if (n_ev > 6)
                return -1;
        for (i = 0; i < n_ev; ++i) {
                pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
@@ -202,6 +204,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
                        for (pmc = 0; pmc < 4; ++pmc)
                                if (!(pmc_inuse & (1 << pmc)))
                                        break;
+                       if (pmc >= 4)
+                               return -1;
                        pmc_inuse |= 1 << pmc;
                }
                hwc[i] = pmc;
@@ -213,7 +217,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
                        /* check for conflict on this byte of event bus */
                        if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
                                return -1;
-                       mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
+                       mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
                        ttmset |= 1 << b;
                        if (u == 5) {
                                /* Nest events have a further mux */
@@ -222,7 +226,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
                                    MMCR1_NESTSEL(mmcr1) != s)
                                        return -1;
                                ttmset |= 0x10;
-                               mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
+                               mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
                        }
                        if (0x30 <= psel && psel <= 0x3d) {
                                /* these need the PMCx_ADDR_SEL bits */
@@ -240,7 +244,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
                }
                if (power6_marked_instr_event(event[i]))
                        mmcra |= MMCRA_SAMPLE_ENABLE;
-               mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+               if (pmc < 4)
+                       mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
        }
        mmcr[0] = 0;
        if (pmc_inuse & 1)
@@ -256,19 +261,21 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
  * Layout of constraint bits:
  *
  *     0-1     add field: number of uses of PMC1 (max 1)
- *     2-3, 4-5, 6-7: ditto for PMC2, 3, 4
- *     8-10    select field: nest (subunit) event selector
+ *     2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
+ *     12-15   add field: number of uses of PMC1-4 (max 4)
  *     16-19   select field: unit on byte 0 of event bus
  *     20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ *     32-34   select field: nest (subunit) event selector
  */
-static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int p6_get_constraint(u64 event, unsigned long *maskp,
+                            unsigned long *valp)
 {
-       int pmc, byte, sh;
-       unsigned int mask = 0, value = 0;
+       int pmc, byte, sh, subunit;
+       unsigned long mask = 0, value = 0;
 
        pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
        if (pmc) {
-               if (pmc > 4)
+               if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
                        return -1;
                sh = (pmc - 1) * 2;
                mask |= 2 << sh;
@@ -276,26 +283,38 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
        }
        if (event & PM_BUSEVENT_MSK) {
                byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
-               sh = byte * 4;
+               sh = byte * 4 + (16 - PM_UNIT_SH);
                mask |= PM_UNIT_MSKS << sh;
-               value |= (event & PM_UNIT_MSKS) << sh;
+               value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
                if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
-                       mask |= PM_SUBUNIT_MSKS;
-                       value |= event & PM_SUBUNIT_MSKS;
+                       subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+                       mask  |= (unsigned long)PM_SUBUNIT_MSK << 32;
+                       value |= (unsigned long)subunit << 32;
                }
        }
+       if (pmc <= 4) {
+               mask  |= 0x8000;        /* add field for count of PMC1-4 uses */
+               value |= 0x1000;
+       }
        *maskp = mask;
        *valp = value;
        return 0;
 }
 
+static int p6_limited_pmc_event(u64 event)
+{
+       int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+       return pmc == 5 || pmc == 6;
+}
+
 #define MAX_ALT        4       /* at most 4 alternatives for any event */
 
 static const unsigned int event_alternatives[][MAX_ALT] = {
        { 0x0130e8, 0x2000f6, 0x3000fc },       /* PM_PTEG_RELOAD_VALID */
        { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
        { 0x080088, 0x200054, 0x3000f0 },       /* PM_ST_MISS_L1 */
-       { 0x10000a, 0x2000f4 },                 /* PM_RUN_CYC */
+       { 0x10000a, 0x2000f4, 0x600005 },       /* PM_RUN_CYC */
        { 0x10000b, 0x2000f5 },                 /* PM_RUN_COUNT */
        { 0x10000e, 0x400010 },                 /* PM_PURR */
        { 0x100010, 0x4000f8 },                 /* PM_FLUSH */
@@ -321,7 +340,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
  * This could be made more efficient with a binary search on
  * a presorted list, if necessary
  */
-static int find_alternatives_list(unsigned int event)
+static int find_alternatives_list(u64 event)
 {
        int i, j;
        unsigned int alt;
@@ -340,13 +359,15 @@ static int find_alternatives_list(unsigned int event)
        return -1;
 }
 
-static int p6_get_alternatives(unsigned int event, unsigned int alt[])
+static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
-       int i, j;
-       unsigned int aevent, psel, pmc;
+       int i, j, nlim;
+       unsigned int psel, pmc;
        unsigned int nalt = 1;
+       u64 aevent;
 
        alt[0] = event;
+       nlim = p6_limited_pmc_event(event);
 
        /* check the alternatives table */
        i = find_alternatives_list(event);
@@ -358,6 +379,7 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
                                break;
                        if (aevent != event)
                                alt[nalt++] = aevent;
+                       nlim += p6_limited_pmc_event(aevent);
                }
 
        } else {
@@ -375,33 +397,151 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
                                ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
        }
 
+       if (flags & PPMU_ONLY_COUNT_RUN) {
+               /*
+                * We're only counting in RUN state,
+                * so PM_CYC is equivalent to PM_RUN_CYC,
+                * PM_INST_CMPL to PM_RUN_INST_CMPL, and PM_PURR to PM_RUN_PURR.
+                * Alternatives that give no extra flexibility in
+                * assigning PMCs are left out (e.g. 0x10000a, PM_RUN_CYC,
+                * is not added as an alternative for 0x1e, PM_CYC).
+                * Note that even with these additional alternatives
+                * we never end up with more than 4 alternatives for any event.
+                */
+               j = nalt;
+               for (i = 0; i < nalt; ++i) {
+                       switch (alt[i]) {
+                       case 0x1e:      /* PM_CYC */
+                               alt[j++] = 0x600005;    /* PM_RUN_CYC */
+                               ++nlim;
+                               break;
+                       case 0x10000a:  /* PM_RUN_CYC */
+                               alt[j++] = 0x1e;        /* PM_CYC */
+                               break;
+                       case 2:         /* PM_INST_CMPL */
+                               alt[j++] = 0x500009;    /* PM_RUN_INST_CMPL */
+                               ++nlim;
+                               break;
+                       case 0x500009:  /* PM_RUN_INST_CMPL */
+                               alt[j++] = 2;           /* PM_INST_CMPL */
+                               break;
+                       case 0x10000e:  /* PM_PURR */
+                               alt[j++] = 0x4000f4;    /* PM_RUN_PURR */
+                               break;
+                       case 0x4000f4:  /* PM_RUN_PURR */
+                               alt[j++] = 0x10000e;    /* PM_PURR */
+                               break;
+                       }
+               }
+               nalt = j;
+       }
+
+       if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+               /* remove the limited PMC events */
+               j = 0;
+               for (i = 0; i < nalt; ++i) {
+                       if (!p6_limited_pmc_event(alt[i])) {
+                               alt[j] = alt[i];
+                               ++j;
+                       }
+               }
+               nalt = j;
+       } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+               /* remove all but the limited PMC events */
+               j = 0;
+               for (i = 0; i < nalt; ++i) {
+                       if (p6_limited_pmc_event(alt[i])) {
+                               alt[j] = alt[i];
+                               ++j;
+                       }
+               }
+               nalt = j;
+       }
+
        return nalt;
 }
 
-static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 {
        /* Set PMCxSEL to 0 to disable PMCx */
-       mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+       if (pmc <= 3)
+               mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
 }
 
 static int power6_generic_events[] = {
-       [PERF_COUNT_CPU_CYCLES] = 0x1e,
-       [PERF_COUNT_INSTRUCTIONS] = 2,
-       [PERF_COUNT_CACHE_REFERENCES] = 0x280030,       /* LD_REF_L1 */
-       [PERF_COUNT_CACHE_MISSES] = 0x30000c,           /* LD_MISS_L1 */
-       [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0,     /* BR_PRED */ 
-       [PERF_COUNT_BRANCH_MISSES] = 0x400052,          /* BR_MPRED */
+       [PERF_COUNT_HW_CPU_CYCLES]              = 0x1e,
+       [PERF_COUNT_HW_INSTRUCTIONS]            = 2,
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x280030, /* LD_REF_L1 */
+       [PERF_COUNT_HW_CACHE_MISSES]            = 0x30000c, /* LD_MISS_L1 */
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x410a0,  /* BR_PRED */
+       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x400052, /* BR_MPRED */
+};
+
+#define C(x)   PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
+ */
+static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+       [C(L1D)] = {            /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        0x80082,        0x80080         },
+               [C(OP_WRITE)] = {       0x80086,        0x80088         },
+               [C(OP_PREFETCH)] = {    0x810a4,        0               },
+       },
+       [C(L1I)] = {            /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        0,              0x100056        },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    0x4008c,        0               },
+       },
+       [C(LL)] = {             /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        0x150730,       0x250532        },
+               [C(OP_WRITE)] = {       0x250432,       0x150432        },
+               [C(OP_PREFETCH)] = {    0x810a6,        0               },
+       },
+       [C(DTLB)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        0,              0x20000e        },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    -1,             -1              },
+       },
+       [C(ITLB)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        0,              0x420ce         },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    -1,             -1              },
+       },
+       [C(BPU)] = {            /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        0x430e6,        0x400052        },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    -1,             -1              },
+       },
 };
 
-struct power_pmu power6_pmu = {
-       .n_counter = 4,
-       .max_alternatives = MAX_ALT,
-       .add_fields = 0x55,
-       .test_adder = 0,
-       .compute_mmcr = p6_compute_mmcr,
-       .get_constraint = p6_get_constraint,
-       .get_alternatives = p6_get_alternatives,
-       .disable_pmc = p6_disable_pmc,
-       .n_generic = ARRAY_SIZE(power6_generic_events),
-       .generic_events = power6_generic_events,
+static struct power_pmu power6_pmu = {
+       .name                   = "POWER6",
+       .n_counter              = 6,
+       .max_alternatives       = MAX_ALT,
+       .add_fields             = 0x1555,
+       .test_adder             = 0x3000,
+       .compute_mmcr           = p6_compute_mmcr,
+       .get_constraint         = p6_get_constraint,
+       .get_alternatives       = p6_get_alternatives,
+       .disable_pmc            = p6_disable_pmc,
+       .limited_pmc_event      = p6_limited_pmc_event,
+       .flags                  = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
+       .n_generic              = ARRAY_SIZE(power6_generic_events),
+       .generic_events         = power6_generic_events,
+       .cache_events           = &power6_cache_events,
 };
+
+static int init_power6_pmu(void)
+{
+       if (!cur_cpu_spec->oprofile_cpu_type ||
+           strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
+               return -ENODEV;
+
+       return register_power_pmu(&power6_pmu);
+}
+
+arch_initcall(init_power6_pmu);