* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
+#include <linux/string.h>
#include <asm/reg.h>
+#include <asm/cputable.h>
/*
* Bits in event code for POWER6
#define MMCR1_NESTSEL_SH 45
#define MMCR1_NESTSEL_MSK 0x7
#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
-#define MMCR1_PMC1_LLA ((u64)1 << 44)
-#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
-#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
+#define MMCR1_PMC1_LLA (1ul << 44)
+#define MMCR1_PMC1_LLA_VALUE (1ul << 39)
+#define MMCR1_PMC1_ADDR_SEL (1ul << 35)
#define MMCR1_PMC1SEL_SH 24
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
-static int power6_marked_instr_event(unsigned int event)
+static int power6_marked_instr_event(u64 event)
{
int pmc, psel, ptype;
int bit, byte, unit;
/*
* Assign PMC numbers and compute MMCR1 value for a set of events
*/
-static int p6_compute_mmcr(unsigned int event[], int n_ev,
- unsigned int hwc[], u64 mmcr[])
+static int p6_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], unsigned long mmcr[])
{
- u64 mmcr1 = 0;
- u64 mmcra = 0;
+ unsigned long mmcr1 = 0;
+ unsigned long mmcra = 0;
int i;
unsigned int pmc, ev, b, u, s, psel;
unsigned int ttmset = 0;
unsigned int pmc_inuse = 0;
- if (n_ev > 4)
+ if (n_ev > 6)
return -1;
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
for (pmc = 0; pmc < 4; ++pmc)
if (!(pmc_inuse & (1 << pmc)))
break;
+ if (pmc >= 4)
+ return -1;
pmc_inuse |= 1 << pmc;
}
hwc[i] = pmc;
/* check for conflict on this byte of event bus */
if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
return -1;
- mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
+ mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
ttmset |= 1 << b;
if (u == 5) {
/* Nest events have a further mux */
MMCR1_NESTSEL(mmcr1) != s)
return -1;
ttmset |= 0x10;
- mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
+ mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
}
if (0x30 <= psel && psel <= 0x3d) {
/* these need the PMCx_ADDR_SEL bits */
}
if (power6_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
- mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+ if (pmc < 4)
+ mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
}
mmcr[0] = 0;
if (pmc_inuse & 1)
* Layout of constraint bits:
*
* 0-1 add field: number of uses of PMC1 (max 1)
- * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4
- * 8-10 select field: nest (subunit) event selector
+ * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
+ * 12-15 add field: number of uses of PMC1-4 (max 4)
* 16-19 select field: unit on byte 0 of event bus
* 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ * 32-34 select field: nest (subunit) event selector
*/
-static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int p6_get_constraint(u64 event, unsigned long *maskp,
+ unsigned long *valp)
{
- int pmc, byte, sh;
- unsigned int mask = 0, value = 0;
+ int pmc, byte, sh, subunit;
+ unsigned long mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
- if (pmc > 4)
+ if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
}
if (event & PM_BUSEVENT_MSK) {
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- sh = byte * 4;
+ sh = byte * 4 + (16 - PM_UNIT_SH);
mask |= PM_UNIT_MSKS << sh;
- value |= (event & PM_UNIT_MSKS) << sh;
+ value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
- mask |= PM_SUBUNIT_MSKS;
- value |= event & PM_SUBUNIT_MSKS;
+ subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+ mask |= (unsigned long)PM_SUBUNIT_MSK << 32;
+ value |= (unsigned long)subunit << 32;
}
}
+ if (pmc <= 4) {
+ mask |= 0x8000; /* add field for count of PMC1-4 uses */
+ value |= 0x1000;
+ }
*maskp = mask;
*valp = value;
return 0;
}
+static int p6_limited_pmc_event(u64 event)
+{
+ int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+ return pmc == 5 || pmc == 6;
+}
+
#define MAX_ALT 4 /* at most 4 alternatives for any event */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
{ 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
- { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */
+ { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
{ 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
{ 0x10000e, 0x400010 }, /* PM_PURR */
{ 0x100010, 0x4000f8 }, /* PM_FLUSH */
* This could be made more efficient with a binary search on
* a presorted list, if necessary
*/
-static int find_alternatives_list(unsigned int event)
+static int find_alternatives_list(u64 event)
{
int i, j;
unsigned int alt;
return -1;
}
-static int p6_get_alternatives(unsigned int event, unsigned int alt[])
+static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
- int i, j;
- unsigned int aevent, psel, pmc;
+ int i, j, nlim;
+ unsigned int psel, pmc;
unsigned int nalt = 1;
+ u64 aevent;
alt[0] = event;
+ nlim = p6_limited_pmc_event(event);
/* check the alternatives table */
i = find_alternatives_list(event);
break;
if (aevent != event)
alt[nalt++] = aevent;
+ nlim += p6_limited_pmc_event(aevent);
}
} else {
((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
}
+ if (flags & PPMU_ONLY_COUNT_RUN) {
+ /*
+ * We're only counting in RUN state,
+ * so PM_CYC is equivalent to PM_RUN_CYC,
+ * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
+ * This doesn't include alternatives that don't provide
+ * any extra flexibility in assigning PMCs (e.g.
+ * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
+ * Note that even with these additional alternatives
+ * we never end up with more than 4 alternatives for any event.
+ */
+ j = nalt;
+ for (i = 0; i < nalt; ++i) {
+ switch (alt[i]) {
+ case 0x1e: /* PM_CYC */
+ alt[j++] = 0x600005; /* PM_RUN_CYC */
+ ++nlim;
+ break;
+ case 0x10000a: /* PM_RUN_CYC */
+ alt[j++] = 0x1e; /* PM_CYC */
+ break;
+ case 2: /* PM_INST_CMPL */
+ alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
+ ++nlim;
+ break;
+ case 0x500009: /* PM_RUN_INST_CMPL */
+ alt[j++] = 2; /* PM_INST_CMPL */
+ break;
+ case 0x10000e: /* PM_PURR */
+ alt[j++] = 0x4000f4; /* PM_RUN_PURR */
+ break;
+ case 0x4000f4: /* PM_RUN_PURR */
+ alt[j++] = 0x10000e; /* PM_PURR */
+ break;
+ }
+ }
+ nalt = j;
+ }
+
+ if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+ /* remove the limited PMC events */
+ j = 0;
+ for (i = 0; i < nalt; ++i) {
+ if (!p6_limited_pmc_event(alt[i])) {
+ alt[j] = alt[i];
+ ++j;
+ }
+ }
+ nalt = j;
+ } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+ /* remove all but the limited PMC events */
+ j = 0;
+ for (i = 0; i < nalt; ++i) {
+ if (p6_limited_pmc_event(alt[i])) {
+ alt[j] = alt[i];
+ ++j;
+ }
+ }
+ nalt = j;
+ }
+
return nalt;
}
-static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
/* Set PMCxSEL to 0 to disable PMCx */
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+ if (pmc <= 3)
+ mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
static int power6_generic_events[] = {
- [PERF_COUNT_CPU_CYCLES] = 0x1e,
- [PERF_COUNT_INSTRUCTIONS] = 2,
- [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
- [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
- [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
- [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 2,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
+ */
+static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x80082, 0x80080 },
+ [C(OP_WRITE)] = { 0x80086, 0x80088 },
+ [C(OP_PREFETCH)] = { 0x810a4, 0 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x100056 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0x4008c, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x150730, 0x250532 },
+ [C(OP_WRITE)] = { 0x250432, 0x150432 },
+ [C(OP_PREFETCH)] = { 0x810a6, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x20000e },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x420ce },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x430e6, 0x400052 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
};
-struct power_pmu power6_pmu = {
- .n_counter = 4,
- .max_alternatives = MAX_ALT,
- .add_fields = 0x55,
- .test_adder = 0,
- .compute_mmcr = p6_compute_mmcr,
- .get_constraint = p6_get_constraint,
- .get_alternatives = p6_get_alternatives,
- .disable_pmc = p6_disable_pmc,
- .n_generic = ARRAY_SIZE(power6_generic_events),
- .generic_events = power6_generic_events,
+static struct power_pmu power6_pmu = {
+ .name = "POWER6",
+ .n_counter = 6,
+ .max_alternatives = MAX_ALT,
+ .add_fields = 0x1555,
+ .test_adder = 0x3000,
+ .compute_mmcr = p6_compute_mmcr,
+ .get_constraint = p6_get_constraint,
+ .get_alternatives = p6_get_alternatives,
+ .disable_pmc = p6_disable_pmc,
+ .limited_pmc_event = p6_limited_pmc_event,
+ .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
+ .n_generic = ARRAY_SIZE(power6_generic_events),
+ .generic_events = power6_generic_events,
+ .cache_events = &power6_cache_events,
};
+
+static int init_power6_pmu(void)
+{
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
+ return -ENODEV;
+
+ return register_power_pmu(&power6_pmu);
+}
+
+arch_initcall(init_power6_pmu);