sparc: Add Niagara2 HW cache event support.
arch/sparc/kernel/perf_event.c
/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */
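
/* For illustration, using the Niagara2 values defined below (12-bit
 * event fields, lower field at bit 6, upper field at bit 19): counting
 * CPU cycles (encoding 0x02ff) on the upper counter while parking the
 * lower counter on its NOP event (0x220) works out to roughly
 *
 *      pcr = (0x02ff << 19) | (0x220 << 6) | enable_bits;
 *
 * where enable_bits stands for the user/supervisor/hypervisor and
 * overflow-interrupt enable bits.
 */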

#define MAX_HWEVENTS                    2
#define MAX_PERIOD                      ((1UL << 32) - 1)

#define PIC_UPPER_INDEX                 0
#define PIC_LOWER_INDEX                 1

struct cpu_hw_events {
        struct perf_event       *events[MAX_HWEVENTS];
        unsigned long           used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        unsigned long           active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        int enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

struct perf_event_map {
        u16     encoding;
        u8      pic_mask;
#define PIC_NONE        0x00
#define PIC_UPPER       0x01
#define PIC_LOWER       0x02
};

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED    0xfffe
#define CACHE_OP_NONSENSE       0xffff

typedef struct perf_event_map cache_map_t
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];

struct sparc_pmu {
        const struct perf_event_map     *(*event_map)(int);
        const cache_map_t               *cache_map;
        int                             max_events;
        int                             upper_shift;
        int                             lower_shift;
        int                             event_mask;
        int                             hv_bit;
        int                             irq_bit;
        int                             upper_nop;
        int                             lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
        return &ultra3_perfmon_event_map[event_id];
}

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
                [C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
                [C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu ultra3_pmu = {
        .event_map      = ultra3_event_map,
        .cache_map      = &ultra3_cache_map,
        .max_events     = ARRAY_SIZE(ultra3_perfmon_event_map),
        .upper_shift    = 11,
        .lower_shift    = 4,
        .event_mask     = 0x3f,
        .upper_nop      = 0x1c,
        .lower_nop      = 0x14,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
        return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu niagara2_pmu = {
        .event_map      = niagara2_event_map,
        .cache_map      = &niagara2_cache_map,
        .max_events     = ARRAY_SIZE(niagara2_perfmon_event_map),
        .upper_shift    = 19,
        .lower_shift    = 6,
        .event_mask     = 0xfff,
        .hv_bit         = 0x8,
        .irq_bit        = 0x30,
        .upper_nop      = 0x220,
        .lower_nop      = 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event_id, int idx)
{
        if (idx == PIC_UPPER_INDEX)
                event_id <<= sparc_pmu->upper_shift;
        else
                event_id <<= sparc_pmu->lower_shift;
        return event_id;
}

static u64 mask_for_index(int idx)
{
        return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
        return event_encoding(idx == PIC_UPPER_INDEX ?
                              sparc_pmu->upper_nop :
                              sparc_pmu->lower_nop, idx);
}
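
/* Roughly, with the Niagara2 values (event_mask 0xfff, lower_shift 6),
 * mask_for_index(PIC_LOWER_INDEX) == 0xfff << 6 == 0x3ffc0 and
 * nop_for_index(PIC_LOWER_INDEX) == 0x220 << 6 == 0x8800, i.e. the
 * value used to park the lower counter on the sw_count NOP event
 * described above.
 */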

static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc,
                                            int idx)
{
        u64 val, mask = mask_for_index(idx);

        val = pcr_ops->read();
        pcr_ops->write((val & ~mask) | hwc->config);
}

static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc,
                                             int idx)
{
        u64 mask = mask_for_index(idx);
        u64 nop = nop_for_index(idx);
        u64 val = pcr_ops->read();

        pcr_ops->write((val & ~mask) | nop);
}

void hw_perf_enable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;
        int i;

        if (cpuc->enabled)
                return;

        cpuc->enabled = 1;
        barrier();

        val = pcr_ops->read();

        for (i = 0; i < MAX_HWEVENTS; i++) {
                struct perf_event *cp = cpuc->events[i];
                struct hw_perf_event *hwc;

                if (!cp)
                        continue;
                hwc = &cp->hw;
                val |= hwc->config_base;
        }

        pcr_ops->write(val);
}

void hw_perf_disable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;

        if (!cpuc->enabled)
                return;

        cpuc->enabled = 0;

        val = pcr_ops->read();
        val &= ~(PCR_UTRACE | PCR_STRACE |
                 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
        pcr_ops->write(val);
}

static u32 read_pmc(int idx)
{
        u64 val;

        read_pic(val);
        if (idx == PIC_UPPER_INDEX)
                val >>= 32;

        return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
        u64 shift, mask, pic;

        shift = 0;
        if (idx == PIC_UPPER_INDEX)
                shift = 32;

        mask = ((u64) 0xffffffff) << shift;
        val <<= shift;

        read_pic(pic);
        pic &= ~mask;
        pic |= val;
        write_pic(pic);
}

static int sparc_perf_event_set_period(struct perf_event *event,
                                         struct hw_perf_event *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;

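        /* Program the counter with -left so that it wraps from
         * 0xffffffff to 0, raising the overflow interrupt, after
         * "left" more events; e.g. left == 1000 writes 0xfffffc18.
         */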
        atomic64_set(&hwc->prev_count, (u64)-left);

        write_pmc(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}

static int sparc_pmu_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (test_and_set_bit(idx, cpuc->used_mask))
                return -EAGAIN;

        sparc_pmu_disable_event(hwc, idx);

        cpuc->events[idx] = event;
        set_bit(idx, cpuc->active_mask);

        sparc_perf_event_set_period(event, hwc, idx);
        sparc_pmu_enable_event(hwc, idx);
        perf_event_update_userpage(event);
        return 0;
}

static u64 sparc_perf_event_update(struct perf_event *event,
                                     struct hw_perf_event *hwc, int idx)
{
        int shift = 64 - 32;
        u64 prev_raw_count, new_raw_count;
        s64 delta;

again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

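        /* Shifting both raw values up by 32 and the difference back
         * down sign-extends the 32-bit delta, so it stays correct even
         * if the counter wrapped between reads (e.g. prev 0xfffffff0,
         * new 0x10 yields a delta of 0x20).
         */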
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        atomic64_add(delta, &event->count);
        atomic64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void sparc_pmu_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        clear_bit(idx, cpuc->active_mask);
        sparc_pmu_disable_event(hwc, idx);

        barrier();

        sparc_perf_event_update(event, hwc, idx);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        sparc_pmu_enable_event(hwc, hwc->idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

void perf_event_grab_pmc(void)
{
        if (atomic_inc_not_zero(&active_events))
                return;

        mutex_lock(&pmc_grab_mutex);
        if (atomic_read(&active_events) == 0) {
                if (atomic_read(&nmi_active) > 0) {
                        on_each_cpu(stop_nmi_watchdog, NULL, 1);
                        BUG_ON(atomic_read(&nmi_active) != 0);
                }
                atomic_inc(&active_events);
        }
        mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
                if (atomic_read(&nmi_active) == 0)
                        on_each_cpu(start_nmi_watchdog, NULL, 1);
                mutex_unlock(&pmc_grab_mutex);
        }
}

static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result;
        const struct perf_event_map *pmap;

        if (!sparc_pmu->cache_map)
                return ERR_PTR(-ENOENT);

        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return ERR_PTR(-EINVAL);

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return ERR_PTR(-EINVAL);

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return ERR_PTR(-EINVAL);

        pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

        if (pmap->encoding == CACHE_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        if (pmap->encoding == CACHE_OP_NONSENSE)
                return ERR_PTR(-EINVAL);

        return pmap;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
        perf_event_release_pmc();
}

static int __hw_perf_event_init(struct perf_event *event)
{
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        const struct perf_event_map *pmap;
        u64 enc;

        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;

        if (attr->type == PERF_TYPE_HARDWARE) {
                if (attr->config >= sparc_pmu->max_events)
                        return -EINVAL;
                pmap = sparc_pmu->event_map(attr->config);
        } else if (attr->type == PERF_TYPE_HW_CACHE) {
                pmap = sparc_map_cache_event(attr->config);
                if (IS_ERR(pmap))
                        return PTR_ERR(pmap);
        } else
                return -EOPNOTSUPP;

        perf_event_grab_pmc();
        event->destroy = hw_perf_event_destroy;

        /* We save the enable bits in the config_base.  So to
         * turn off sampling just write 'config', and to enable
         * things write 'config | config_base'.
         */
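        /* For example, an event that excludes only the hypervisor ends
         * up with config_base == sparc_pmu->irq_bit | PCR_UTRACE |
         * PCR_STRACE, with the hv_bit left clear.
         */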
        hwc->config_base = sparc_pmu->irq_bit;
        if (!attr->exclude_user)
                hwc->config_base |= PCR_UTRACE;
        if (!attr->exclude_kernel)
                hwc->config_base |= PCR_STRACE;
        if (!attr->exclude_hv)
                hwc->config_base |= sparc_pmu->hv_bit;

        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
        }

        enc = pmap->encoding;
        if (pmap->pic_mask & PIC_UPPER) {
                hwc->idx = PIC_UPPER_INDEX;
                enc <<= sparc_pmu->upper_shift;
        } else {
                hwc->idx = PIC_LOWER_INDEX;
                enc <<= sparc_pmu->lower_shift;
        }

        hwc->config |= enc;
        return 0;
}

static const struct pmu pmu = {
        .enable         = sparc_pmu_enable,
        .disable        = sparc_pmu_disable,
        .read           = sparc_pmu_read,
        .unthrottle     = sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
        int err = __hw_perf_event_init(event);

        if (err)
                return ERR_PTR(err);
        return &pmu;
}

void perf_event_print_debug(void)
{
        unsigned long flags;
        u64 pcr, pic;
        int cpu;

        if (!sparc_pmu)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();

        pcr = pcr_ops->read();
        read_pic(pic);

        pr_info("\n");
        pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
                cpu, pcr, pic);

        local_irq_restore(flags);
}

static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                                              unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        if (!atomic_read(&active_events))
                return NOTIFY_DONE;

        switch (cmd) {
        case DIE_NMI:
                break;

        default:
                return NOTIFY_DONE;
        }

        regs = args->regs;

        data.addr = 0;

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx < MAX_HWEVENTS; idx++) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
                u64 val;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;
                hwc = &event->hw;
                val = sparc_perf_event_update(event, hwc, idx);
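                /* A counter armed with -left keeps bit 31 set until it
                 * actually wraps (for periods up to 2^31), so if bit 31
                 * is still set this event did not overflow and the NMI
                 * belongs to the other counter.
                 */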
                if (val & (1ULL << 31))
                        continue;

                data.period = event->hw.last_period;
                if (!sparc_perf_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 1, &data, regs))
                        sparc_pmu_disable_event(hwc, idx);
        }

        return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
        .notifier_call          = perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
        if (!strcmp(sparc_pmu_type, "ultra3") ||
            !strcmp(sparc_pmu_type, "ultra3+") ||
            !strcmp(sparc_pmu_type, "ultra3i") ||
            !strcmp(sparc_pmu_type, "ultra4+")) {
                sparc_pmu = &ultra3_pmu;
                return true;
        }
        if (!strcmp(sparc_pmu_type, "niagara2")) {
                sparc_pmu = &niagara2_pmu;
                return true;
        }
        return false;
}

void __init init_hw_perf_events(void)
{
        pr_info("Performance events: ");

        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
                return;
        }

        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

        /* All sparc64 PMUs currently have 2 events.  But this simple
         * driver only supports one active event at a time.
         */
        perf_max_events = 1;

        register_die_notifier(&perf_event_nmi_notifier);
}