x86/oprofile: Implement multiplexing setup/shutdown functions
arch/x86/oprofile/nmi_int.c
/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec const *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);

/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;


#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
extern atomic_t multiplex_counter;
#endif

struct op_counter_config counter_config[OP_MAX_COUNTER];

/* common functions */

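/*
 * Build the event-select MSR value for one counter from its
 * counter_config: event select code in bits 0-7 (with the extended
 * event-select bits 8-11 shifted up to bits 32-35 for models whose
 * event codes exceed 8 bits), unit mask in bits 8-15, and the
 * USR/OS/INT enable flags.  The model's event_mask, if set, limits
 * which event bits are honoured.
 */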
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
                    struct op_counter_config *counter_config)
{
        u64 val = 0;
        u16 event = (u16)counter_config->event;

        val |= ARCH_PERFMON_EVENTSEL_INT;
        val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
        val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
        val |= (counter_config->unit_mask & 0xFF) << 8;
        event &= model->event_mask ? model->event_mask : 0xFF;
        val |= event & 0xFF;
        val |= (event & 0x0F00) << 24;

        return val;
}


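/*
 * NMI handler, invoked via the die notifier chain.  Hand the current
 * CPU's counter state to the model so it can check for overflowed
 * counters and record samples; NOTIFY_STOP tells the chain the NMI
 * was ours.
 */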
static int profile_exceptions_notify(struct notifier_block *self,
                                     unsigned long val, void *data)
{
        struct die_args *args = (struct die_args *)data;
        int ret = NOTIFY_DONE;
        int cpu = smp_processor_id();

        switch (val) {
        case DIE_NMI:
        case DIE_NMI_IPI:
                model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
                ret = NOTIFY_STOP;
                break;
        default:
                break;
        }
        return ret;
}

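/*
 * Snapshot the current values of this CPU's counter and control MSRs
 * so they can be put back by nmi_cpu_restore_registers() at shutdown.
 */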
static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < model->num_counters; ++i) {
                if (counters[i].addr)
                        rdmsrl(counters[i].addr, counters[i].saved);
        }

        for (i = 0; i < model->num_controls; ++i) {
                if (controls[i].addr)
                        rdmsrl(controls[i].addr, controls[i].saved);
        }
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

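/*
 * Event multiplexing: when more events are configured than the CPU has
 * hardware counters, the available counters are time-shared between
 * "virtual" counters.  Each CPU keeps a multiplex save area with one
 * op_msr slot per virtual counter, and switch_index records the offset
 * of the set that is currently programmed into the hardware.
 */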
static DEFINE_PER_CPU(int, switch_index);

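/* Map a physical counter index to the virtual counter it currently backs. */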
inline int op_x86_phys_to_virt(int phys)
{
        return __get_cpu_var(switch_index) + phys;
}

static void nmi_shutdown_mux(void)
{
        int i;
        for_each_possible_cpu(i) {
                kfree(per_cpu(cpu_msrs, i).multiplex);
                per_cpu(cpu_msrs, i).multiplex = NULL;
                per_cpu(switch_index, i) = 0;
        }
}

static int nmi_setup_mux(void)
{
        size_t multiplex_size =
                sizeof(struct op_msr) * model->num_virt_counters;
        int i;
        for_each_possible_cpu(i) {
                per_cpu(cpu_msrs, i).multiplex =
                        kmalloc(multiplex_size, GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).multiplex)
                        return 0;
        }
        return 1;
}

#else

inline int op_x86_phys_to_virt(int phys) { return phys; }
static inline void nmi_shutdown_mux(void) { }
static inline int nmi_setup_mux(void) { return 1; }

#endif

static void free_msrs(void)
{
        int i;
        for_each_possible_cpu(i) {
                kfree(per_cpu(cpu_msrs, i).counters);
                per_cpu(cpu_msrs, i).counters = NULL;
                kfree(per_cpu(cpu_msrs, i).controls);
                per_cpu(cpu_msrs, i).controls = NULL;
        }
}

static int allocate_msrs(void)
{
        size_t controls_size = sizeof(struct op_msr) * model->num_controls;
        size_t counters_size = sizeof(struct op_msr) * model->num_counters;

        int i;
        for_each_possible_cpu(i) {
                per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
                                                        GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).counters)
                        return 0;
                per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
                                                        GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).controls)
                        return 0;
        }

        return 1;
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

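/*
 * Seed the per-CPU multiplex save area: an enabled virtual counter
 * starts at the negated event count so the hardware counter wraps
 * (and raises the NMI) after 'count' events; disabled slots are
 * cleared.  The rotation starts at virtual counter set 0.
 */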
static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
{
        int i;
        struct op_msr *multiplex = msrs->multiplex;

        for (i = 0; i < model->num_virt_counters; ++i) {
                if (counter_config[i].enabled) {
                        multiplex[i].saved = -(u64)counter_config[i].count;
                } else {
                        multiplex[i].addr  = 0;
                        multiplex[i].saved = 0;
                }
        }

        per_cpu(switch_index, cpu) = 0;
}

#else

static inline void
nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }

#endif

static void nmi_cpu_setup(void *dummy)
{
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
        nmi_cpu_save_registers(msrs);
        spin_lock(&oprofilefs_lock);
        model->setup_ctrs(model, msrs);
        nmi_cpu_setup_mux(cpu, msrs);
        spin_unlock(&oprofilefs_lock);
        per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
}

static struct notifier_block profile_exceptions_nb = {
        .notifier_call = profile_exceptions_notify,
        .next = NULL,
        .priority = 2
};

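/*
 * Bring profiling up: allocate the per-CPU MSR save areas (and the
 * multiplex area, if configured), register the NMI die notifier, fill
 * in the MSR addresses once on CPU 0 and copy them to all other CPUs,
 * then program the counters and LVTPC on every CPU.
 */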
static int nmi_setup(void)
{
        int err = 0;
        int cpu;

        if (!allocate_msrs())
                err = -ENOMEM;
        else if (!nmi_setup_mux())
                err = -ENOMEM;
        else
                err = register_die_notifier(&profile_exceptions_nb);

        if (err) {
                free_msrs();
                nmi_shutdown_mux();
                return err;
        }

        /* We need to serialize save and setup for HT because the subsets
         * of MSRs are distinct for the save and setup operations.
         */

        /* Assume saved/restored counters are the same on all CPUs */
        model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
        for_each_possible_cpu(cpu) {
                if (cpu != 0) {
                        memcpy(per_cpu(cpu_msrs, cpu).counters,
                                per_cpu(cpu_msrs, 0).counters,
                                sizeof(struct op_msr) * model->num_counters);

                        memcpy(per_cpu(cpu_msrs, cpu).controls,
                                per_cpu(cpu_msrs, 0).controls,
                                sizeof(struct op_msr) * model->num_controls);
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
                        memcpy(per_cpu(cpu_msrs, cpu).multiplex,
                                per_cpu(cpu_msrs, 0).multiplex,
                                sizeof(struct op_msr) * model->num_virt_counters);
#endif
                }
        }
        on_each_cpu(nmi_cpu_setup, NULL, 1);
        nmi_enabled = 1;
        return 0;
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

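/*
 * Only the counters of the set that is currently live in hardware
 * (model->num_counters entries, starting at this CPU's switch_index)
 * are moved between the MSRs and the multiplex save area.
 */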
static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
{
        struct op_msr *multiplex = msrs->multiplex;
        int i;

        for (i = 0; i < model->num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (multiplex[virt].addr)
                        rdmsrl(multiplex[virt].addr, multiplex[virt].saved);
        }
}

static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
{
        struct op_msr *multiplex = msrs->multiplex;
        int i;

        for (i = 0; i < model->num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (multiplex[virt].addr)
                        wrmsrl(multiplex[virt].addr, multiplex[virt].saved);
        }
}

#endif

static void nmi_cpu_restore_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < model->num_controls; ++i) {
                if (controls[i].addr)
                        wrmsrl(controls[i].addr, controls[i].saved);
        }

        for (i = 0; i < model->num_counters; ++i) {
                if (counters[i].addr)
                        wrmsrl(counters[i].addr, counters[i].saved);
        }
}

static void nmi_cpu_shutdown(void *dummy)
{
        unsigned int v;
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

        /* Restoring APIC_LVTPC can trigger an APIC error because the delivery
         * mode and vector number combination can be illegal. That's by design:
         * on power-on the APIC LVT entries contain a zero vector number, which
         * is legal only for NMI delivery mode. So mask APIC errors while
         * restoring the LVTPC.
         */
        v = apic_read(APIC_LVTERR);
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
        apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
        apic_write(APIC_LVTERR, v);
        nmi_cpu_restore_registers(msrs);
}

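/*
 * Tear everything down again: mark profiling disabled, restore the
 * saved MSRs and LVTPC on each CPU, drop the NMI die notifier, let the
 * model release its counters, and free the multiplex and MSR save
 * areas.
 */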
static void nmi_shutdown(void)
{
        struct op_msrs *msrs;

        nmi_enabled = 0;
        on_each_cpu(nmi_cpu_shutdown, NULL, 1);
        unregister_die_notifier(&profile_exceptions_nb);
        nmi_shutdown_mux();
        msrs = &get_cpu_var(cpu_msrs);
        model->shutdown(msrs);
        free_msrs();
        put_cpu_var(cpu_msrs);
}

static void nmi_cpu_start(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        model->start(msrs);
}

static int nmi_start(void)
{
        on_each_cpu(nmi_cpu_start, NULL, 1);
        return 0;
}

static void nmi_cpu_stop(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        model->stop(msrs);
}

static void nmi_stop(void)
{
        on_each_cpu(nmi_cpu_stop, NULL, 1);
}

static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
        unsigned int i;

        for (i = 0; i < model->num_virt_counters; ++i) {
                struct dentry *dir;
                char buf[4];

#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX
                /* quick little hack to _not_ expose a counter if it is not
                 * available for use.  This should protect userspace apps.
                 * NOTE:  assumes a 1:1 mapping here (that counters are
                 *        organized sequentially in their struct assignment).
                 */
                if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
                        continue;
#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */

                snprintf(buf,  sizeof(buf), "%d", i);
                dir = oprofilefs_mkdir(sb, root, buf);
                oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
                oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
                oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
                oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
                oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
                oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
        }

        return 0;
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

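/*
 * Rotate this CPU to the next set of virtual counters: stop the
 * hardware counters, save the live set into the multiplex area,
 * advance switch_index by the number of physical counters (wrapping
 * back to 0 once the configured virtual counters are exhausted),
 * reprogram the control registers via model->switch_ctrl, reload the
 * new set's saved values and restart.  For example, with 4 physical
 * and 32 virtual counters the index would cycle 0, 4, 8, ..., 28, 0.
 */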
static void nmi_cpu_switch(void *dummy)
{
        int cpu = smp_processor_id();
        int si = per_cpu(switch_index, cpu);
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

        nmi_cpu_stop(NULL);
        nmi_cpu_save_mpx_registers(msrs);

        /* move to next set */
        si += model->num_counters;
        if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
                per_cpu(switch_index, cpu) = 0;
        else
                per_cpu(switch_index, cpu) = si;

        model->switch_ctrl(model, msrs);
        nmi_cpu_restore_mpx_registers(msrs);

        nmi_cpu_start(NULL);
}


/*
 * Quick check to see if multiplexing is necessary.
 * The check should be sufficient since counters are used
 * in order.
 */
static int nmi_multiplex_on(void)
{
        return counter_config[model->num_counters].count ? 0 : -EINVAL;
}

static int nmi_switch_event(void)
{
        if (!model->switch_ctrl)
                return -ENOSYS;         /* not implemented */
        if (nmi_multiplex_on() < 0)
                return -EINVAL;         /* not necessary */

        on_each_cpu(nmi_cpu_switch, NULL, 1);

        atomic_inc(&multiplex_counter);

        return 0;
}

#endif

#ifdef CONFIG_SMP
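/*
 * CPU hotplug callback: start the counters on a CPU that comes (back)
 * online and stop them on one that is about to go down, so profiling
 * keeps matching the set of online CPUs.
 */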
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
                                 void *data)
{
        int cpu = (unsigned long)data;
        switch (action) {
        case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
                break;
        case CPU_DOWN_PREPARE:
                smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
        .notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM

static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
        /* Only one CPU left, just stop that one */
        if (nmi_enabled == 1)
                nmi_cpu_stop(NULL);
        return 0;
}

static int nmi_resume(struct sys_device *dev)
{
        if (nmi_enabled == 1)
                nmi_cpu_start(NULL);
        return 0;
}

static struct sysdev_class oprofile_sysclass = {
        .name           = "oprofile",
        .resume         = nmi_resume,
        .suspend        = nmi_suspend,
};

static struct sys_device device_oprofile = {
        .id     = 0,
        .cls    = &oprofile_sysclass,
};

static int __init init_sysfs(void)
{
        int error;

        error = sysdev_class_register(&oprofile_sysclass);
        if (!error)
                error = sysdev_register(&device_oprofile);
        return error;
}

static void exit_sysfs(void)
{
        sysdev_unregister(&device_oprofile);
        sysdev_class_unregister(&oprofile_sysclass);
}

#else
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */

static int __init p4_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;

        if (cpu_model > 6 || cpu_model == 5)
                return 0;

#ifndef CONFIG_SMP
        *cpu_type = "i386/p4";
        model = &op_p4_spec;
        return 1;
#else
        switch (smp_num_siblings) {
        case 1:
                *cpu_type = "i386/p4";
                model = &op_p4_spec;
                return 1;

        case 2:
                *cpu_type = "i386/p4-ht";
                model = &op_p4_ht2_spec;
                return 1;
        }
#endif

        printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
        printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
        return 0;
}

static int force_arch_perfmon;
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
        if (!strcmp(str, "arch_perfmon")) {
                force_arch_perfmon = 1;
                printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
        }

        return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);

static int __init ppro_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;
        struct op_x86_model_spec const *spec = &op_ppro_spec;   /* default */

        if (force_arch_perfmon && cpu_has_arch_perfmon)
                return 0;

        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
                break;
        case 3 ... 5:
                *cpu_type = "i386/pii";
                break;
        case 6 ... 8:
        case 10 ... 11:
                *cpu_type = "i386/piii";
                break;
        case 9:
        case 13:
                *cpu_type = "i386/p6_mobile";
                break;
        case 14:
                *cpu_type = "i386/core";
                break;
        case 15: case 23:
                *cpu_type = "i386/core_2";
                break;
        case 26:
                spec = &op_arch_perfmon_spec;
                *cpu_type = "i386/core_i7";
                break;
        case 28:
                *cpu_type = "i386/atom";
                break;
        default:
                /* Unknown */
                return 0;
        }

        model = spec;
        return 1;
}

/* in order to get sysfs right */
static int using_nmi;

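/*
 * Entry point: pick a model spec and cpu_type string from the CPU
 * vendor/family (falling back to the architectural perfmon interface
 * on newer Intel CPUs), hook up the CPU-hotplug notifier, and fill in
 * the default oprofile_operations; model->init() may override them.
 */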
int __init op_nmi_init(struct oprofile_operations *ops)
{
        __u8 vendor = boot_cpu_data.x86_vendor;
        __u8 family = boot_cpu_data.x86;
        char *cpu_type = NULL;
        int ret = 0;

        if (!cpu_has_apic)
                return -ENODEV;

        switch (vendor) {
        case X86_VENDOR_AMD:
                /* Needs to be at least an Athlon (or hammer in 32bit mode) */

                switch (family) {
                case 6:
                        cpu_type = "i386/athlon";
                        break;
                case 0xf:
                        /*
                         * Actually it could be i386/hammer too, but
                         * give user space a consistent name.
                         */
                        cpu_type = "x86-64/hammer";
                        break;
                case 0x10:
                        cpu_type = "x86-64/family10";
                        break;
                case 0x11:
                        cpu_type = "x86-64/family11h";
                        break;
                default:
                        return -ENODEV;
                }
                model = &op_amd_spec;
                break;

        case X86_VENDOR_INTEL:
                switch (family) {
                        /* Pentium IV */
                case 0xf:
                        p4_init(&cpu_type);
                        break;

                        /* A P6-class processor */
                case 6:
                        ppro_init(&cpu_type);
                        break;

                default:
                        break;
                }

                if (cpu_type)
                        break;

                if (!cpu_has_arch_perfmon)
                        return -ENODEV;

                /* use arch perfmon as fallback */
                cpu_type = "i386/arch_perfmon";
                model = &op_arch_perfmon_spec;
                break;

        default:
                return -ENODEV;
        }

#ifdef CONFIG_SMP
        register_cpu_notifier(&oprofile_cpu_nb);
#endif
        /* default values, can be overwritten by model */
        ops->create_files       = nmi_create_files;
        ops->setup              = nmi_setup;
        ops->shutdown           = nmi_shutdown;
        ops->start              = nmi_start;
        ops->stop               = nmi_stop;
        ops->cpu_type           = cpu_type;
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
        ops->switch_events      = nmi_switch_event;
#endif

        if (model->init)
                ret = model->init(ops);
        if (ret)
                return ret;

        init_sysfs();
        using_nmi = 1;
        printk(KERN_INFO "oprofile: using NMI interrupt.\n");
        return 0;
}

void op_nmi_exit(void)
{
        if (using_nmi) {
                exit_sysfs();
#ifdef CONFIG_SMP
                unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
        }
        if (model->exit)
                model->exit();
}