perf: Take a hot regs snapshot for trace events
kernel/trace/trace_event_perf.c
/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

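/*
 * Per-cpu scratch pt_regs, filled by perf_fetch_caller_regs() at the
 * tracepoint call site so that perf gets a "hot" register snapshot
 * instead of the stale interrupt-time regs.
 */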
DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);

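/*
 * Per-cpu output buffers: a regular one and a separate one for NMI
 * context, so an NMI cannot corrupt a record that is still being
 * written in the interrupted context.
 */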
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;

typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int      total_ref_count;

static int perf_trace_event_enable(struct ftrace_event_call *event)
{
        char *buf;
        int ret = -ENOMEM;

        if (event->perf_refcount++ > 0)
                return 0;

        if (!total_ref_count) {
                buf = (char *)alloc_percpu(perf_trace_t);
                if (!buf)
                        goto fail_buf;

                rcu_assign_pointer(perf_trace_buf, buf);

                buf = (char *)alloc_percpu(perf_trace_t);
                if (!buf)
                        goto fail_buf_nmi;

                rcu_assign_pointer(perf_trace_buf_nmi, buf);
        }

        ret = event->perf_event_enable(event);
        if (!ret) {
                total_ref_count++;
                return 0;
        }

fail_buf_nmi:
        if (!total_ref_count) {
                free_percpu(perf_trace_buf_nmi);
                free_percpu(perf_trace_buf);
                perf_trace_buf_nmi = NULL;
                perf_trace_buf = NULL;
        }
fail_buf:
        event->perf_refcount--;

        return ret;
}

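/*
 * Called by the perf core when a tracepoint counter is created: look up
 * the ftrace event behind @event_id, pin its module and enable perf
 * profiling for it.
 */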
int perf_trace_enable(int event_id)
{
        struct ftrace_event_call *event;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(event, &ftrace_events, list) {
                if (event->id == event_id && event->perf_event_enable &&
                    try_module_get(event->mod)) {
                        ret = perf_trace_event_enable(event);
                        break;
                }
        }
        mutex_unlock(&event_mutex);

        return ret;
}

static void perf_trace_event_disable(struct ftrace_event_call *event)
{
        char *buf, *nmi_buf;

        if (--event->perf_refcount > 0)
                return;

        event->perf_event_disable(event);

        if (!--total_ref_count) {
                buf = perf_trace_buf;
                rcu_assign_pointer(perf_trace_buf, NULL);

                nmi_buf = perf_trace_buf_nmi;
                rcu_assign_pointer(perf_trace_buf_nmi, NULL);

                /*
                 * Ensure all events currently profiling have finished
                 * before releasing the buffers.
                 */
                synchronize_sched();

                free_percpu(buf);
                free_percpu(nmi_buf);
        }
}

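/*
 * Counterpart of perf_trace_enable(): drop the perf reference on the
 * event and release its module.
 */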
void perf_trace_disable(int event_id)
{
        struct ftrace_event_call *event;

        mutex_lock(&event_mutex);
        list_for_each_entry(event, &ftrace_events, list) {
                if (event->id == event_id) {
                        perf_trace_event_disable(event);
                        module_put(event->mod);
                        break;
                }
        }
        mutex_unlock(&event_mutex);
}
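
/*
 * Sketch of the consumer side (hypothetical names, modeled on the
 * tracepoint glue in kernel/perf_event.c of this era): a tracepoint perf
 * event maps its attr.config to an ftrace event id, pins the event with
 * perf_trace_enable() and releases it again from its destroy callback.
 */
static void example_tp_event_destroy(struct perf_event *event)
{
        perf_trace_disable(event->attr.config);
}

static int example_tp_event_setup(struct perf_event *event)
{
        int err = perf_trace_enable(event->attr.config);

        if (err)
                return err;

        event->destroy = example_tp_event_destroy;
        return 0;
}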

/*
 * Reserve space for one trace event record in the per-cpu perf buffer
 * (the NMI buffer if we are in NMI context).  Disables interrupts, takes
 * a software-event recursion context and initializes the generic trace
 * entry header with @type.  Returns the record, or NULL on failure.  On
 * success the caller must eventually release the recursion context and
 * restore @irq_flags, which perf_trace_buf_submit() does.
 */
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
                                       int *rctxp, unsigned long *irq_flags)
{
        struct trace_entry *entry;
        char *trace_buf, *raw_data;
        int pc, cpu;

        pc = preempt_count();

        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(*irq_flags);

        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                goto err_recursion;

        cpu = smp_processor_id();

        if (in_nmi())
                trace_buf = rcu_dereference(perf_trace_buf_nmi);
        else
                trace_buf = rcu_dereference(perf_trace_buf);

        if (!trace_buf)
                goto err;

        raw_data = per_cpu_ptr(trace_buf, cpu);

        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

        entry = (struct trace_entry *)raw_data;
        tracing_generic_entry_update(entry, *irq_flags, pc);
        entry->type = type;

        return raw_data;
err:
        perf_swevent_put_recursion_context(*rctxp);
err_recursion:
        local_irq_restore(*irq_flags);
        return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
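
/*
 * Sketch of a typical caller (illustrative; it approximates the handler
 * that include/trace/ftrace.h generates for each trace event around this
 * kernel version, and assumes the perf_fetch_caller_regs() and
 * perf_trace_buf_submit() helpers from that era): reserve a record, fill
 * the payload, take the hot register snapshot in perf_trace_regs, then
 * hand the record to perf.
 */
static void example_perf_trace_handler(int event_id, u64 payload)
{
        struct {
                struct trace_entry      ent;
                u64                     value;
        } *entry;
        struct pt_regs *regs;
        unsigned long irq_flags;
        int size, rctx;

        /* perf_trace_buf_prepare() zeroes the trailing u64, so keep the
         * record u64-aligned. */
        size = ALIGN(sizeof(*entry), sizeof(u64));
        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                      "profile buffer not large enough"))
                return;

        entry = perf_trace_buf_prepare(size, event_id, &rctx, &irq_flags);
        if (!entry)
                return;

        entry->value = payload;

        /* The hot snapshot: regs captured right here, at the call site,
         * in the per-cpu scratch area (the skip count is illustrative). */
        regs = &__get_cpu_var(perf_trace_regs);
        perf_fetch_caller_regs(regs, 1);

        /* Releases the recursion context and restores irq_flags. */
        perf_trace_buf_submit(entry, size, rctx, 0, 1, irq_flags, regs);
}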