trace_syscalls: Remove enter_id exit_id
[safe/jmp/linux-2.6] kernel/trace/trace_syscalls.c
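Ftrace/perf syscall tracing support. With the enter_id/exit_id fields removed from struct syscall_metadata, the event ids below are reached through the enter_event and exit_event pointers (e.g. sys_data->enter_event->id).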
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;

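/*
 * Map a syscall entry address to its syscall_metadata record. kallsyms
 * resolves the address to a symbol name, which is then matched against the
 * names collected in the __syscalls_metadata section.
 */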
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
        struct syscall_metadata *start;
        struct syscall_metadata *stop;
        char str[KSYM_SYMBOL_LEN];

        start = (struct syscall_metadata *)__start_syscalls_metadata;
        stop = (struct syscall_metadata *)__stop_syscalls_metadata;
        kallsyms_lookup(syscall, NULL, NULL, NULL, str);

        for ( ; start < stop; start++) {
                /*
                 * Only compare after the "sys" prefix. Archs that use
                 * syscall wrappers may have syscall symbol aliases prefixed
                 * with "SyS" instead of "sys", leading to an unwanted
                 * mismatch.
                 */
                if (start->name && !strcmp(start->name + 3, str + 3))
                        return start;
        }
        return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
                return NULL;

        return syscalls_metadata[nr];
}

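/*
 * Reverse lookup: linear scan of the metadata table for a syscall name
 * (e.g. "sys_read"); returns the syscall number, or -1 if it is unknown.
 */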
int syscall_name_to_nr(const char *name)
{
        int i;

        if (!syscalls_metadata)
                return -1;

        for (i = 0; i < NR_syscalls; i++) {
                if (syscalls_metadata[i]) {
                        if (!strcmp(syscalls_metadata[i]->name, name))
                                return i;
                }
        }
        return -1;
}

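/*
 * Output callbacks for the syscall trace events. The enter handler prints
 * "name(arg: value, ...)" (argument types too when TRACE_ITER_VERBOSE is
 * set); the exit handler prints "name -> return value".
 */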
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_enter *trace;
        struct syscall_metadata *entry;
        int i, ret, syscall;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry)
                goto end;

        if (entry->enter_event->id != ent->type) {
                WARN_ON_ONCE(1);
                goto end;
        }

        ret = trace_seq_printf(s, "%s(", entry->name);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        for (i = 0; i < entry->nb_args; i++) {
                /* parameter types */
                if (trace_flags & TRACE_ITER_VERBOSE) {
                        ret = trace_seq_printf(s, "%s ", entry->types[i]);
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
                /* parameter values */
                ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
                                       trace->args[i],
                                       i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }

        ret = trace_seq_putc(s, ')');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

end:
        ret = trace_seq_putc(s, '\n');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_exit *trace;
        int syscall;
        struct syscall_metadata *entry;
        int ret;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry) {
                trace_seq_printf(s, "\n");
                return TRACE_TYPE_HANDLED;
        }

        if (entry->exit_event->id != ent->type) {
                WARN_ON_ONCE(1);
                return TRACE_TYPE_UNHANDLED;
        }

        ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
                                trace->ret);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}

extern char *__bad_type_size(void);

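/*
 * SYSCALL_FIELD() expands to the "type string, name string, offset, size,
 * is_signed" argument list expected below; for example, SYSCALL_FIELD(int, nr)
 * roughly becomes "int", "nr", offsetof(typeof(trace), nr), sizeof(trace.nr),
 * is_signed_type(int). The reference to the never-defined __bad_type_size()
 * turns a size mismatch between the declared type and the struct field into
 * a build (link) failure instead of silently emitting a wrong format.
 */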
#define SYSCALL_FIELD(type, name)                                       \
        sizeof(type) != sizeof(trace.name) ?                            \
                __bad_type_size() :                                     \
                #type, #name, offsetof(typeof(trace), name),            \
                sizeof(trace.name), is_signed_type(type)

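/*
 * Generators for the events' "format" files: the fixed nr (and ret) fields
 * via SYSCALL_FIELD(), one unsigned-long-sized field per syscall argument,
 * and a matching "print fmt" string referencing REC->arg.
 */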
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
        int i;
        int ret;
        struct syscall_metadata *entry = call->data;
        struct syscall_trace_enter trace;
        int offset = offsetof(struct syscall_trace_enter, args);

        ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr));
        if (!ret)
                return 0;

        for (i = 0; i < entry->nb_args; i++) {
                ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
                                        entry->args[i]);
                if (!ret)
                        return 0;
                ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
                                       "\tsigned:%u;\n", offset,
                                       sizeof(unsigned long),
                                       is_signed_type(unsigned long));
                if (!ret)
                        return 0;
                offset += sizeof(unsigned long);
        }

        trace_seq_puts(s, "\nprint fmt: \"");
        for (i = 0; i < entry->nb_args; i++) {
                ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
                                        sizeof(unsigned long),
                                        i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return 0;
        }
        trace_seq_putc(s, '"');

        for (i = 0; i < entry->nb_args; i++) {
                ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
                                       entry->args[i]);
                if (!ret)
                        return 0;
        }

        return trace_seq_putc(s, '\n');
}

int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
        int ret;
        struct syscall_trace_exit trace;

        ret = trace_seq_printf(s,
                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n"
                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr),
                               SYSCALL_FIELD(long, ret));
        if (!ret)
                return 0;

        return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}

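/*
 * Register the event fields with the filtering code: nr (and ret for the
 * exit event) plus one unsigned-long field per argument, so syscall events
 * can be filtered on them.
 */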
int syscall_enter_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_enter trace;
        struct syscall_metadata *meta = call->data;
        int ret;
        int i;
        int offset = offsetof(typeof(trace), args);

        ret = trace_define_common_fields(call);
        if (ret)
                return ret;

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        for (i = 0; i < meta->nb_args; i++) {
                ret = trace_define_field(call, meta->types[i],
                                         meta->args[i], offset,
                                         sizeof(unsigned long), 0,
                                         FILTER_OTHER);
                offset += sizeof(unsigned long);
        }

        return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_exit trace;
        int ret;

        ret = trace_define_common_fields(call);
        if (ret)
                return ret;

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                 FILTER_OTHER);

        return ret;
}

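/*
 * Tracepoint probes for the ftrace ring buffer path. They bail out unless
 * the syscall's bit is set in the enabled bitmap, reserve a ring buffer
 * event, fill in nr + args (enter) or nr + ret (exit) and commit it unless
 * the event filter discards it.
 */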
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_trace_enter *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->enter_event->id, size, 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

        if (!filter_current_check_discard(buffer, sys_data->enter_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_trace_exit *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->exit_event->id, sizeof(*entry), 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        entry->ret = syscall_get_return_value(current, regs);

        if (!filter_current_check_discard(buffer, sys_data->exit_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

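/*
 * Per-event enable/disable. The first syscall enabled registers the
 * sys_enter/sys_exit tracepoint probe and the last one disabled unregisters
 * it; the refcounts and bitmaps are serialized by syscall_trace_lock.
 */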
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)
                ret = register_trace_sys_enter(ftrace_syscall_enter);
        if (ret) {
                pr_info("event trace: Could not activate "
                                "syscall entry trace point");
        } else {
                set_bit(num, enabled_enter_syscalls);
                sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_enter--;
        clear_bit(num, enabled_enter_syscalls);
        if (!sys_refcount_enter)
                unregister_trace_sys_enter(ftrace_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_exit)
                ret = register_trace_sys_exit(ftrace_syscall_exit);
        if (ret) {
                pr_info("event trace: Could not activate "
                                "syscall exit trace point");
        } else {
                set_bit(num, enabled_exit_syscalls);
                sys_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_exit--;
        clear_bit(num, enabled_exit_syscalls);
        if (!sys_refcount_exit)
                unregister_trace_sys_exit(ftrace_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}

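/*
 * Build the syscall number -> metadata table at boot time by resolving each
 * syscall table entry address to its metadata record.
 */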
int __init init_ftrace_syscalls(void)
{
        struct syscall_metadata *meta;
        unsigned long addr;
        int i;

        syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
                                        NR_syscalls, GFP_KERNEL);
        if (!syscalls_metadata) {
                WARN_ON(1);
                return -ENOMEM;
        }

        for (i = 0; i < NR_syscalls; i++) {
                addr = arch_syscall_addr(i);
                meta = find_syscall_meta(addr);
                syscalls_metadata[i] = meta;
        }

        return 0;
}
core_initcall(init_ftrace_syscalls);

#ifdef CONFIG_EVENT_PROFILE

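/*
 * Perf/profile support: mirrors the ftrace path above, but records are
 * built in the per-cpu perf_trace_buf and handed to perf_tp_event() instead
 * of being written to the ftrace ring buffer.
 */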
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

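/*
 * Perf probe for syscall entry. The record size is padded to a u64 boundary
 * (taking into account the u32 size field perf prepends), and the padding is
 * zeroed so no stack bytes leak to userspace.
 */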
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_enter *rec;
        unsigned long flags;
        char *trace_buf;
        char *raw_data;
        int syscall_nr;
        int rctx;
        int size;
        int cpu;

        syscall_nr = syscall_get_nr(current, regs);
        /* syscall_get_nr() can return -1; bail out as ftrace_syscall_enter() does */
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* get the size after alignment with the u32 buffer size field */
        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
                      "profile buffer not large enough"))
                return;

        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(flags);

        rctx = perf_swevent_get_recursion_context();
        if (rctx < 0)
                goto end_recursion;

        cpu = smp_processor_id();

        trace_buf = rcu_dereference(perf_trace_buf);

        if (!trace_buf)
                goto end;

        raw_data = per_cpu_ptr(trace_buf, cpu);

        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

        rec = (struct syscall_trace_enter *) raw_data;
        tracing_generic_entry_update(&rec->ent, 0, 0);
        rec->ent.type = sys_data->enter_event->id;
        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                               (unsigned long *)&rec->args);
        perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);

end:
        perf_swevent_put_recursion_context(rctx);
end_recursion:
        local_irq_restore(flags);
}

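/*
 * Enable/disable the perf probes, with the same refcounting scheme as
 * reg_event_syscall_enter()/reg_event_syscall_exit() but using the
 * enabled_prof_* bitmaps.
 */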
int reg_prof_syscall_enter(char *name)
{
        int ret = 0;
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;

        mutex_lock(&syscall_trace_lock);
        if (!sys_prof_refcount_enter)
                ret = register_trace_sys_enter(prof_syscall_enter);
        if (ret) {
                pr_info("event trace: Could not activate "
                                "syscall entry trace point");
        } else {
                set_bit(num, enabled_prof_enter_syscalls);
                sys_prof_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_prof_syscall_enter(char *name)
{
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;

        mutex_lock(&syscall_trace_lock);
        sys_prof_refcount_enter--;
        clear_bit(num, enabled_prof_enter_syscalls);
        if (!sys_prof_refcount_enter)
                unregister_trace_sys_enter(prof_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}

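/*
 * Perf probe for syscall exit: a fixed-size record (nr + ret) with the same
 * alignment, recursion and per-cpu buffer handling as prof_syscall_enter().
 */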
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
        unsigned long flags;
        int syscall_nr;
        char *trace_buf;
        char *raw_data;
        int rctx;
        int size;
        int cpu;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* We can probably do that at build time */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        /*
         * Impossible, but be paranoid with the future
         * How to put this check outside runtime?
         */
        if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
                "exit event has grown above profile buffer size"))
                return;

        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(flags);

        rctx = perf_swevent_get_recursion_context();
        if (rctx < 0)
                goto end_recursion;

        cpu = smp_processor_id();

        trace_buf = rcu_dereference(perf_trace_buf);

        if (!trace_buf)
                goto end;

        raw_data = per_cpu_ptr(trace_buf, cpu);

        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

        rec = (struct syscall_trace_exit *)raw_data;

        tracing_generic_entry_update(&rec->ent, 0, 0);
        rec->ent.type = sys_data->exit_event->id;
        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);

        perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);

end:
        perf_swevent_put_recursion_context(rctx);
end_recursion:
        local_irq_restore(flags);
}

int reg_prof_syscall_exit(char *name)
{
        int ret = 0;
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;

        mutex_lock(&syscall_trace_lock);
        if (!sys_prof_refcount_exit)
                ret = register_trace_sys_exit(prof_syscall_exit);
        if (ret) {
                pr_info("event trace: Could not activate "
                                "syscall exit trace point");
        } else {
                set_bit(num, enabled_prof_exit_syscalls);
                sys_prof_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_prof_syscall_exit(char *name)
{
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;

        mutex_lock(&syscall_trace_lock);
        sys_prof_refcount_exit--;
        clear_bit(num, enabled_prof_exit_syscalls);
        if (!sys_prof_refcount_exit)
                unregister_trace_sys_exit(prof_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}

#endif