#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;

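/*
 * Find the compile-time metadata for the syscall whose handler lives at
 * the given address: resolve the address to a symbol name via kallsyms,
 * then scan the metadata section emitted by the SYSCALL_DEFINE macros.
 */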
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may have syscall symbol aliases prefixed
		 * with "SyS" instead of "sys", leading to an unwanted
		 * mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

int syscall_name_to_nr(const char *name)
{
	int i;

	if (!syscalls_metadata)
		return -1;

	for (i = 0; i < NR_syscalls; i++) {
		if (syscalls_metadata[i]) {
			if (!strcmp(syscalls_metadata[i]->name, name))
				return i;
		}
	}
	return -1;
}

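/*
 * Output a sys_enter event as "name(arg: value, ...)". With the
 * verbose trace flag set, each value is preceded by its C type.
 */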
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);
	if (!entry)
		goto end;

	if (entry->enter_event->id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;
end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

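/*
 * Output a sys_exit event as "name -> 0xreturn_value". An event whose
 * syscall number has no metadata still gets a newline so the trace
 * output stays line-oriented.
 */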
enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);
	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_event->id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
			       trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

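/*
 * SYSCALL_FIELD expands to the type name, field name, offset, size and
 * signedness arguments expected by the format helpers below.
 * __bad_type_size() is declared but intentionally never defined: if the
 * declared type does not match the size of the struct field, the call
 * survives to link time and the build fails.
 */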
extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)

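/*
 * Emit the contents of the event "format" file for a syscall entry
 * event: one field description per argument, followed by the print fmt
 * string that userspace tools use to decode raw records.
 */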
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int i;
	int ret;
	struct syscall_metadata *entry = call->data;
	struct syscall_trace_enter trace;
	int offset = offsetof(struct syscall_trace_enter, args);

	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr));
	if (!ret)
		return 0;

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
				       entry->args[i]);
		if (!ret)
			return 0;
		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
				       "\tsigned:%u;\n", offset,
				       sizeof(unsigned long),
				       is_signed_type(unsigned long));
		if (!ret)
			return 0;
		offset += sizeof(unsigned long);
	}

	trace_seq_puts(s, "\nprint fmt: \"");
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
				       sizeof(unsigned long),
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return 0;
	}
	trace_seq_putc(s, '"');

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
				       entry->args[i]);
		if (!ret)
			return 0;
	}

	return trace_seq_putc(s, '\n');
}

int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int ret;
	struct syscall_trace_exit trace;

	ret = trace_seq_printf(s,
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n"
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr),
			       SYSCALL_FIELD(long, ret));
	if (!ret)
		return 0;

	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}

int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		offset += sizeof(unsigned long);
	}

	return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
				 FILTER_OTHER);
	return ret;
}

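/*
 * Probe for the sys_enter tracepoint: if tracing is enabled for this
 * syscall number, reserve a ring buffer event and record the syscall
 * number and its arguments.
 */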
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->id, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

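/*
 * Probe for the sys_exit tracepoint: mirrors ftrace_syscall_enter, but
 * records the syscall number and its return value.
 */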
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->id, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

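/*
 * The reg/unreg helpers below refcount the tracepoint probes so each is
 * registered only while at least one syscall event is enabled, and use
 * the per-syscall bitmaps to let the probes filter cheaply.
 */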
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

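/*
 * Populate the syscall number -> metadata table at boot: look up the
 * handler address of each syscall table entry and match it against the
 * compiled-in metadata.
 */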
int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
					NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);

#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

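/*
 * Perf (event profiling) path: instead of reserving space in the ftrace
 * ring buffer, records are built in a per-cpu scratch buffer with
 * interrupts disabled and then submitted through perf_tp_event().
 */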
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	char *trace_buf;
	char *raw_data;
	int syscall_nr;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)	/* keep the bitmap index in range */
		return;
	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "profile buffer not large enough"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();
	trace_buf = rcu_dereference(perf_trace_buf);
	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_enter *) raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->enter_event->id;
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);
	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int reg_prof_syscall_enter(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_enter)
		ret = register_trace_sys_enter(prof_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_prof_enter_syscalls);
		sys_prof_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_enter(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_enter--;
	clear_bit(num, enabled_prof_enter_syscalls);
	if (!sys_prof_refcount_enter)
		unregister_trace_sys_enter(prof_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

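/*
 * Perf probe for sys_exit: same per-cpu buffer scheme as
 * prof_syscall_enter, recording the syscall number and return value.
 */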
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	char *trace_buf;
	char *raw_data;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)	/* keep the bitmap index in range */
		return;
	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible currently, but be paranoid about future changes:
	 * can this check be moved out of the runtime path?
	 */
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "exit event has grown above profile buffer size"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();
	trace_buf = rcu_dereference(perf_trace_buf);
	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_exit *)raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->exit_event->id;
	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);
	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int reg_prof_syscall_exit(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_exit)
		ret = register_trace_sys_exit(prof_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_prof_exit_syscalls);
		sys_prof_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_exit(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_exit--;
	clear_bit(num, enabled_prof_exit_syscalls);
	if (!sys_prof_refcount_exit)
		unregister_trace_sys_exit(prof_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_EVENT_PROFILE */