#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>
#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;
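
/*
 * Syscall metadata entries are collected in the __syscalls_metadata
 * section at build time.  find_syscall_meta() resolves a syscall's
 * handler address to its symbol name through kallsyms and walks that
 * section looking for a matching entry.
 */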
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
        struct syscall_metadata *start;
        struct syscall_metadata *stop;
        char str[KSYM_SYMBOL_LEN];

        start = (struct syscall_metadata *)__start_syscalls_metadata;
        stop = (struct syscall_metadata *)__stop_syscalls_metadata;
        kallsyms_lookup(syscall, NULL, NULL, NULL, str);

        for ( ; start < stop; start++) {
                /*
                 * Only compare after the "sys" prefix. Archs that use
                 * syscall wrappers may have syscall symbol aliases prefixed
                 * with "SyS" instead of "sys", leading to an unwanted
                 * mismatch.
                 */
                if (start->name && !strcmp(start->name + 3, str + 3))
                        return start;
        }
        return NULL;
}
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
                return NULL;
        return syscalls_metadata[nr];
}

int syscall_name_to_nr(const char *name)
{
        int i;

        if (!syscalls_metadata)
                return -1;
        for (i = 0; i < NR_syscalls; i++) {
                if (syscalls_metadata[i]) {
                        if (!strcmp(syscalls_metadata[i]->name, name))
                                return i;
                }
        }
        return -1;
}

void set_syscall_enter_id(int num, int id)
{
        syscalls_metadata[num]->enter_id = id;
}

void set_syscall_exit_id(int num, int id)
{
        syscalls_metadata[num]->exit_id = id;
}
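
/*
 * Text output callbacks for syscall events.  For a call such as sys_read
 * the enter line is rendered roughly as (illustrative values only):
 *
 *	sys_read(fd: 3, buf: 7fff8090a000, count: 1000)
 *
 * and the exit line as:
 *
 *	sys_read -> 0x400
 */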
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_enter *trace;
        struct syscall_metadata *entry;
        int i, ret, syscall;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);
        if (!entry)
                goto end;

        if (entry->enter_id != ent->type) {
                WARN_ON_ONCE(1);
                goto end;
        }

        ret = trace_seq_printf(s, "%s(", entry->name);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        for (i = 0; i < entry->nb_args; i++) {
                /* parameter types */
                if (trace_flags & TRACE_ITER_VERBOSE) {
                        ret = trace_seq_printf(s, "%s ", entry->types[i]);
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
                /* parameter values */
                ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
                                       trace->args[i],
                                       i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
        ret = trace_seq_putc(s, ')');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

end:
        ret = trace_seq_putc(s, '\n');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
        return TRACE_TYPE_HANDLED;
}
enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_exit *trace;
        struct syscall_metadata *entry;
        int syscall, ret;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);
        if (!entry) {
                trace_seq_printf(s, "\n");
                return TRACE_TYPE_HANDLED;
        }
        if (entry->exit_id != ent->type) {
                WARN_ON_ONCE(1);
                return TRACE_TYPE_UNHANDLED;
        }

        ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
                               trace->ret);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
        return TRACE_TYPE_HANDLED;
}
extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)                                      \
        sizeof(type) != sizeof(trace.name) ?                           \
                __bad_type_size() :                                    \
                #type, #name, offsetof(typeof(trace), name),           \
                sizeof(trace.name), is_signed_type(type)
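
/*
 * syscall_enter_format()/syscall_exit_format() emit the textual event
 * description that user space reads from the event's "format" file.
 * A single argument produced by the loop below looks roughly like
 * (illustrative offsets):
 *
 *	field:unsigned int fd;	offset:16;	size:8;	signed:0;
 */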
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
        int i, ret;
        struct syscall_metadata *entry = call->data;
        struct syscall_trace_enter trace;
        int offset = offsetof(struct syscall_trace_enter, args);

        ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr));
        if (!ret)
                return 0;

        for (i = 0; i < entry->nb_args; i++) {
                ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
                                       entry->args[i]);
                if (!ret)
                        return 0;
                ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
                                       "\tsigned:%u;\n", offset,
                                       sizeof(unsigned long),
                                       is_signed_type(unsigned long));
                if (!ret)
                        return 0;
                offset += sizeof(unsigned long);
        }

        trace_seq_puts(s, "\nprint fmt: \"");
        for (i = 0; i < entry->nb_args; i++) {
                ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
                                       sizeof(unsigned long),
                                       i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return 0;
        }
        trace_seq_putc(s, '"');

        for (i = 0; i < entry->nb_args; i++) {
                ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
                                       entry->args[i]);
                if (!ret)
                        return 0;
        }
        return trace_seq_putc(s, '\n');
}
int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
        int ret;
        struct syscall_trace_exit trace;

        ret = trace_seq_printf(s,
                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n"
                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
                               "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr),
                               SYSCALL_FIELD(long, ret));
        if (!ret)
                return 0;

        return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}
int syscall_enter_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_enter trace;
        struct syscall_metadata *meta = call->data;
        int ret, i;
        int offset = offsetof(typeof(trace), args);

        ret = trace_define_common_fields(call);
        if (ret)
                return ret;
        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;
        for (i = 0; i < meta->nb_args; i++) {
                ret = trace_define_field(call, meta->types[i],
                                         meta->args[i], offset,
                                         sizeof(unsigned long), 0,
                                         FILTER_OTHER);
                offset += sizeof(unsigned long);
        }
        return ret;
}
int syscall_exit_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_exit trace;
        int ret;

        ret = trace_define_common_fields(call);
        if (ret)
                return ret;
        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;
        return trace_define_field(call, SYSCALL_FIELD(long, ret),
                                  FILTER_OTHER);
}
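
/*
 * Probes attached to the raw sys_enter/sys_exit tracepoints.  They look
 * up the metadata for the current syscall, reserve space in the ftrace
 * ring buffer and record the syscall number plus its arguments (enter)
 * or return value (exit).
 */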
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_trace_enter *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size, syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
        event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
                                                  size, 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

        if (!filter_current_check_discard(buffer, sys_data->enter_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_trace_exit *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
                                                  sizeof(*entry), 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        entry->ret = syscall_get_return_value(current, regs);

        if (!filter_current_check_discard(buffer, sys_data->exit_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
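
/*
 * Registration helpers called when a syscall event is enabled or
 * disabled.  A single tracepoint probe is shared by all syscall events,
 * so it is registered on the first user and unregistered on the last;
 * the per-syscall enabled_*_syscalls bitmaps select which syscalls
 * actually get recorded.
 */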
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)
                ret = register_trace_sys_enter(ftrace_syscall_enter);
        if (ret) {
                pr_info("event trace: Could not activate "
                        "syscall entry trace point");
        } else {
                set_bit(num, enabled_enter_syscalls);
                sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_enter--;
        clear_bit(num, enabled_enter_syscalls);
        if (!sys_refcount_enter)
                unregister_trace_sys_enter(ftrace_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}
int reg_event_syscall_exit(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_exit)
                ret = register_trace_sys_exit(ftrace_syscall_exit);
        if (ret) {
                pr_info("event trace: Could not activate "
                        "syscall exit trace point");
        } else {
                set_bit(num, enabled_exit_syscalls);
                sys_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
        int num;
        const char *name;

        name = ((struct syscall_metadata *)call->data)->name;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_exit--;
        clear_bit(num, enabled_exit_syscalls);
        if (!sys_refcount_exit)
                unregister_trace_sys_exit(ftrace_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}
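
/*
 * Build the syscall number -> metadata lookup table.  Each syscall's
 * handler address is taken from the arch syscall table and matched
 * against the metadata section by symbol name.
 */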
int __init init_ftrace_syscalls(void)
{
        struct syscall_metadata *meta;
        unsigned long addr;
        int i;

        syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
                                    NR_syscalls, GFP_KERNEL);
        if (!syscalls_metadata) {
                WARN_ON(1);
                return -ENOMEM;
        }
        for (i = 0; i < NR_syscalls; i++) {
                addr = arch_syscall_addr(i);
                meta = find_syscall_meta(addr);
                syscalls_metadata[i] = meta;
        }
        return 0;
}
core_initcall(init_ftrace_syscalls);
#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;
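
/*
 * perf/profile counterpart of the probes above: events are assembled in
 * the per cpu perf_trace_buf scratch buffer and pushed to perf through
 * perf_tp_event() instead of the ftrace ring buffer.
 */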
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_enter *rec;
        unsigned long flags;
        char *trace_buf;
        char *raw_data;
        int syscall_nr, rctx, size, cpu;

        syscall_nr = syscall_get_nr(current, regs);
        if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* get the size after alignment with the u32 buffer size field */
        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
                      "profile buffer not large enough"))
                return;

        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(flags);

        rctx = perf_swevent_get_recursion_context();
        if (rctx < 0)
                goto end_recursion;

        cpu = smp_processor_id();
        trace_buf = rcu_dereference(perf_trace_buf);
        if (!trace_buf)
                goto end;

        raw_data = per_cpu_ptr(trace_buf, cpu);

        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

        rec = (struct syscall_trace_enter *) raw_data;
        tracing_generic_entry_update(&rec->ent, 0, 0);
        rec->ent.type = sys_data->enter_id;
        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                              (unsigned long *)&rec->args);
        perf_tp_event(sys_data->enter_id, 0, 1, rec, size);

end:
        perf_swevent_put_recursion_context(rctx);
end_recursion:
        local_irq_restore(flags);
}
int reg_prof_syscall_enter(char *name)
{
        int ret = 0;
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;

        mutex_lock(&syscall_trace_lock);
        if (!sys_prof_refcount_enter)
                ret = register_trace_sys_enter(prof_syscall_enter);
        if (ret) {
                pr_info("event trace: Could not activate "
                        "syscall entry trace point");
        } else {
                set_bit(num, enabled_prof_enter_syscalls);
                sys_prof_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
void unreg_prof_syscall_enter(char *name)
{
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;

        mutex_lock(&syscall_trace_lock);
        sys_prof_refcount_enter--;
        clear_bit(num, enabled_prof_enter_syscalls);
        if (!sys_prof_refcount_enter)
                unregister_trace_sys_enter(prof_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
        unsigned long flags;
        char *trace_buf;
        char *raw_data;
        int syscall_nr, rctx, size, cpu;

        syscall_nr = syscall_get_nr(current, regs);
        if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* We can probably do that at build time */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        /*
         * Impossible, but be paranoid with the future
         * How to put this check outside runtime?
         */
        if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
                      "exit event has grown above profile buffer size"))
                return;

        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(flags);

        rctx = perf_swevent_get_recursion_context();
        if (rctx < 0)
                goto end_recursion;

        cpu = smp_processor_id();
        trace_buf = rcu_dereference(perf_trace_buf);
        if (!trace_buf)
                goto end;

        raw_data = per_cpu_ptr(trace_buf, cpu);

        /* zero the dead bytes from align to not leak stack to user */
        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

        rec = (struct syscall_trace_exit *)raw_data;
        tracing_generic_entry_update(&rec->ent, 0, 0);
        rec->ent.type = sys_data->exit_id;
        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);
        perf_tp_event(sys_data->exit_id, 0, 1, rec, size);

end:
        perf_swevent_put_recursion_context(rctx);
end_recursion:
        local_irq_restore(flags);
}
int reg_prof_syscall_exit(char *name)
{
        int ret = 0;
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;

        mutex_lock(&syscall_trace_lock);
        if (!sys_prof_refcount_exit)
                ret = register_trace_sys_exit(prof_syscall_exit);
        if (ret) {
                pr_info("event trace: Could not activate "
                        "syscall exit trace point");
        } else {
                set_bit(num, enabled_prof_exit_syscalls);
                sys_prof_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
void unreg_prof_syscall_exit(char *name)
{
        int num;

        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;

        mutex_lock(&syscall_trace_lock);
        sys_prof_refcount_exit--;
        clear_bit(num, enabled_prof_exit_syscalls);
        if (!sys_prof_refcount_exit)
                unregister_trace_sys_exit(prof_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_EVENT_PROFILE */