rcu: Eliminate unneeded function wrapping
[safe/jmp/linux-2.6] / kernel / trace / trace_syscalls.c
index 85291c4..527e17e 100644 (file)
@@ -2,7 +2,7 @@
 #include <trace/events/syscalls.h>
 #include <linux/kernel.h>
 #include <linux/ftrace.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"
@@ -11,8 +11,8 @@
 static DEFINE_MUTEX(syscall_trace_lock);
 static int sys_refcount_enter;
 static int sys_refcount_exit;
-static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
-static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
+static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
@@ -166,7 +166,7 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
                               SYSCALL_FIELD(int, nr),
-                              SYSCALL_FIELD(unsigned long, ret));
+                              SYSCALL_FIELD(long, ret));
        if (!ret)
                return 0;
 
@@ -212,7 +212,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
        if (ret)
                return ret;
 
-       ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
+       ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0,
                                 FILTER_OTHER);
 
        return ret;
@@ -223,10 +223,13 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
        struct syscall_trace_enter *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
        int size;
        int syscall_nr;
 
        syscall_nr = syscall_get_nr(current, regs);
+       if (syscall_nr < 0)
+               return;
        if (!test_bit(syscall_nr, enabled_enter_syscalls))
                return;
 
@@ -236,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
 
        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 
-       event = trace_current_buffer_lock_reserve(sys_data->enter_id, size,
-                                                       0, 0);
+       event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
+                                                 size, 0, 0);
        if (!event)
                return;
 
@@ -245,8 +248,9 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
        entry->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
 
-       if (!filter_current_check_discard(sys_data->enter_event, entry, event))
-               trace_current_buffer_unlock_commit(event, 0, 0);
+       if (!filter_current_check_discard(buffer, sys_data->enter_event,
+                                         entry, event))
+               trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
 void ftrace_syscall_exit(struct pt_regs *regs, long ret)
@@ -254,9 +258,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
        struct syscall_trace_exit *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
        int syscall_nr;
 
        syscall_nr = syscall_get_nr(current, regs);
+       if (syscall_nr < 0)
+               return;
        if (!test_bit(syscall_nr, enabled_exit_syscalls))
                return;
 
@@ -264,7 +271,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
        if (!sys_data)
                return;
 
-       event = trace_current_buffer_lock_reserve(sys_data->exit_id,
+       event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
                                sizeof(*entry), 0, 0);
        if (!event)
                return;
@@ -273,8 +280,9 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
        entry->nr = syscall_nr;
        entry->ret = syscall_get_return_value(current, regs);
 
-       if (!filter_current_check_discard(sys_data->exit_event, entry, event))
-               trace_current_buffer_unlock_commit(event, 0, 0);
+       if (!filter_current_check_discard(buffer, sys_data->exit_event,
+                                         entry, event))
+               trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
 int reg_event_syscall_enter(void *ptr)
@@ -285,7 +293,7 @@ int reg_event_syscall_enter(void *ptr)
 
        name = (char *)ptr;
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)
@@ -308,7 +316,7 @@ void unreg_event_syscall_enter(void *ptr)
 
        name = (char *)ptr;
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_enter--;
@@ -326,7 +334,7 @@ int reg_event_syscall_exit(void *ptr)
 
        name = (char *)ptr;
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_exit)
@@ -349,7 +357,7 @@ void unreg_event_syscall_exit(void *ptr)
 
        name = (char *)ptr;
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_exit--;
@@ -369,17 +377,20 @@ struct trace_event event_syscall_exit = {
 
 #ifdef CONFIG_EVENT_PROFILE
 
-static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX);
-static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX);
+static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
 static int sys_prof_refcount_enter;
 static int sys_prof_refcount_exit;
 
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-       struct syscall_trace_enter *rec;
        struct syscall_metadata *sys_data;
+       struct syscall_trace_enter *rec;
+       unsigned long flags;
+       char *raw_data;
        int syscall_nr;
        int size;
+       int cpu;
 
        syscall_nr = syscall_get_nr(current, regs);
        if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -394,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
 
-       do {
-               char raw_data[size];
+       if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+                     "profile buffer not large enough"))
+               return;
+
+       /* Protect the per cpu buffer, begin the rcu read side */
+       local_irq_save(flags);
+
+       cpu = smp_processor_id();
 
-               /* zero the dead bytes from align to not leak stack to user */
-               *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+       if (in_nmi())
+               raw_data = rcu_dereference(trace_profile_buf_nmi);
+       else
+               raw_data = rcu_dereference(trace_profile_buf);
 
-               rec = (struct syscall_trace_enter *) raw_data;
-               tracing_generic_entry_update(&rec->ent, 0, 0);
-               rec->ent.type = sys_data->enter_id;
-               rec->nr = syscall_nr;
-               syscall_get_arguments(current, regs, 0, sys_data->nb_args,
-                                      (unsigned long *)&rec->args);
-               perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-       } while(0);
+       if (!raw_data)
+               goto end;
+
+       raw_data = per_cpu_ptr(raw_data, cpu);
+
+       /* zero the alignment padding to not leak stale buffer data to user */
+       *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+       rec = (struct syscall_trace_enter *) raw_data;
+       tracing_generic_entry_update(&rec->ent, 0, 0);
+       rec->ent.type = sys_data->enter_id;
+       rec->nr = syscall_nr;
+       syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+                              (unsigned long *)&rec->args);
+       perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+
+end:
+       local_irq_restore(flags);
 }
 
 int reg_prof_syscall_enter(char *name)
@@ -416,7 +445,7 @@ int reg_prof_syscall_enter(char *name)
        int num;
 
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
 
        mutex_lock(&syscall_trace_lock);
@@ -438,7 +467,7 @@ void unreg_prof_syscall_enter(char *name)
        int num;
 
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return;
 
        mutex_lock(&syscall_trace_lock);
@@ -452,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
 static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
        struct syscall_metadata *sys_data;
-       struct syscall_trace_exit rec;
+       struct syscall_trace_exit *rec;
+       unsigned long flags;
        int syscall_nr;
+       char *raw_data;
+       int size;
+       int cpu;
 
        syscall_nr = syscall_get_nr(current, regs);
        if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -463,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
        if (!sys_data)
                return;
 
-       tracing_generic_entry_update(&rec.ent, 0, 0);
-       rec.ent.type = sys_data->exit_id;
-       rec.nr = syscall_nr;
-       rec.ret = syscall_get_return_value(current, regs);
+       /* This size is constant and could probably be computed at build time */
+       size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+       size -= sizeof(u32);
+
+       /*
+        * This cannot happen today, but guard against the record growing.
+        * TODO: find a way to do this check at build time, not at runtime.
+        */
+       if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+               "exit event has grown above profile buffer size"))
+               return;
+
+       /* Protect the per cpu buffer, begin the rcu read side */
+       local_irq_save(flags);
+       cpu = smp_processor_id();
+
+       if (in_nmi())
+               raw_data = rcu_dereference(trace_profile_buf_nmi);
+       else
+               raw_data = rcu_dereference(trace_profile_buf);
+
+       if (!raw_data)
+               goto end;
 
-       perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+       raw_data = per_cpu_ptr(raw_data, cpu);
+
+       /* zero the alignment padding to not leak stale buffer data to user */
+       *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+       rec = (struct syscall_trace_exit *)raw_data;
+
+       tracing_generic_entry_update(&rec->ent, 0, 0);
+       rec->ent.type = sys_data->exit_id;
+       rec->nr = syscall_nr;
+       rec->ret = syscall_get_return_value(current, regs);
+
+       perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+       local_irq_restore(flags);
 }
 
 int reg_prof_syscall_exit(char *name)
@@ -477,7 +544,7 @@ int reg_prof_syscall_exit(char *name)
        int num;
 
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
 
        mutex_lock(&syscall_trace_lock);
@@ -499,7 +566,7 @@ void unreg_prof_syscall_exit(char *name)
        int num;
 
        num = syscall_name_to_nr(name);
-       if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+       if (num < 0 || num >= NR_syscalls)
                return;
 
        mutex_lock(&syscall_trace_lock);