net: skb ftracer - Add config option to enable new ftracer (v3)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4d90c91..1e1d23c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
 #include <linux/list.h>
 #include <linux/hash.h>
 
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
+#include <asm/setup.h>
 
 #include "trace_output.h"
 #include "trace_stat.h"
@@ -290,7 +291,9 @@ function_stat_next(void *v, int idx)
        pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
 
  again:
-       rec++;
+       if (idx != 0)
+               rec++;
+
        if ((void *)rec >= (void *)&pg->records[pg->index]) {
                pg = pg->next;
                if (!pg)
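
The new idx check exists because the profiler's ->stat_start handler reuses this ->stat_next helper to fetch the very first record: with idx == 0 it must return the current record instead of advancing past it, which previously made the first profiled function disappear from the output. A sketch of the expected caller (the exact ->stat_start body is an assumption inferred from this usage):

	static void *function_stat_start(struct tracer_stat *trace)
	{
		struct ftrace_profile_stat *stat =
			container_of(trace, struct ftrace_profile_stat, stat);

		if (!stat || !stat->start)
			return NULL;

		/* idx == 0: return the first record rather than skip it */
		return function_stat_next(&stat->start->records[0], 0);
	}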
@@ -347,8 +350,10 @@ static int function_stat_cmp(void *p1, void *p2)
 static int function_stat_headers(struct seq_file *m)
 {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       seq_printf(m, "  Function                               Hit    Time\n"
-                     "  --------                               ---    ----\n");
+       seq_printf(m, "  Function                               "
+                  "Hit    Time            Avg\n"
+                     "  --------                               "
+                  "---    ----            ---\n");
 #else
        seq_printf(m, "  Function                               Hit\n"
                      "  --------                               ---\n");
@@ -361,12 +366,9 @@ static int function_stat_show(struct seq_file *m, void *v)
        struct ftrace_profile *rec = v;
        char str[KSYM_SYMBOL_LEN];
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       static struct trace_seq s;
        static DEFINE_MUTEX(mutex);
-
-       mutex_lock(&mutex);
-       trace_seq_init(&s);
-       trace_print_graph_duration(rec->time, &s);
+       static struct trace_seq s;
+       unsigned long long avg;
 #endif
 
        kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -374,6 +376,14 @@ static int function_stat_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        seq_printf(m, "    ");
+       avg = rec->time;
+       do_div(avg, rec->counter);
+
+       mutex_lock(&mutex);
+       trace_seq_init(&s);
+       trace_print_graph_duration(rec->time, &s);
+       trace_seq_puts(&s, "    ");
+       trace_print_graph_duration(avg, &s);
        trace_print_seq(m, &s);
        mutex_unlock(&mutex);
 #endif
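
rec->time is a 64-bit nanosecond count, so the average is computed with do_div() rather than a plain '/', which would pull in libgcc's 64-bit division helpers on 32-bit architectures. do_div() divides its first argument in place and returns the remainder; a minimal illustration with made-up values:

	unsigned long long avg = 1000000ULL;	/* total time, in ns */
	unsigned long rem;

	rem = do_div(avg, 3);	/* avg is now 333333, rem is 1 */

rec->counter is never zero here, since a profile record is only created once its function has been hit.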
@@ -591,7 +601,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
        local_irq_save(flags);
 
        stat = &__get_cpu_var(ftrace_profile_stats);
-       if (!stat->hash)
+       if (!stat->hash || !ftrace_profile_enabled)
                goto out;
 
        rec = ftrace_find_profiled_func(stat, ip);
@@ -622,7 +632,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 
        local_irq_save(flags);
        stat = &__get_cpu_var(ftrace_profile_stats);
-       if (!stat->hash)
+       if (!stat->hash || !ftrace_profile_enabled)
                goto out;
 
        calltime = trace->rettime - trace->calltime;
@@ -716,6 +726,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
                        ftrace_profile_enabled = 1;
                } else {
                        ftrace_profile_enabled = 0;
+                       /*
+                        * unregister_ftrace_profiler() calls stop_machine(),
+                        * so this acts like a synchronize_sched().
+                        */
                        unregister_ftrace_profiler();
                }
        }
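
The synchronization comment leans on the readers running with IRQs disabled: function_profile_call() and profile_graph_return() (earlier hunks) test ftrace_profile_enabled under local_irq_save(), and stop_machine() cannot run while any CPU still sits in an IRQs-off region. A condensed sketch of the contract:

	ftrace_profile_enabled = 0;	/* new hits bail out early */
	unregister_ftrace_profiler();	/* stop_machine() waits for every CPU,
					 * so no profiler callback can still be
					 * running once this returns */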
@@ -754,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
        .stat_show      = function_stat_show
 };
 
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
        struct ftrace_profile_stat *stat;
        struct dentry *entry;
@@ -772,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
                         * The files created are permanent, if something happens
                         * we still do not free memory.
                         */
-                       kfree(stat);
                        WARN(1,
                             "Could not allocate stat file for cpu %d\n",
                             cpu);
@@ -799,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
 }
 
 #else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
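
Both the real ftrace_profile_debugfs() and its !CONFIG_FUNCTION_PROFILER stub gain __init: their only caller is ftrace_init_debugfs() (an initcall, see below), so the kernel can discard their text after boot. Annotating both variants keeps the two configurations consistent.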
@@ -909,25 +922,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
        rec->flags |= FTRACE_FL_FREE;
 }
 
-void ftrace_release(void *start, unsigned long size)
-{
-       struct dyn_ftrace *rec;
-       struct ftrace_page *pg;
-       unsigned long s = (unsigned long)start;
-       unsigned long e = s + size;
-
-       if (ftrace_disabled || !start)
-               return;
-
-       mutex_lock(&ftrace_lock);
-       do_for_each_ftrace_rec(pg, rec) {
-               if ((rec->ip >= s) && (rec->ip < e) &&
-                   !(rec->flags & FTRACE_FL_FREE))
-                       ftrace_free_rec(rec);
-       } while_for_each_ftrace_rec();
-       mutex_unlock(&ftrace_lock);
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
        struct dyn_ftrace *rec;
@@ -1231,6 +1225,13 @@ static void ftrace_shutdown(int command)
                return;
 
        ftrace_start_up--;
+       /*
+        * Just warn in case of imbalance; no need to kill ftrace, it's not
+        * critical. But the ftrace_call callers may never be nopped again
+        * after further ftrace uses.
+        */
+       WARN_ON_ONCE(ftrace_start_up < 0);
+
        if (!ftrace_start_up)
                command |= FTRACE_DISABLE_CALLS;
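
ftrace_start_up counts nested ftrace_startup()/ftrace_shutdown() pairs; once it goes negative, the call sites may never be patched back to nops. An illustration of the imbalance the new warning catches:

	ftrace_startup(0);	/* ftrace_start_up: 0 -> 1, calls enabled */
	ftrace_shutdown(0);	/* 1 -> 0, FTRACE_DISABLE_CALLS issued */
	ftrace_shutdown(0);	/* 0 -> -1: WARN_ON_ONCE() fires */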
 
@@ -1417,10 +1418,20 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
 {
        struct ftrace_iterator *iter = m->private;
        void *p = NULL;
+       loff_t l;
+
+       if (!(iter->flags & FTRACE_ITER_HASH))
+               *pos = 0;
 
        iter->flags |= FTRACE_ITER_HASH;
 
-       return t_hash_next(m, p, pos);
+       iter->hidx = 0;
+       for (l = 0; l <= *pos; ) {
+               p = t_hash_next(m, p, &l);
+               if (!p)
+                       break;
+       }
+       return p;
 }
 
 static int t_hash_show(struct seq_file *m, void *v)
@@ -1467,8 +1478,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                        iter->pg = iter->pg->next;
                        iter->idx = 0;
                        goto retry;
-               } else {
-                       iter->idx = -1;
                }
        } else {
                rec = &iter->pg->records[iter->idx++];
@@ -1497,6 +1506,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 {
        struct ftrace_iterator *iter = m->private;
        void *p = NULL;
+       loff_t l;
 
        mutex_lock(&ftrace_lock);
        /*
@@ -1508,23 +1518,21 @@ static void *t_start(struct seq_file *m, loff_t *pos)
                if (*pos > 0)
                        return t_hash_start(m, pos);
                iter->flags |= FTRACE_ITER_PRINTALL;
-               (*pos)++;
                return iter;
        }
 
        if (iter->flags & FTRACE_ITER_HASH)
                return t_hash_start(m, pos);
 
-       if (*pos > 0) {
-               if (iter->idx < 0)
-                       return p;
-               (*pos)--;
-               iter->idx--;
+       iter->pg = ftrace_pages_start;
+       iter->idx = 0;
+       for (l = 0; l <= *pos; ) {
+               p = t_next(m, p, &l);
+               if (!p)
+                       break;
        }
 
-       p = t_next(m, p, pos);
-
-       if (!p)
+       if (!p && (iter->flags & FTRACE_ITER_FILTER))
                return t_hash_start(m, pos);
 
        return p;
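
t_start() (and t_hash_start() above) now follow the standard seq_file idiom: ->start must not trust cached iterator state, because seq_read() may call ->stop and then ->start again at the same *pos, for instance after overflowing its buffer. Rewinding and re-walking to *pos is the safe pattern; a generic sketch, with example_next() standing in for any ->next handler that bumps the position:

	static void *example_start(struct seq_file *m, loff_t *pos)
	{
		void *p = NULL;
		loff_t l;

		/* replay from the beginning up to the requested position */
		for (l = 0; l <= *pos; ) {
			p = example_next(m, p, &l);	/* increments l */
			if (!p)
				break;
		}
		return p;
	}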
@@ -1654,7 +1662,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 
        mutex_lock(&ftrace_regex_lock);
        if ((file->f_mode & FMODE_WRITE) &&
-           !(file->f_flags & O_APPEND))
+           (file->f_flags & O_TRUNC))
                ftrace_filter_reset(enable);
 
        if (file->f_mode & FMODE_READ) {
@@ -2381,6 +2389,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
        ftrace_set_regex(buf, len, reset, 0);
 }
 
+/*
+ * command line interface to allow users to set filters on boot up.
+ */
+#define FTRACE_FILTER_SIZE             COMMAND_LINE_SIZE
+static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+
+static int __init set_ftrace_notrace(char *str)
+{
+       strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+       return 1;
+}
+__setup("ftrace_notrace=", set_ftrace_notrace);
+
+static int __init set_ftrace_filter(char *str)
+{
+       strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+       return 1;
+}
+__setup("ftrace_filter=", set_ftrace_filter);
+
+static void __init set_ftrace_early_filter(char *buf, int enable)
+{
+       char *func;
+
+       while (buf) {
+               func = strsep(&buf, ",");
+               ftrace_set_regex(func, strlen(func), 0, enable);
+       }
+}
+
+static void __init set_ftrace_early_filters(void)
+{
+       if (ftrace_filter_buf[0])
+               set_ftrace_early_filter(ftrace_filter_buf, 1);
+       if (ftrace_notrace_buf[0])
+               set_ftrace_early_filter(ftrace_notrace_buf, 0);
+}
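
The buffers are COMMAND_LINE_SIZE bytes (hence the new <asm/setup.h> include at the top of the file) and marked __initdata, so the memory is returned after boot. Booting with e.g. ftrace_filter=kmalloc,kfree seeds the filter before tracing is ever enabled. strsep() consumes the comma-separated string in place, NUL-terminating each token, which is why the loop above can hand func straight to ftrace_set_regex(); an illustration:

	char buf[] = "kmalloc,kfree,schedule";
	char *walk = buf;
	char *func;

	while (walk) {
		func = strsep(&walk, ",");
		printk("%s\n", func);	/* kmalloc, kfree, schedule */
	}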
+
 static int
 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 {
@@ -2461,32 +2508,31 @@ int ftrace_graph_count;
 unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 
 static void *
-g_next(struct seq_file *m, void *v, loff_t *pos)
+__g_next(struct seq_file *m, loff_t *pos)
 {
        unsigned long *array = m->private;
-       int index = *pos;
-
-       (*pos)++;
 
-       if (index >= ftrace_graph_count)
+       if (*pos >= ftrace_graph_count)
                return NULL;
+       return &array[*pos];
+}
 
-       return &array[index];
+static void *
+g_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       (*pos)++;
+       return __g_next(m, pos);
 }
 
 static void *g_start(struct seq_file *m, loff_t *pos)
 {
-       void *p = NULL;
-
        mutex_lock(&graph_lock);
 
        /* Nothing registered; tell g_show to print that all functions are enabled */
        if (!ftrace_graph_count && !*pos)
                return (void *)1;
 
-       p = g_next(m, p, pos);
-
-       return p;
+       return __g_next(m, pos);
 }
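
The split restores the seq_file division of labour: ->start only positions (via __g_next(), which reads *pos without modifying it), while ->next both advances and fetches. The old g_start() went through g_next() and so incremented *pos as a side effect, meaning a ->stop/->start cycle at the same offset landed one entry further along each time.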
 
 static void g_stop(struct seq_file *m, void *p)
@@ -2531,7 +2577,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 
        mutex_lock(&graph_lock);
        if ((file->f_mode & FMODE_WRITE) &&
-           !(file->f_flags & O_APPEND)) {
+           (file->f_flags & O_TRUNC)) {
                ftrace_graph_count = 0;
                memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
        }
@@ -2550,6 +2596,14 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 }
 
 static int
+ftrace_graph_release(struct inode *inode, struct file *file)
+{
+       if (file->f_mode & FMODE_READ)
+               seq_release(inode, file);
+       return 0;
+}
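
ftrace_graph_open() goes through seq_open() for readers, which allocates a struct seq_file; without a matching release, every open/read/close cycle on set_graph_function leaked that allocation. The FMODE_READ test mirrors the open side, since write-only opens never allocate. The generic pairing, sketched with a hypothetical example_sops:

	static int example_open(struct inode *inode, struct file *file)
	{
		if (file->f_mode & FMODE_READ)
			return seq_open(file, &example_sops);	/* allocates */
		return 0;
	}

	static int example_release(struct inode *inode, struct file *file)
	{
		if (file->f_mode & FMODE_READ)
			seq_release(inode, file);		/* frees */
		return 0;
	}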
+
+static int
 ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 {
        struct dyn_ftrace *rec;
@@ -2678,46 +2732,32 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 }
 
 static const struct file_operations ftrace_graph_fops = {
-       .open = ftrace_graph_open,
-       .read = seq_read,
-       .write = ftrace_graph_write,
+       .open           = ftrace_graph_open,
+       .read           = seq_read,
+       .write          = ftrace_graph_write,
+       .release        = ftrace_graph_release,
 };
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 {
-       struct dentry *entry;
 
-       entry = debugfs_create_file("available_filter_functions", 0444,
-                                   d_tracer, NULL, &ftrace_avail_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'available_filter_functions' entry\n");
+       trace_create_file("available_filter_functions", 0444,
+                       d_tracer, NULL, &ftrace_avail_fops);
 
-       entry = debugfs_create_file("failures", 0444,
-                                   d_tracer, NULL, &ftrace_failures_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'failures' entry\n");
+       trace_create_file("failures", 0444,
+                       d_tracer, NULL, &ftrace_failures_fops);
 
-       entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
-                                   NULL, &ftrace_filter_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_ftrace_filter' entry\n");
+       trace_create_file("set_ftrace_filter", 0644, d_tracer,
+                       NULL, &ftrace_filter_fops);
 
-       entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+       trace_create_file("set_ftrace_notrace", 0644, d_tracer,
                                    NULL, &ftrace_notrace_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_ftrace_notrace' entry\n");
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+       trace_create_file("set_graph_function", 0444, d_tracer,
                                    NULL,
                                    &ftrace_graph_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
        return 0;
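
trace_create_file() folds the repeated failure reporting into one place, which is why the open-coded pr_warning() calls can go. A sketch of what the helper is assumed to do, inferred from the calls it replaces:

	struct dentry *trace_create_file(const char *name, mode_t mode,
					 struct dentry *parent, void *data,
					 const struct file_operations *fops)
	{
		struct dentry *ret;

		ret = debugfs_create_file(name, mode, parent, data, fops);
		if (!ret)
			pr_warning("Could not create debugfs '%s' entry\n", name);

		return ret;
	}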
@@ -2755,14 +2795,72 @@ static int ftrace_convert_nops(struct module *mod,
        return 0;
 }
 
-void ftrace_init_module(struct module *mod,
-                       unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+void ftrace_release(void *start, void *end)
+{
+       struct dyn_ftrace *rec;
+       struct ftrace_page *pg;
+       unsigned long s = (unsigned long)start;
+       unsigned long e = (unsigned long)end;
+
+       if (ftrace_disabled || !start || start == end)
+               return;
+
+       mutex_lock(&ftrace_lock);
+       do_for_each_ftrace_rec(pg, rec) {
+               if ((rec->ip >= s) && (rec->ip < e)) {
+                       /*
+                        * rec->ip is changed in ftrace_free_rec(), so it
+                        * should not lie between s and e if the record
+                        * was already freed.
+                        */
+                       FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+                       ftrace_free_rec(rec);
+               }
+       } while_for_each_ftrace_rec();
+       mutex_unlock(&ftrace_lock);
+}
+
+static void ftrace_init_module(struct module *mod,
+                              unsigned long *start, unsigned long *end)
 {
        if (ftrace_disabled || start == end)
                return;
        ftrace_convert_nops(mod, start, end);
 }
 
+static int ftrace_module_notify(struct notifier_block *self,
+                               unsigned long val, void *data)
+{
+       struct module *mod = data;
+
+       switch (val) {
+       case MODULE_STATE_COMING:
+               ftrace_init_module(mod, mod->ftrace_callsites,
+                                  mod->ftrace_callsites +
+                                  mod->num_ftrace_callsites);
+               break;
+       case MODULE_STATE_GOING:
+               ftrace_release(mod->ftrace_callsites,
+                              mod->ftrace_callsites +
+                              mod->num_ftrace_callsites);
+               break;
+       }
+
+       return 0;
+}
+#else
+static int ftrace_module_notify(struct notifier_block *self,
+                               unsigned long val, void *data)
+{
+       return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block ftrace_module_nb = {
+       .notifier_call = ftrace_module_notify,
+       .priority = 0,
+};
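
mod->ftrace_callsites and mod->num_ftrace_callsites describe the module's recorded mcount call sites, so both notifier cases pass the half-open range [ftrace_callsites, ftrace_callsites + num_ftrace_callsites). MODULE_STATE_COMING converts the sites to nops before the module runs; MODULE_STATE_GOING frees their dyn_ftrace records so a stale ip can never be patched later. The !CONFIG_MODULES stub keeps the registration in ftrace_init() below unconditional.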
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -2794,6 +2892,12 @@ void __init ftrace_init(void)
                                  __start_mcount_loc,
                                  __stop_mcount_loc);
 
+       ret = register_module_notifier(&ftrace_module_nb);
+       if (ret)
+               pr_warning("Failed to register ftrace module notifier\n");
+
+       set_ftrace_early_filters();
+
        return;
  failed:
        ftrace_disabled = 1;
@@ -2975,7 +3079,6 @@ static const struct file_operations ftrace_pid_fops = {
 static __init int ftrace_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
 
        d_tracer = tracing_init_dentry();
        if (!d_tracer)
@@ -2983,11 +3086,8 @@ static __init int ftrace_init_debugfs(void)
 
        ftrace_init_dyn_debugfs(d_tracer);
 
-       entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
-                                   NULL, &ftrace_pid_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_ftrace_pid' entry\n");
+       trace_create_file("set_ftrace_pid", 0644, d_tracer,
+                           NULL, &ftrace_pid_fops);
 
        ftrace_profile_debugfs(d_tracer);
 
@@ -3068,10 +3168,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
        ret  = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
-       if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+       if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
                goto out;
 
-       last_ftrace_enabled = ftrace_enabled;
+       last_ftrace_enabled = !!ftrace_enabled;
 
        if (ftrace_enabled) {
 
@@ -3099,7 +3199,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static atomic_t ftrace_graph_active;
+static int ftrace_graph_active;
 static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -3141,12 +3241,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
                }
 
                if (t->ret_stack == NULL) {
-                       t->curr_ret_stack = -1;
-                       /* Make sure IRQs see the -1 first: */
-                       barrier();
-                       t->ret_stack = ret_stack_list[start++];
                        atomic_set(&t->tracing_graph_pause, 0);
                        atomic_set(&t->trace_overrun, 0);
+                       t->curr_ret_stack = -1;
+                       /* Make sure the tasks see the -1 first: */
+                       smp_wmb();
+                       t->ret_stack = ret_stack_list[start++];
                }
        } while_each_thread(g, t);
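
The reordering matters because an interrupt can fire between the two stores: the function-graph entry code treats a non-NULL ret_stack as fully initialized. The consumer therefore has to test ret_stack first and issue the pairing read barrier before touching curr_ret_stack; a sketch of that read side (shape assumed from the graph tracer's push path):

	if (!current->ret_stack)
		return -EBUSY;

	/* pairs with the smp_wmb() in the init paths */
	smp_rmb();

	index = ++current->curr_ret_stack;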
 
@@ -3204,8 +3304,10 @@ static int start_graph_tracing(void)
                return -ENOMEM;
 
        /* The idle tasks' ret_stack will never be freed */
-       for_each_online_cpu(cpu)
-               ftrace_graph_init_task(idle_task(cpu));
+       for_each_online_cpu(cpu) {
+               if (!idle_task(cpu)->ret_stack)
+                       ftrace_graph_init_task(idle_task(cpu));
+       }
 
        do {
                ret = alloc_retstack_tasklist(ret_stack_list);
@@ -3251,7 +3353,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
        mutex_lock(&ftrace_lock);
 
        /* we currently allow only one tracer registered at a time */
-       if (atomic_read(&ftrace_graph_active)) {
+       if (ftrace_graph_active) {
                ret = -EBUSY;
                goto out;
        }
@@ -3259,10 +3361,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
        ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
        register_pm_notifier(&ftrace_suspend_notifier);
 
-       atomic_inc(&ftrace_graph_active);
+       ftrace_graph_active++;
        ret = start_graph_tracing();
        if (ret) {
-               atomic_dec(&ftrace_graph_active);
+               ftrace_graph_active--;
                goto out;
        }
 
@@ -3280,31 +3382,42 @@ void unregister_ftrace_graph(void)
 {
        mutex_lock(&ftrace_lock);
 
-       atomic_dec(&ftrace_graph_active);
+       if (unlikely(!ftrace_graph_active))
+               goto out;
+
+       ftrace_graph_active--;
        unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
        ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
        ftrace_graph_entry = ftrace_graph_entry_stub;
        ftrace_shutdown(FTRACE_STOP_FUNC_RET);
        unregister_pm_notifier(&ftrace_suspend_notifier);
 
+ out:
        mutex_unlock(&ftrace_lock);
 }
 
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
-       if (atomic_read(&ftrace_graph_active)) {
-               t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+       /* Make sure we do not use the parent ret_stack */
+       t->ret_stack = NULL;
+
+       if (ftrace_graph_active) {
+               struct ftrace_ret_stack *ret_stack;
+
+               ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
                                * sizeof(struct ftrace_ret_stack),
                                GFP_KERNEL);
-               if (!t->ret_stack)
+               if (!ret_stack)
                        return;
                t->curr_ret_stack = -1;
                atomic_set(&t->tracing_graph_pause, 0);
                atomic_set(&t->trace_overrun, 0);
                t->ftrace_timestamp = 0;
-       } else
-               t->ret_stack = NULL;
+               /* make curr_ret_stack visible before we add the ret_stack */
+               smp_wmb();
+               t->ret_stack = ret_stack;
+       }
 }
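
ftrace_graph_init_task() runs on every fork, so clearing ret_stack up front also guarantees a child never runs on its parent's return stack; the smp_wmb() here pairs with the same read side sketched earlier. Teardown follows the inverse discipline: clear the pointer first so IRQs stop using the stack, then free it, as ftrace_graph_exit_task() below is expected to do.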
 
 void ftrace_graph_exit_task(struct task_struct *t)