tracing: only show tracing_max_latency when latency tracer configured
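
The tracing_max_latency and tracing_thresh files are only meaningful when a
latency tracer (irqsoff, preemptoff, wakeup, ...) is configured, i.e. when
CONFIG_TRACER_MAX_TRACE is set. Move the two variables, __update_max_tr() and
the corresponding debugfs entries under that option, so the files are only
created when a latency tracer can actually update them. ftrace_max_lock stays
outside the #ifdef because it is also used elsewhere.

Illustrative effect on a kernel without a latency tracer configured, assuming
debugfs is mounted at /sys/kernel/debug:

  # ls /sys/kernel/debug/tracing/tracing_max_latency
  ls: /sys/kernel/debug/tracing/tracing_max_latency: No such file or directory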
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7b6043e..0f08816 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
@@ -30,6 +31,7 @@
 #include <linux/percpu.h>
 #include <linux/splice.h>
 #include <linux/kdebug.h>
+#include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
 
 #define TRACE_BUFFER_FLAGS     (RB_FL_OVERWRITE)
 
-unsigned long __read_mostly    tracing_max_latency;
-unsigned long __read_mostly    tracing_thresh;
-
 /*
  * On boot up, the ring buffer is set to the minimum size, so that
  * we do not waste memory on systems that are not using tracing.
  */
-static int ring_buffer_expanded;
+int ring_buffer_expanded;
 
 /*
  * We need to change this state when a selftest is running.
@@ -62,7 +61,7 @@ static bool __read_mostly tracing_selftest_running;
 /*
  * If a tracer is running, we do not want to run SELFTEST.
  */
-static bool __read_mostly tracing_selftest_disabled;
+bool __read_mostly tracing_selftest_disabled;
 
 /* For tracers that don't implement custom flags */
 static struct tracer_opt dummy_tracer_opt[] = {
@@ -87,7 +86,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static int tracing_disabled = 1;
 
-static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
 
 static inline void ftrace_disable_cpu(void)
 {
@@ -147,21 +146,13 @@ static int __init set_ftrace_dump_on_oops(char *str)
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
 {
        nsec += 500;
        do_div(nsec, 1000);
        return nsec;
 }
 
-cycle_t ftrace_now(int cpu)
-{
-       u64 ts = ring_buffer_time_stamp(cpu);
-       ring_buffer_normalize_time_stamp(cpu, &ts);
-       return ts;
-}
-
 /*
  * The global_trace is the descriptor that holds the tracing
  * buffers for the live tracing. For each CPU, it contains
@@ -178,6 +169,27 @@ static struct trace_array  global_trace;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
+int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+                                struct ring_buffer_event *event)
+{
+       return filter_check_discard(call, rec, global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(filter_current_check_discard);
+
+cycle_t ftrace_now(int cpu)
+{
+       u64 ts;
+
+       /* Early boot up does not have a buffer yet */
+       if (!global_trace.buffer)
+               return trace_clock_local();
+
+       ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
+       ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
+
+       return ts;
+}
+
 /*
  * The max_tr is used to snapshot the global_trace when a maximum
  * latency is reached. Some tracers will use this to store a maximum
@@ -248,7 +260,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-       TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
+       TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
+       TRACE_ITER_GRAPH_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -269,13 +282,12 @@ void trace_wake_up(void)
 static int __init set_buf_size(char *str)
 {
        unsigned long buf_size;
-       int ret;
 
        if (!str)
                return 0;
-       ret = strict_strtoul(str, 0, &buf_size);
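+       /* memparse() understands size suffixes, e.g. trace_buf_size=1M */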
+       buf_size = memparse(str, &str);
        /* nr_entries can not be zero */
-       if (ret < 0 || buf_size == 0)
+       if (buf_size == 0)
                return 0;
        trace_buf_size = buf_size;
        return 1;
@@ -308,47 +320,20 @@ static const char *trace_options[] = {
        "printk-msg-only",
        "context-info",
        "latency-format",
+       "sleep-time",
+       "graph-time",
        NULL
 };
 
-/*
- * ftrace_max_lock is used to protect the swapping of buffers
- * when taking a max snapshot. The buffers themselves are
- * protected by per_cpu spinlocks. But the action of the swap
- * needs its own lock.
- *
- * This is defined as a raw_spinlock_t in order to help
- * with performance when lockdep debugging is enabled.
- */
-static raw_spinlock_t ftrace_max_lock =
-       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-
-/*
- * Copy the new maximum trace into the separate maximum-trace
- * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /debugfs/tracing/latency_trace)
- */
-static void
-__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
-{
-       struct trace_array_cpu *data = tr->data[cpu];
-
-       max_tr.cpu = cpu;
-       max_tr.time_start = data->preempt_timestamp;
-
-       data = max_tr.data[cpu];
-       data->saved_latency = tracing_max_latency;
-
-       memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
-       data->pid = tsk->pid;
-       data->uid = task_uid(tsk);
-       data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
-       data->policy = tsk->policy;
-       data->rt_priority = tsk->rt_priority;
+static struct {
+       u64 (*func)(void);
+       const char *name;
+} trace_clocks[] = {
+       { trace_clock_local,    "local" },
+       { trace_clock_global,   "global" },
+};
 
-       /* record this tasks comm */
-       tracing_record_cmdline(tsk);
-}
+int trace_clock_id;
 
 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
 {
@@ -374,7 +359,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
        return cnt;
 }
 
-ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
+static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 {
        int len;
        void *ret;
@@ -393,15 +378,51 @@ ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
        return cnt;
 }
 
+/*
+ * ftrace_max_lock is used to protect the swapping of buffers
+ * when taking a max snapshot. The buffers themselves are
+ * protected by per_cpu spinlocks. But the action of the swap
+ * needs its own lock.
+ *
+ * This is defined as a raw_spinlock_t in order to help
+ * with performance when lockdep debugging is enabled.
+ *
+ * It is also used in places other than update_max_tr(), so it
+ * needs to be defined outside of CONFIG_TRACER_MAX_TRACE.
+ */
+static raw_spinlock_t ftrace_max_lock =
+       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+unsigned long __read_mostly    tracing_max_latency;
+unsigned long __read_mostly    tracing_thresh;
+
+/*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
+ */
 static void
-trace_print_seq(struct seq_file *m, struct trace_seq *s)
+__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-       int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+       struct trace_array_cpu *data = tr->data[cpu];
+
+       max_tr.cpu = cpu;
+       max_tr.time_start = data->preempt_timestamp;
 
-       s->buffer[len] = 0;
-       seq_puts(m, s->buffer);
+       data = max_tr.data[cpu];
+       data->saved_latency = tracing_max_latency;
 
-       trace_seq_init(s);
+       memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+       data->pid = tsk->pid;
+       data->uid = task_uid(tsk);
+       data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
+       data->policy = tsk->policy;
+       data->rt_priority = tsk->rt_priority;
+
+       /* record this task's comm */
+       tracing_record_cmdline(tsk);
 }
 
 /**
@@ -460,6 +481,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
        __update_max_tr(tr, tsk, cpu);
        __raw_spin_unlock(&ftrace_max_lock);
 }
+#endif /* CONFIG_TRACER_MAX_TRACE */
 
 /**
  * register_tracer - register a tracer with the ftrace system.
@@ -632,7 +654,18 @@ void tracing_reset_online_cpus(struct trace_array *tr)
                tracing_reset(tr, cpu);
 }
 
+void tracing_reset_current(int cpu)
+{
+       tracing_reset(&global_trace, cpu);
+}
+
+void tracing_reset_current_online_cpus(void)
+{
+       tracing_reset_online_cpus(&global_trace);
+}
+
 #define SAVED_CMDLINES 128
+#define NO_CMDLINE_MAP UINT_MAX
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
@@ -644,8 +677,8 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
 
 static void trace_init_cmdlines(void)
 {
-       memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
-       memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
+       memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
+       memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
        cmdline_idx = 0;
 }
 
@@ -737,8 +770,7 @@ void trace_stop_cmdline_recording(void);
 
 static void trace_save_cmdline(struct task_struct *tsk)
 {
-       unsigned map;
-       unsigned idx;
+       unsigned pid, idx;
 
        if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
                return;
@@ -753,13 +785,20 @@ static void trace_save_cmdline(struct task_struct *tsk)
                return;
 
        idx = map_pid_to_cmdline[tsk->pid];
-       if (idx >= SAVED_CMDLINES) {
+       if (idx == NO_CMDLINE_MAP) {
                idx = (cmdline_idx + 1) % SAVED_CMDLINES;
 
-               map = map_cmdline_to_pid[idx];
-               if (map <= PID_MAX_DEFAULT)
-                       map_pid_to_cmdline[map] = (unsigned)-1;
+               /*
+                * Check whether the cmdline buffer at idx has a pid
+                * mapped. We are going to overwrite that entry so we
+                * need to clear the map_pid_to_cmdline. Otherwise we
+                * would read the new comm for the old pid.
+                */
+               pid = map_cmdline_to_pid[idx];
+               if (pid != NO_CMDLINE_MAP)
+                       map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
 
+               map_cmdline_to_pid[idx] = tsk->pid;
                map_pid_to_cmdline[tsk->pid] = idx;
 
                cmdline_idx = idx;
@@ -784,15 +823,16 @@ void trace_find_cmdline(int pid, char comm[])
                return;
        }
 
+       preempt_disable();
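+       /*
+        * The low-level raw lock does not disable preemption by itself;
+        * keep preemption off while holding trace_cmdline_lock so another
+        * task on this CPU cannot preempt us and spin on the lock.
+        */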
        __raw_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
-       if (map >= SAVED_CMDLINES)
-               goto out;
-
-       strcpy(comm, saved_cmdlines[map]);
+       if (map != NO_CMDLINE_MAP)
+               strcpy(comm, saved_cmdlines[map]);
+       else
+               strcpy(comm, "<...>");
 
- out:
        __raw_spin_unlock(&trace_cmdline_lock);
+       preempt_enable();
 }
 
 void tracing_record_cmdline(struct task_struct *tsk)
@@ -823,9 +863,10 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
                ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
                (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
 }
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-                                                   unsigned char type,
+                                                   int type,
                                                    unsigned long len,
                                                    unsigned long flags, int pc)
 {
@@ -841,41 +882,63 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
 
        return event;
 }
-static void ftrace_trace_stack(struct trace_array *tr,
-                              unsigned long flags, int skip, int pc);
-static void ftrace_trace_userstack(struct trace_array *tr,
-                                  unsigned long flags, int pc);
 
-void trace_buffer_unlock_commit(struct trace_array *tr,
-                               struct ring_buffer_event *event,
-                               unsigned long flags, int pc)
+static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
+                                       struct ring_buffer_event *event,
+                                       unsigned long flags, int pc,
+                                       int wake)
 {
        ring_buffer_unlock_commit(tr->buffer, event);
 
        ftrace_trace_stack(tr, flags, 6, pc);
        ftrace_trace_userstack(tr, flags, pc);
-       trace_wake_up();
+
+       if (wake)
+               trace_wake_up();
+}
+
+void trace_buffer_unlock_commit(struct trace_array *tr,
+                                       struct ring_buffer_event *event,
+                                       unsigned long flags, int pc)
+{
+       __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
                                  unsigned long flags, int pc)
 {
        return trace_buffer_lock_reserve(&global_trace,
                                         type, len, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
 
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
 {
-       return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
+       __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+}
+EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
+
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+                                       unsigned long flags, int pc)
+{
+       __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
 }
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
+
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+{
+       ring_buffer_discard_commit(global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
 void
 trace_function(struct trace_array *tr,
               unsigned long ip, unsigned long parent_ip, unsigned long flags,
               int pc)
 {
+       struct ftrace_event_call *call = &event_function;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
 
@@ -890,50 +953,10 @@ trace_function(struct trace_array *tr,
        entry   = ring_buffer_event_data(event);
        entry->ip                       = ip;
        entry->parent_ip                = parent_ip;
-       ring_buffer_unlock_commit(tr->buffer, event);
-}
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void __trace_graph_entry(struct trace_array *tr,
-                               struct ftrace_graph_ent *trace,
-                               unsigned long flags,
-                               int pc)
-{
-       struct ring_buffer_event *event;
-       struct ftrace_graph_ent_entry *entry;
-
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
-               return;
-
-       event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->graph_ent                        = *trace;
-       ring_buffer_unlock_commit(global_trace.buffer, event);
-}
-
-static void __trace_graph_return(struct trace_array *tr,
-                               struct ftrace_graph_ret *trace,
-                               unsigned long flags,
-                               int pc)
-{
-       struct ring_buffer_event *event;
-       struct ftrace_graph_ret_entry *entry;
 
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
-               return;
-
-       event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->ret                              = *trace;
-       ring_buffer_unlock_commit(global_trace.buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 }
-#endif
 
 void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
@@ -944,11 +967,12 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
                trace_function(tr, ip, parent_ip, flags, pc);
 }
 
+#ifdef CONFIG_STACKTRACE
 static void __ftrace_trace_stack(struct trace_array *tr,
                                 unsigned long flags,
                                 int skip, int pc)
 {
-#ifdef CONFIG_STACKTRACE
+       struct ftrace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;
@@ -966,13 +990,12 @@ static void __ftrace_trace_stack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace(&trace);
-       ring_buffer_unlock_commit(tr->buffer, event);
-#endif
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 }
 
-static void ftrace_trace_stack(struct trace_array *tr,
-                              unsigned long flags,
-                              int skip, int pc)
+void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip,
+                       int pc)
 {
        if (!(trace_flags & TRACE_ITER_STACKTRACE))
                return;
@@ -980,17 +1003,15 @@ static void ftrace_trace_stack(struct trace_array *tr,
        __ftrace_trace_stack(tr, flags, skip, pc);
 }
 
-void __trace_stack(struct trace_array *tr,
-                  unsigned long flags,
-                  int skip, int pc)
+void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
+                  int pc)
 {
        __ftrace_trace_stack(tr, flags, skip, pc);
 }
 
-static void ftrace_trace_userstack(struct trace_array *tr,
-                                  unsigned long flags, int pc)
+void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc)
 {
-#ifdef CONFIG_STACKTRACE
+       struct ftrace_event_call *call = &event_user_stack;
        struct ring_buffer_event *event;
        struct userstack_entry *entry;
        struct stack_trace trace;
@@ -1012,8 +1033,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace_user(&trace);
-       ring_buffer_unlock_commit(tr->buffer, event);
-#endif
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 }
 
 #ifdef UNUSED
@@ -1023,6 +1044,8 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 }
 #endif /* UNUSED */
 
+#endif /* CONFIG_STACKTRACE */
+
 static void
 ftrace_trace_special(void *__tr,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -1051,57 +1074,6 @@ __trace_special(void *__tr, void *__data,
 }
 
 void
-tracing_sched_switch_trace(struct trace_array *tr,
-                          struct task_struct *prev,
-                          struct task_struct *next,
-                          unsigned long flags, int pc)
-{
-       struct ring_buffer_event *event;
-       struct ctx_switch_entry *entry;
-
-       event = trace_buffer_lock_reserve(tr, TRACE_CTX,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->prev_pid                 = prev->pid;
-       entry->prev_prio                = prev->prio;
-       entry->prev_state               = prev->state;
-       entry->next_pid                 = next->pid;
-       entry->next_prio                = next->prio;
-       entry->next_state               = next->state;
-       entry->next_cpu = task_cpu(next);
-       trace_buffer_unlock_commit(tr, event, flags, pc);
-}
-
-void
-tracing_sched_wakeup_trace(struct trace_array *tr,
-                          struct task_struct *wakee,
-                          struct task_struct *curr,
-                          unsigned long flags, int pc)
-{
-       struct ring_buffer_event *event;
-       struct ctx_switch_entry *entry;
-
-       event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->prev_pid                 = curr->pid;
-       entry->prev_prio                = curr->prio;
-       entry->prev_state               = curr->state;
-       entry->next_pid                 = wakee->pid;
-       entry->next_prio                = wakee->prio;
-       entry->next_state               = wakee->state;
-       entry->next_cpu                 = task_cpu(wakee);
-
-       ring_buffer_unlock_commit(tr->buffer, event);
-       ftrace_trace_stack(tr, flags, 6, pc);
-       ftrace_trace_userstack(tr, flags, pc);
-}
-
-void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
        struct trace_array *tr = &global_trace;
@@ -1125,79 +1097,23 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
        local_irq_restore(flags);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-int trace_graph_entry(struct ftrace_graph_ent *trace)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int cpu;
-       int pc;
-
-       if (!ftrace_trace_task(current))
-               return 0;
-
-       if (!ftrace_graph_addr(trace->func))
-               return 0;
-
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-       if (likely(disabled == 1)) {
-               pc = preempt_count();
-               __trace_graph_entry(tr, trace, flags, pc);
-       }
-       /* Only do the atomic if it is not already set */
-       if (!test_tsk_trace_graph(current))
-               set_tsk_trace_graph(current);
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-
-       return 1;
-}
-
-void trace_graph_return(struct ftrace_graph_ret *trace)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int cpu;
-       int pc;
-
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-       if (likely(disabled == 1)) {
-               pc = preempt_count();
-               __trace_graph_return(tr, trace, flags, pc);
-       }
-       if (!trace->depth)
-               clear_tsk_trace_graph(current);
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-}
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
  */
-int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
        static raw_spinlock_t trace_buf_lock =
                (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
 
+       struct ftrace_event_call *call = &event_bprint;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        struct bprint_entry *entry;
        unsigned long flags;
+       int disable;
        int resched;
        int cpu, len = 0, size, pc;
 
@@ -1212,7 +1128,8 @@ int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
 
-       if (unlikely(atomic_read(&data->disabled)))
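+       /* Use the per-cpu disabled counter as a recursion guard */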
+       disable = atomic_inc_return(&data->disabled);
+       if (unlikely(disable != 1))
                goto out;
 
        /* Lockdep uses trace_printk for lock tracing */
@@ -1229,17 +1146,18 @@ int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
        entry->ip                       = ip;
-       entry->depth                    = depth;
        entry->fmt                      = fmt;
 
        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
        __raw_spin_unlock(&trace_buf_lock);
        local_irq_restore(flags);
 
 out:
+       atomic_dec(&data->disabled);
        ftrace_preempt_enable(resched);
        unpause_graph_tracing();
 
@@ -1247,17 +1165,19 @@ out:
 }
 EXPORT_SYMBOL_GPL(trace_vbprintk);
 
-int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
        static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];
 
+       struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        int cpu, len = 0, size, pc;
        struct print_entry *entry;
        unsigned long irq_flags;
+       int disable;
 
        if (tracing_disabled || tracing_selftest_running)
                return 0;
@@ -1267,7 +1187,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
 
-       if (unlikely(atomic_read(&data->disabled)))
+       disable = atomic_inc_return(&data->disabled);
+       if (unlikely(disable != 1))
                goto out;
 
        pause_graph_tracing();
@@ -1284,17 +1205,18 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
        entry->ip                       = ip;
-       entry->depth                    = depth;
 
        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = 0;
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
        __raw_spin_unlock(&trace_buf_lock);
        raw_local_irq_restore(irq_flags);
        unpause_graph_tracing();
  out:
+       atomic_dec(&data->disabled);
        preempt_enable_notrace();
 
        return len;
@@ -1491,12 +1413,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                p = s_next(m, p, &l);
        }
 
+       trace_event_read_lock();
        return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
        atomic_dec(&trace_record_cmdline_disabled);
+       trace_event_read_unlock();
 }
 
 static void print_lat_help_header(struct seq_file *m)
@@ -1597,7 +1521,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
                return;
 
        cpumask_set_cpu(iter->cpu, iter->started);
-       trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+       /* Don't print started cpu buffer for the first entry of the trace */
+       if (iter->idx > 1)
+               trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+                               iter->cpu);
 }
 
 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1687,38 +1615,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
        return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter)
-{
-       struct trace_seq *s = &iter->seq;
-       struct trace_entry *entry = iter->ent;
-       struct bprint_entry *field;
-       int ret;
-
-       trace_assign_type(field, entry);
-
-       ret = trace_seq_bprintf(s, field->fmt, field->buf);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
-{
-       struct trace_seq *s = &iter->seq;
-       struct trace_entry *entry = iter->ent;
-       struct print_entry *field;
-       int ret;
-
-       trace_assign_type(field, entry);
-
-       ret = trace_seq_printf(s, "%s", field->buf);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
@@ -1767,6 +1663,7 @@ static int trace_empty(struct trace_iterator *iter)
        return 1;
 }
 
+/*  Called with trace_event_read_lock() held. */
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
        enum print_line_t ret;
@@ -1780,12 +1677,12 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
        if (iter->ent->type == TRACE_BPRINT &&
                        trace_flags & TRACE_ITER_PRINTK &&
                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
-               return print_bprintk_msg_only(iter);
+               return trace_print_bprintk_msg_only(iter);
 
        if (iter->ent->type == TRACE_PRINT &&
                        trace_flags & TRACE_ITER_PRINTK &&
                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
-               return print_printk_msg_only(iter);
+               return trace_print_printk_msg_only(iter);
 
        if (trace_flags & TRACE_ITER_BIN)
                return print_bin_fmt(iter);
@@ -1864,6 +1761,11 @@ __tracing_open(struct inode *inode, struct file *file)
        if (current_trace)
                *iter->trace = *current_trace;
 
+       if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+               goto fail;
+
+       cpumask_clear(iter->started);
+
        if (current_trace && current_trace->print_max)
                iter->tr = &max_tr;
        else
@@ -1914,6 +1816,7 @@ __tracing_open(struct inode *inode, struct file *file)
                if (iter->buffer_iter[cpu])
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
        }
+       free_cpumask_var(iter->started);
  fail:
        mutex_unlock(&trace_types_lock);
        kfree(iter->trace);
@@ -1934,9 +1837,14 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
 static int tracing_release(struct inode *inode, struct file *file)
 {
        struct seq_file *m = (struct seq_file *)file->private_data;
-       struct trace_iterator *iter = m->private;
+       struct trace_iterator *iter;
        int cpu;
 
+       if (!(file->f_mode & FMODE_READ))
+               return 0;
+
+       iter = m->private;
+
        mutex_lock(&trace_types_lock);
        for_each_tracing_cpu(cpu) {
                if (iter->buffer_iter[cpu])
@@ -1952,6 +1860,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 
        seq_release(inode, file);
        mutex_destroy(&iter->mutex);
+       free_cpumask_var(iter->started);
        kfree(iter->trace);
        kfree(iter);
        return 0;
@@ -1962,37 +1871,47 @@ static int tracing_open(struct inode *inode, struct file *file)
        struct trace_iterator *iter;
        int ret = 0;
 
-       iter = __tracing_open(inode, file);
-       if (IS_ERR(iter))
-               ret = PTR_ERR(iter);
-       else if (trace_flags & TRACE_ITER_LATENCY_FMT)
-               iter->iter_flags |= TRACE_FILE_LAT_FMT;
+       /* If this file was open for write, then erase contents */
+       if ((file->f_mode & FMODE_WRITE) &&
+           (file->f_flags & O_TRUNC)) {
+               long cpu = (long) inode->i_private;
 
+               if (cpu == TRACE_PIPE_ALL_CPU)
+                       tracing_reset_online_cpus(&global_trace);
+               else
+                       tracing_reset(&global_trace, cpu);
+       }
+
+       if (file->f_mode & FMODE_READ) {
+               iter = __tracing_open(inode, file);
+               if (IS_ERR(iter))
+                       ret = PTR_ERR(iter);
+               else if (trace_flags & TRACE_ITER_LATENCY_FMT)
+                       iter->iter_flags |= TRACE_FILE_LAT_FMT;
+       }
        return ret;
 }
 
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct tracer *t = m->private;
+       struct tracer *t = v;
 
        (*pos)++;
 
        if (t)
                t = t->next;
 
-       m->private = t;
-
        return t;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-       struct tracer *t = m->private;
+       struct tracer *t;
        loff_t l = 0;
 
        mutex_lock(&trace_types_lock);
-       for (; t && l < *pos; t = t_next(m, t, &l))
+       for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
                ;
 
        return t;
@@ -2028,23 +1947,23 @@ static struct seq_operations show_traces_seq_ops = {
 
 static int show_traces_open(struct inode *inode, struct file *file)
 {
-       int ret;
-
        if (tracing_disabled)
                return -ENODEV;
 
-       ret = seq_open(file, &show_traces_seq_ops);
-       if (!ret) {
-               struct seq_file *m = file->private_data;
-               m->private = trace_types;
-       }
+       return seq_open(file, &show_traces_seq_ops);
+}
 
-       return ret;
+static ssize_t
+tracing_write_stub(struct file *filp, const char __user *ubuf,
+                  size_t count, loff_t *ppos)
+{
+       return count;
 }
 
 static const struct file_operations tracing_fops = {
        .open           = tracing_open,
        .read           = seq_read,
+       .write          = tracing_write_stub,
        .llseek         = seq_lseek,
        .release        = tracing_release,
 };
@@ -2104,11 +2023,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
                return -ENOMEM;
 
-       mutex_lock(&tracing_cpumask_update_lock);
        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
        if (err)
                goto err_unlock;
 
+       mutex_lock(&tracing_cpumask_update_lock);
+
        local_irq_disable();
        __raw_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
@@ -2136,8 +2056,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        return count;
 
 err_unlock:
-       mutex_unlock(&tracing_cpumask_update_lock);
-       free_cpumask_var(tracing_cpumask);
+       free_cpumask_var(tracing_cpumask_new);
 
        return err;
 }
@@ -2179,8 +2098,8 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
                len += 3; /* "no" and newline */
        }
 
-       /* +2 for \n and \0 */
-       buf = kmalloc(len + 2, GFP_KERNEL);
+       /* +1 for \0 */
+       buf = kmalloc(len + 1, GFP_KERNEL);
        if (!buf) {
                mutex_unlock(&trace_types_lock);
                return -ENOMEM;
@@ -2203,7 +2122,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
        }
        mutex_unlock(&trace_types_lock);
 
-       WARN_ON(r >= len + 2);
+       WARN_ON(r >= len + 1);
 
        r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 
@@ -2214,23 +2133,23 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
 /* Try to assign a tracer specific option */
 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
 {
-       struct tracer_flags *trace_flags = trace->flags;
+       struct tracer_flags *tracer_flags = trace->flags;
        struct tracer_opt *opts = NULL;
        int ret = 0, i = 0;
        int len;
 
-       for (i = 0; trace_flags->opts[i].name; i++) {
-               opts = &trace_flags->opts[i];
+       for (i = 0; tracer_flags->opts[i].name; i++) {
+               opts = &tracer_flags->opts[i];
                len = strlen(opts->name);
 
                if (strncmp(cmp, opts->name, len) == 0) {
-                       ret = trace->set_flag(trace_flags->val,
+                       ret = trace->set_flag(tracer_flags->val,
                                opts->bit, !neg);
                        break;
                }
        }
        /* Not found */
-       if (!trace_flags->opts[i].name)
+       if (!tracer_flags->opts[i].name)
                return -EINVAL;
 
        /* Refused to handle */
@@ -2238,13 +2157,25 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
                return ret;
 
        if (neg)
-               trace_flags->val &= ~opts->bit;
+               tracer_flags->val &= ~opts->bit;
        else
-               trace_flags->val |= opts->bit;
+               tracer_flags->val |= opts->bit;
 
        return 0;
 }
 
+static void set_tracer_flags(unsigned int mask, int enabled)
+{
+       /* do nothing if flag is already set */
+       if (!!(trace_flags & mask) == !!enabled)
+               return;
+
+       if (enabled)
+               trace_flags |= mask;
+       else
+               trace_flags &= ~mask;
+}
+
 static ssize_t
 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
@@ -2272,10 +2203,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                int len = strlen(trace_options[i]);
 
                if (strncmp(cmp, trace_options[i], len) == 0) {
-                       if (neg)
-                               trace_flags &= ~(1 << i);
-                       else
-                               trace_flags |= (1 << i);
+                       set_tracer_flags(1 << i, !neg);
                        break;
                }
        }
@@ -2302,21 +2230,20 @@ static const struct file_operations tracing_iter_fops = {
 
 static const char readme_msg[] =
        "tracing mini-HOWTO:\n\n"
-       "# mkdir /debug\n"
-       "# mount -t debugfs nodev /debug\n\n"
-       "# cat /debug/tracing/available_tracers\n"
-       "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
-       "# cat /debug/tracing/current_tracer\n"
-       "none\n"
-       "# echo sched_switch > /debug/tracing/current_tracer\n"
-       "# cat /debug/tracing/current_tracer\n"
+       "# mount -t debugfs nodev /sys/kernel/debug\n\n"
+       "# cat /sys/kernel/debug/tracing/available_tracers\n"
+       "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
+       "# cat /sys/kernel/debug/tracing/current_tracer\n"
+       "nop\n"
+       "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
+       "# cat /sys/kernel/debug/tracing/current_tracer\n"
        "sched_switch\n"
-       "# cat /debug/tracing/trace_options\n"
+       "# cat /sys/kernel/debug/tracing/trace_options\n"
        "noprint-parent nosym-offset nosym-addr noverbose\n"
-       "# echo print-parent > /debug/tracing/trace_options\n"
-       "# echo 1 > /debug/tracing/tracing_enabled\n"
-       "# cat /debug/tracing/trace > /tmp/trace.txt\n"
-       "echo 0 > /debug/tracing/tracing_enabled\n"
+       "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
+       "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+       "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
+       "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t
@@ -2333,6 +2260,56 @@ static const struct file_operations tracing_readme_fops = {
 };
 
 static ssize_t
+tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
+                               size_t cnt, loff_t *ppos)
+{
+       char *buf_comm;
+       char *file_buf;
+       char *buf;
+       int len = 0;
+       int pid;
+       int i;
+
+       file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
+       if (!file_buf)
+               return -ENOMEM;
+
+       buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
+       if (!buf_comm) {
+               kfree(file_buf);
+               return -ENOMEM;
+       }
+
+       buf = file_buf;
+
+       for (i = 0; i < SAVED_CMDLINES; i++) {
+               int r;
+
+               pid = map_cmdline_to_pid[i];
+               if (pid == -1 || pid == NO_CMDLINE_MAP)
+                       continue;
+
+               trace_find_cmdline(pid, buf_comm);
+               r = sprintf(buf, "%d %s\n", pid, buf_comm);
+               buf += r;
+               len += r;
+       }
+
+       len = simple_read_from_buffer(ubuf, cnt, ppos,
+                                     file_buf, len);
+
+       kfree(file_buf);
+       kfree(buf_comm);
+
+       return len;
+}
+
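+/*
+ * Illustrative output, assuming a couple of recorded tasks:
+ *
+ *   # cat /sys/kernel/debug/tracing/saved_cmdlines
+ *   389 bash
+ *   412 sshd
+ */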
+static const struct file_operations tracing_saved_cmdlines_fops = {
+       .open           = tracing_open_generic,
+       .read           = tracing_saved_cmdlines_read,
+};
+
+static ssize_t
 tracing_ctrl_read(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
 {
@@ -2664,6 +2641,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
        /* trace pipe does not show start of buffer */
        cpumask_setall(iter->started);
 
+       if (trace_flags & TRACE_ITER_LATENCY_FMT)
+               iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
        iter->cpu_file = cpu_file;
        iter->tr = &global_trace;
        mutex_init(&iter->mutex);
@@ -2851,6 +2831,7 @@ waitagain:
               offsetof(struct trace_iterator, seq));
        iter->pos = -1;
 
+       trace_event_read_lock();
        while (find_next_entry_inc(iter) != NULL) {
                enum print_line_t ret;
                int len = iter->seq.len;
@@ -2867,6 +2848,7 @@ waitagain:
                if (iter->seq.len >= cnt)
                        break;
        }
+       trace_event_read_unlock();
 
        /* Now copy what we have to the user */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -2929,7 +2911,8 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
                        break;
                }
 
-               trace_consume(iter);
+               if (ret != TRACE_TYPE_NO_CONSUME)
+                       trace_consume(iter);
                rem -= count;
                if (!find_next_entry_inc(iter)) {
                        rem = 0;
@@ -2989,6 +2972,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
                goto out_err;
        }
 
+       trace_event_read_lock();
+
        /* Fill as many pages as possible. */
        for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
                pages[i] = alloc_page(GFP_KERNEL);
@@ -3011,6 +2996,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
                trace_seq_init(&iter->seq);
        }
 
+       trace_event_read_unlock();
        mutex_unlock(&iter->mutex);
 
        spd.nr_pages = i;
@@ -3115,7 +3101,7 @@ static int mark_printk(const char *fmt, ...)
        int ret;
        va_list args;
        va_start(args, fmt);
-       ret = trace_vprintk(0, -1, fmt, args);
+       ret = trace_vprintk(0, fmt, args);
        va_end(args);
        return ret;
 }
@@ -3155,6 +3141,62 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        return cnt;
 }
 
+static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
+                                 size_t cnt, loff_t *ppos)
+{
+       char buf[64];
+       int bufiter = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
+               bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
+                       "%s%s%s%s", i ? " " : "",
+                       i == trace_clock_id ? "[" : "", trace_clocks[i].name,
+                       i == trace_clock_id ? "]" : "");
+       bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
+
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
+}
+
+static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
+                                  size_t cnt, loff_t *fpos)
+{
+       char buf[64];
+       const char *clockstr;
+       int i;
+
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(&buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       clockstr = strstrip(buf);
+
+       for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
+               if (strcmp(trace_clocks[i].name, clockstr) == 0)
+                       break;
+       }
+       if (i == ARRAY_SIZE(trace_clocks))
+               return -EINVAL;
+
+       trace_clock_id = i;
+
+       mutex_lock(&trace_types_lock);
+
+       ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
+       if (max_tr.buffer)
+               ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
+
+       mutex_unlock(&trace_types_lock);
+
+       *fpos += cnt;
+
+       return cnt;
+}
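+
+/*
+ * Illustrative use of the resulting trace_clock file:
+ *
+ *   # cat /sys/kernel/debug/tracing/trace_clock
+ *   [local] global
+ *   # echo global > /sys/kernel/debug/tracing/trace_clock
+ */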
+
 static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
@@ -3192,6 +3234,12 @@ static const struct file_operations tracing_mark_fops = {
        .write          = tracing_mark_write,
 };
 
+static const struct file_operations trace_clock_fops = {
+       .open           = tracing_open_generic,
+       .read           = tracing_clock_read,
+       .write          = tracing_clock_write,
+};
+
 struct ftrace_buffer_info {
        struct trace_array      *tr;
        void                    *spare;
@@ -3213,19 +3261,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 
        info->tr        = &global_trace;
        info->cpu       = cpu;
-       info->spare     = ring_buffer_alloc_read_page(info->tr->buffer);
+       info->spare     = NULL;
        /* Force reading ring buffer for first read */
        info->read      = (unsigned int)-1;
-       if (!info->spare)
-               goto out;
 
        filp->private_data = info;
 
-       return 0;
-
- out:
-       kfree(info);
-       return -ENOMEM;
+       return nonseekable_open(inode, filp);
 }
 
 static ssize_t
@@ -3240,6 +3282,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
        if (!count)
                return 0;
 
+       if (!info->spare)
+               info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
+       if (!info->spare)
+               return -ENOMEM;
+
        /* Do we have previous read data to read? */
        if (info->read < PAGE_SIZE)
                goto read;
@@ -3278,7 +3325,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 {
        struct ftrace_buffer_info *info = file->private_data;
 
-       ring_buffer_free_read_page(info->tr->buffer, info->spare);
+       if (info->spare)
+               ring_buffer_free_read_page(info->tr->buffer, info->spare);
        kfree(info);
 
        return 0;
@@ -3361,17 +3409,24 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                .spd_release    = buffer_spd_release,
        };
        struct buffer_ref *ref;
-       int size, i;
+       int entries, size, i;
        size_t ret;
 
-       /*
-        * We can't seek on a buffer input
-        */
-       if (unlikely(*ppos))
-               return -ESPIPE;
+       if (*ppos & (PAGE_SIZE - 1)) {
+               WARN_ONCE(1, "Ftrace: previous read must page-align\n");
+               return -EINVAL;
+       }
 
+       if (len & (PAGE_SIZE - 1)) {
+               WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
+               if (len < PAGE_SIZE)
+                       return -EINVAL;
+               len &= PAGE_MASK;
+       }
+
+       entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 
-       for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
+       for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                int r;
 
@@ -3379,6 +3434,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                if (!ref)
                        break;
 
+               ref->ref = 1;
                ref->buffer = info->tr->buffer;
                ref->page = ring_buffer_alloc_read_page(ref->buffer);
                if (!ref->page) {
@@ -3387,7 +3443,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                }
 
                r = ring_buffer_read_page(ref->buffer, &ref->page,
-                                         len, info->cpu, 0);
+                                         len, info->cpu, 1);
                if (r < 0) {
                        ring_buffer_free_read_page(ref->buffer,
                                                   ref->page);
@@ -3410,6 +3466,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                spd.partial[i].offset = 0;
                spd.partial[i].private = (unsigned long)ref;
                spd.nr_pages++;
+               *ppos += PAGE_SIZE;
+
+               entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
        }
 
        spd.nr_pages = i;
@@ -3437,6 +3496,42 @@ static const struct file_operations tracing_buffers_fops = {
        .llseek         = no_llseek,
 };
 
+static ssize_t
+tracing_stats_read(struct file *filp, char __user *ubuf,
+                  size_t count, loff_t *ppos)
+{
+       unsigned long cpu = (unsigned long)filp->private_data;
+       struct trace_array *tr = &global_trace;
+       struct trace_seq *s;
+       unsigned long cnt;
+
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+
+       trace_seq_init(s);
+
+       cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "entries: %ld\n", cnt);
+
+       cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "overrun: %ld\n", cnt);
+
+       cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+
+       count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+       kfree(s);
+
+       return count;
+}
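+
+/*
+ * Illustrative per-cpu output (path assumes the per_cpu debugfs layout):
+ *
+ *   # cat /sys/kernel/debug/tracing/per_cpu/cpu0/stats
+ *   entries: 102
+ *   overrun: 0
+ *   commit overrun: 0
+ */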
+
+static const struct file_operations tracing_stats_fops = {
+       .open           = tracing_open_generic,
+       .read           = tracing_stats_read,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3483,6 +3578,9 @@ struct dentry *tracing_init_dentry(void)
        if (d_tracer)
                return d_tracer;
 
+       if (!debugfs_initialized())
+               return NULL;
+
        d_tracer = debugfs_create_dir("tracing", NULL);
 
        if (!d_tracer && !once) {
@@ -3523,7 +3621,7 @@ struct dentry *tracing_dentry_percpu(void)
 static void tracing_init_debugfs_percpu(long cpu)
 {
        struct dentry *d_percpu = tracing_dentry_percpu();
-       struct dentry *entry, *d_cpu;
+       struct dentry *d_cpu;
        /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
        char cpu_dir[7];
 
@@ -3538,21 +3636,18 @@ static void tracing_init_debugfs_percpu(long cpu)
        }
 
        /* per cpu trace_pipe */
-       entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
-                               (void *) cpu, &tracing_pipe_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace_pipe' entry\n");
+       trace_create_file("trace_pipe", 0444, d_cpu,
+                       (void *) cpu, &tracing_pipe_fops);
 
        /* per cpu trace */
-       entry = debugfs_create_file("trace", 0444, d_cpu,
-                               (void *) cpu, &tracing_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace' entry\n");
+       trace_create_file("trace", 0644, d_cpu,
+                       (void *) cpu, &tracing_fops);
 
-       entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
-                                   (void *) cpu, &tracing_buffers_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
+       trace_create_file("trace_pipe_raw", 0444, d_cpu,
+                       (void *) cpu, &tracing_buffers_fops);
+
+       trace_create_file("stats", 0444, d_cpu,
+                       (void *) cpu, &tracing_stats_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -3708,6 +3803,22 @@ static const struct file_operations trace_options_core_fops = {
        .write = trace_options_core_write,
 };
 
+struct dentry *trace_create_file(const char *name,
+                                mode_t mode,
+                                struct dentry *parent,
+                                void *data,
+                                const struct file_operations *fops)
+{
+       struct dentry *ret;
+
+       ret = debugfs_create_file(name, mode, parent, data, fops);
+       if (!ret)
+               pr_warning("Could not create debugfs '%s' entry\n", name);
+
+       return ret;
+}
+
 static struct dentry *trace_options_init_dentry(void)
 {
        struct dentry *d_tracer;
@@ -3735,7 +3846,6 @@ create_trace_option_file(struct trace_option_dentry *topt,
                         struct tracer_opt *opt)
 {
        struct dentry *t_options;
-       struct dentry *entry;
 
        t_options = trace_options_init_dentry();
        if (!t_options)
@@ -3744,11 +3854,9 @@ create_trace_option_file(struct trace_option_dentry *topt,
        topt->flags = flags;
        topt->opt = opt;
 
-       entry = debugfs_create_file(opt->name, 0644, t_options, topt,
+       topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
                                    &trace_options_fops);
 
-       topt->entry = entry;
-
 }
 
 static struct trace_option_dentry *
@@ -3803,123 +3911,89 @@ static struct dentry *
 create_trace_option_core_file(const char *option, long index)
 {
        struct dentry *t_options;
-       struct dentry *entry;
 
        t_options = trace_options_init_dentry();
        if (!t_options)
                return NULL;
 
-       entry = debugfs_create_file(option, 0644, t_options, (void *)index,
+       return trace_create_file(option, 0644, t_options, (void *)index,
                                    &trace_options_core_fops);
-
-       return entry;
 }
 
 static __init void create_trace_options_dir(void)
 {
        struct dentry *t_options;
-       struct dentry *entry;
        int i;
 
        t_options = trace_options_init_dentry();
        if (!t_options)
                return;
 
-       for (i = 0; trace_options[i]; i++) {
-               entry = create_trace_option_core_file(trace_options[i], i);
-               if (!entry)
-                       pr_warning("Could not create debugfs %s entry\n",
-                                  trace_options[i]);
-       }
+       for (i = 0; trace_options[i]; i++)
+               create_trace_option_core_file(trace_options[i], i);
 }
 
 static __init int tracer_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
        int cpu;
 
        d_tracer = tracing_init_dentry();
 
-       entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
-                                   &global_trace, &tracing_ctrl_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+       trace_create_file("tracing_enabled", 0644, d_tracer,
+                       &global_trace, &tracing_ctrl_fops);
 
-       entry = debugfs_create_file("trace_options", 0644, d_tracer,
-                                   NULL, &tracing_iter_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace_options' entry\n");
+       trace_create_file("trace_options", 0644, d_tracer,
+                       NULL, &tracing_iter_fops);
 
-       create_trace_options_dir();
+       trace_create_file("tracing_cpumask", 0644, d_tracer,
+                       NULL, &tracing_cpumask_fops);
 
-       entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
-                                   NULL, &tracing_cpumask_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
-
-       entry = debugfs_create_file("trace", 0444, d_tracer,
-                                (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace' entry\n");
-
-       entry = debugfs_create_file("available_tracers", 0444, d_tracer,
-                                   &global_trace, &show_traces_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'available_tracers' entry\n");
-
-       entry = debugfs_create_file("current_tracer", 0444, d_tracer,
-                                   &global_trace, &set_tracer_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'current_tracer' entry\n");
-
-       entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
-                                   &tracing_max_latency,
-                                   &tracing_max_lat_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'tracing_max_latency' entry\n");
-
-       entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
-                                   &tracing_thresh, &tracing_max_lat_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'tracing_thresh' entry\n");
-       entry = debugfs_create_file("README", 0644, d_tracer,
-                                   NULL, &tracing_readme_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'README' entry\n");
-
-       entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
+       trace_create_file("trace", 0644, d_tracer,
+                       (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
+
+       trace_create_file("available_tracers", 0444, d_tracer,
+                       &global_trace, &show_traces_fops);
+
+       trace_create_file("current_tracer", 0644, d_tracer,
+                       &global_trace, &set_tracer_fops);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       trace_create_file("tracing_max_latency", 0644, d_tracer,
+                       &tracing_max_latency, &tracing_max_lat_fops);
+
+       trace_create_file("tracing_thresh", 0644, d_tracer,
+                       &tracing_thresh, &tracing_max_lat_fops);
+#endif
+
+       trace_create_file("README", 0444, d_tracer,
+                       NULL, &tracing_readme_fops);
+
+       trace_create_file("trace_pipe", 0444, d_tracer,
                        (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'trace_pipe' entry\n");
-
-       entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
-                                   &global_trace, &tracing_entries_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'buffer_size_kb' entry\n");
-
-       entry = debugfs_create_file("trace_marker", 0220, d_tracer,
-                                   NULL, &tracing_mark_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'trace_marker' entry\n");
+
+       trace_create_file("buffer_size_kb", 0644, d_tracer,
+                       &global_trace, &tracing_entries_fops);
+
+       trace_create_file("trace_marker", 0220, d_tracer,
+                       NULL, &tracing_mark_fops);
+
+       trace_create_file("saved_cmdlines", 0444, d_tracer,
+                       NULL, &tracing_saved_cmdlines_fops);
+
+       trace_create_file("trace_clock", 0644, d_tracer, NULL,
+                         &trace_clock_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-       entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-                                   &ftrace_update_tot_cnt,
-                                   &tracing_dyn_info_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'dyn_ftrace_total_info' entry\n");
+       trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+                       &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 #ifdef CONFIG_SYSPROF_TRACER
        init_tracer_sysprof_debugfs(d_tracer);
 #endif
 
+       create_trace_options_dir();
+
        for_each_tracing_cpu(cpu)
                tracing_init_debugfs_percpu(cpu);
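
Beyond the mechanical conversion to trace_create_file(), this hunk carries the change named in the subject line: "tracing_max_latency" and "tracing_thresh" are now created only under CONFIG_TRACER_MAX_TRACE, the Kconfig option selected by the latency tracers (irqsoff, preemptoff, wakeup), so the files no longer appear on kernels where no tracer ever records a max latency. The two variable definitions dropped near the top of the file presumably land under the same guard; a sketch of the guarded declarations, assuming they remain in trace.c:

#ifdef CONFIG_TRACER_MAX_TRACE
/* Only meaningful when a latency tracer can update them */
unsigned long __read_mostly tracing_max_latency;
unsigned long __read_mostly tracing_thresh;
#endif

Note also that create_trace_options_dir() moved below the top-level file registrations, and that "trace" and "current_tracer" quietly became writable (0444 to 0644) while "README" became read-only (0644 to 0444).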
 
@@ -3988,30 +4062,36 @@ trace_printk_seq(struct trace_seq *s)
        trace_seq_init(s);
 }
 
-void ftrace_dump(void)
+static void __ftrace_dump(bool disable_tracing)
 {
-       static DEFINE_SPINLOCK(ftrace_dump_lock);
+       static raw_spinlock_t ftrace_dump_lock =
+               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
+       unsigned int old_userobj;
        static int dump_ran;
        unsigned long flags;
        int cnt = 0, cpu;
 
        /* only one dump */
-       spin_lock_irqsave(&ftrace_dump_lock, flags);
+       local_irq_save(flags);
+       __raw_spin_lock(&ftrace_dump_lock);
        if (dump_ran)
                goto out;
 
        dump_ran = 1;
 
-       /* No turning back! */
        tracing_off();
-       ftrace_kill();
+
+       if (disable_tracing)
+               ftrace_kill();
 
        for_each_tracing_cpu(cpu) {
                atomic_inc(&global_trace.data[cpu]->disabled);
        }
 
+       old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
+
        /* don't look at user memory in panic mode */
        trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
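
The locking rework at the top of __ftrace_dump() replaces spin_lock_irqsave() with a raw spinlock plus an explicit local_irq_save(). The likely motivation: this path runs during oops/panic (and potentially NMI) handling, where a debug-instrumented spinlock could recurse into lockdep or the tracer itself; __raw_spin_lock() bypasses that machinery entirely. The pattern in isolation, as an illustrative sketch with hypothetical names:

static raw_spinlock_t dump_lock =
        (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

static void dump_once(void)
{
        static int ran;
        unsigned long flags;

        /* Raw spinlocks do not disable interrupts on their own,
         * so that must be done explicitly around the critical section. */
        local_irq_save(flags);
        __raw_spin_lock(&dump_lock);

        if (!ran) {
                ran = 1;
                /* ... one-shot dump work ... */
        }

        __raw_spin_unlock(&dump_lock);
        local_irq_restore(flags);
}

The saved old_userobj copy of TRACE_ITER_SYM_USEROBJ exists so the flag can be put back when the dump is asked not to disable tracing (see the re-enable block further down).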
 
@@ -4044,8 +4124,11 @@ void ftrace_dump(void)
                iter.pos = -1;
 
                if (find_next_entry_inc(&iter) != NULL) {
-                       print_trace_line(&iter);
-                       trace_consume(&iter);
+                       int ret;
+
+                       ret = print_trace_line(&iter);
+                       if (ret != TRACE_TYPE_NO_CONSUME)
+                               trace_consume(&iter);
                }
 
                trace_printk_seq(&iter.seq);
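
Checking the return value of print_trace_line() fixes a consumption bug: some event printers report that the current entry must not be consumed yet. For orientation, the return codes it can propagate, as declared in the kernel's tracing headers around this point in history (see the header for the authoritative definition):

enum print_line_t {
        TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
        TRACE_TYPE_HANDLED      = 1,
        TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
        TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
};

Only a TRACE_TYPE_NO_CONSUME result skips trace_consume(), so that entry is revisited on the next loop iteration rather than silently dropped.
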
@@ -4056,13 +4139,29 @@ void ftrace_dump(void)
        else
                printk(KERN_TRACE "---------------------------------\n");
 
+       /* Re-enable tracing if requested */
+       if (!disable_tracing) {
+               trace_flags |= old_userobj;
+
+               for_each_tracing_cpu(cpu) {
+                       atomic_dec(&global_trace.data[cpu]->disabled);
+               }
+               tracing_on();
+       }
+
  out:
-       spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+       __raw_spin_unlock(&ftrace_dump_lock);
+       local_irq_restore(flags);
+}
+
+/* By default: disable tracing after the dump */
+void ftrace_dump(void)
+{
+       __ftrace_dump(true);
 }
 
 __init static int tracer_alloc_buffers(void)
 {
-       struct trace_array_cpu *data;
        int ring_buf_size;
        int i;
        int ret = -ENOMEM;
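
Splitting ftrace_dump() into __ftrace_dump(bool disable_tracing) preserves the historical behavior of the default entry point while allowing a dump that does not permanently kill tracing: with disable_tracing false, ftrace_kill() is skipped and the per-cpu disabled counters, the SYM_USEROBJ flag, and tracing_on() are all restored once the dump finishes. A hypothetical call site (not part of this patch) would be:

        /* Dump the current buffers for debugging, but leave
         * tracing fully enabled afterwards. */
        __ftrace_dump(false);

Since __ftrace_dump() is static, such a caller would have to live in trace.c or go through a new wrapper. This hunk also starts the small cleanup finished below: the set-but-unused "data" local in tracer_alloc_buffers() goes away, with the loop assigning global_trace.data[i] directly.
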
@@ -4112,7 +4211,7 @@ __init static int tracer_alloc_buffers(void)
 
        /* Allocate the first page for all buffers */
        for_each_tracing_cpu(i) {
-               data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+               global_trace.data[i] = &per_cpu(global_trace_cpu, i);
                max_tr.data[i] = &per_cpu(max_data, i);
        }