diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index da984ad..086d363 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
  *  Copyright (C) 2004 William Lee Irwin III
  */
 #include <linux/ring_buffer.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/stacktrace.h>
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/splice.h>
 #include <linux/kdebug.h>
 #include <linux/string.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
-#include <linux/gfp.h>
 #include <linux/fs.h>
 
 #include "trace.h"
 
 #define TRACE_BUFFER_FLAGS     (RB_FL_OVERWRITE)
 
-unsigned long __read_mostly    tracing_max_latency;
-unsigned long __read_mostly    tracing_thresh;
-
 /*
  * On boot up, the ring buffer is set to the minimum size, so that
  * we do not waste memory on systems that are not using tracing.
  */
-static int ring_buffer_expanded;
+int ring_buffer_expanded;
 
 /*
  * We need to change this state when a selftest is running.
@@ -64,7 +62,7 @@ static bool __read_mostly tracing_selftest_running;
 /*
  * If a tracer is running, we do not want to run SELFTEST.
  */
-static bool __read_mostly tracing_selftest_disabled;
+bool __read_mostly tracing_selftest_disabled;
 
 /* For tracers that don't implement custom flags */
 static struct tracer_opt dummy_tracer_opt[] = {
@@ -89,25 +87,22 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static int tracing_disabled = 1;
 
-static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+DEFINE_PER_CPU(int, ftrace_cpu_disabled);
 
 static inline void ftrace_disable_cpu(void)
 {
        preempt_disable();
-       local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_inc(ftrace_cpu_disabled);
 }
 
 static inline void ftrace_enable_cpu(void)
 {
-       local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_dec(ftrace_cpu_disabled);
        preempt_enable();
 }
 
 static cpumask_var_t __read_mostly     tracing_buffer_mask;
 
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t                   tracing_reader_cpumask;
-
 #define for_each_tracing_cpu(cpu)      \
        for_each_cpu(cpu, tracing_buffer_mask)
 
@@ -122,30 +117,42 @@ static cpumask_var_t                      tracing_reader_cpumask;
  *
  * It is default off, but you can enable it with either specifying
  * "ftrace_dump_on_oops" in the kernel command line, or setting
- * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ * /proc/sys/kernel/ftrace_dump_on_oops.
+ * Set it to 1 to dump the buffers of all CPUs,
+ * or to 2 to dump only the buffer of the CPU that triggered the oops.
  */
-int ftrace_dump_on_oops;
+
+enum ftrace_dump_mode ftrace_dump_on_oops;
 
 static int tracing_set_tracer(const char *buf);
 
-#define BOOTUP_TRACER_SIZE             100
-static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
+#define MAX_TRACER_SIZE                100
+static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 static char *default_bootup_tracer;
 
-static int __init set_ftrace(char *str)
+static int __init set_cmdline_ftrace(char *str)
 {
-       strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
+       strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
        /* We are using ftrace early, expand it */
        ring_buffer_expanded = 1;
        return 1;
 }
-__setup("ftrace=", set_ftrace);
+__setup("ftrace=", set_cmdline_ftrace);
 
 static int __init set_ftrace_dump_on_oops(char *str)
 {
-       ftrace_dump_on_oops = 1;
-       return 1;
+       if (*str++ != '=' || !*str) {
+               ftrace_dump_on_oops = DUMP_ALL;
+               return 1;
+       }
+
+       if (!strcmp("orig_cpu", str)) {
+               ftrace_dump_on_oops = DUMP_ORIG;
+               return 1;
+       }
+
+       return 0;
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
@@ -172,10 +179,11 @@ static struct trace_array global_trace;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
-int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+int filter_current_check_discard(struct ring_buffer *buffer,
+                                struct ftrace_event_call *call, void *rec,
                                 struct ring_buffer_event *event)
 {
-       return filter_check_discard(call, rec, global_trace.buffer, event);
+       return filter_check_discard(call, rec, buffer, event);
 }
 EXPORT_SYMBOL_GPL(filter_current_check_discard);
 
@@ -205,7 +213,7 @@ cycle_t ftrace_now(int cpu)
  */
 static struct trace_array      max_tr;
 
-static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
 
 /* tracer_enabled is used to toggle activation of a tracer */
 static int                     tracer_enabled = 1;
@@ -244,19 +252,91 @@ static struct tracer              *trace_types __read_mostly;
 static struct tracer           *current_trace __read_mostly;
 
 /*
- * max_tracer_type_len is used to simplify the allocating of
- * buffers to read userspace tracer names. We keep track of
- * the longest tracer name registered.
+ * trace_types_lock is used to protect the trace_types list.
  */
-static int                     max_tracer_type_len;
+static DEFINE_MUTEX(trace_types_lock);
 
 /*
- * trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
+ * Serialize access to the ring buffer.
+ *
+ * The ring buffer serializes readers, but that is only low level protection.
+ * The validity of the events (which are returned by ring_buffer_peek() etc.)
+ * is not protected by the ring buffer.
+ *
+ * The content of events may become garbage if we allow other processes to
+ * consume these events concurrently:
+ *   A) the page of the consumed events may become a normal page
+ *      (not a reader page) in the ring buffer, and this page will be
+ *      rewritten by the events producer.
+ *   B) the page of the consumed events may become a page for splice_read,
+ *      and this page will be returned to the system.
+ *
+ * These primitives allow multiple processes to access different cpu ring
+ * buffers concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multiple read-only accesses are also serialized.
  */
-static DEFINE_MUTEX(trace_types_lock);
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+       if (cpu == TRACE_PIPE_ALL_CPU) {
+               /* gain it for accessing the whole ring buffer. */
+               down_write(&all_cpu_access_lock);
+       } else {
+               /* gain it for accessing a cpu ring buffer. */
+
+               /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+               down_read(&all_cpu_access_lock);
+
+               /* Secondly block other access to this @cpu ring buffer. */
+               mutex_lock(&per_cpu(cpu_access_lock, cpu));
+       }
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+       if (cpu == TRACE_PIPE_ALL_CPU) {
+               up_write(&all_cpu_access_lock);
+       } else {
+               mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+               up_read(&all_cpu_access_lock);
+       }
+}
+
+static inline void trace_access_lock_init(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+       (void)cpu;
+       mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+       (void)cpu;
+       mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
 
 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
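The locking scheme introduced above pairs one global rwsem with a per-cpu mutex: a whole-buffer reader (TRACE_PIPE_ALL_CPU) takes all_cpu_access_lock for write, while a per-cpu reader takes it for read and then that cpu's mutex, so readers of different cpu buffers do not block each other. A rough user-space analog with pthreads, assuming a fixed NCPUS and made-up names (a sketch of the idea, not the kernel implementation):

#include <pthread.h>

#define NCPUS    8
#define ALL_CPUS -1    /* stand-in for TRACE_PIPE_ALL_CPU */

static pthread_rwlock_t all_cpu_access_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t cpu_access_lock[NCPUS];

static void trace_access_lock_init(void)
{
    int cpu;

    for (cpu = 0; cpu < NCPUS; cpu++)
        pthread_mutex_init(&cpu_access_lock[cpu], NULL);
}

static void trace_access_lock(int cpu)
{
    if (cpu == ALL_CPUS) {
        /* exclusive: no per-cpu reader may run concurrently */
        pthread_rwlock_wrlock(&all_cpu_access_lock);
    } else {
        /* shared: only block whole-buffer readers ... */
        pthread_rwlock_rdlock(&all_cpu_access_lock);
        /* ... then serialize readers of this one cpu buffer */
        pthread_mutex_lock(&cpu_access_lock[cpu]);
    }
}

static void trace_access_unlock(int cpu)
{
    if (cpu == ALL_CPUS) {
        pthread_rwlock_unlock(&all_cpu_access_lock);
    } else {
        pthread_mutex_unlock(&cpu_access_lock[cpu]);
        pthread_rwlock_unlock(&all_cpu_access_lock);
    }
}

int main(void)
{
    trace_access_lock_init();

    trace_access_lock(3);           /* reader of cpu 3's buffer */
    trace_access_unlock(3);

    trace_access_lock(ALL_CPUS);    /* reader of the whole ring buffer */
    trace_access_unlock(ALL_CPUS);
    return 0;
}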
@@ -266,6 +346,9 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
        TRACE_ITER_GRAPH_TIME;
 
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
 /**
  * trace_wake_up - wake up tasks waiting for trace input
  *
@@ -274,12 +357,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
  */
 void trace_wake_up(void)
 {
+       int cpu;
+
+       if (trace_flags & TRACE_ITER_BLOCK)
+               return;
        /*
         * The runqueue_is_locked() can fail, but this is the best we
         * have for now:
         */
-       if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
+       cpu = get_cpu();
+       if (!runqueue_is_locked(cpu))
                wake_up(&trace_wait);
+       put_cpu();
 }
 
 static int __init set_buf_size(char *str)
@@ -297,6 +386,21 @@ static int __init set_buf_size(char *str)
 }
 __setup("trace_buf_size=", set_buf_size);
 
+static int __init set_tracing_thresh(char *str)
+{
+       unsigned long threshold;
+       int ret;
+
+       if (!str)
+               return 0;
+       ret = strict_strtoul(str, 0, &threshold);
+       if (ret < 0)
+               return 0;
+       tracing_thresh = threshold * 1000;
+       return 1;
+}
+__setup("tracing_thresh=", set_tracing_thresh);
+
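set_tracing_thresh() takes the "tracing_thresh=" boot value in microseconds and stores it in tracing_thresh in nanoseconds, hence the multiply by 1000. A hedged user-space equivalent using strtoul() in place of the kernel's strict_strtoul() (the main() harness is illustrative only):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned long tracing_thresh;   /* nanoseconds */

/* str is the value after "tracing_thresh=", given in microseconds */
static int set_tracing_thresh(const char *str)
{
    unsigned long threshold;
    char *end;

    if (!str || !*str)
        return 0;
    errno = 0;
    threshold = strtoul(str, &end, 0);
    if (errno || end == str)
        return 0;
    tracing_thresh = threshold * 1000;  /* usecs -> nsecs */
    return 1;
}

int main(void)
{
    if (set_tracing_thresh("250"))
        printf("tracing_thresh = %lu ns\n", tracing_thresh);
    return 0;
}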
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
        return nsecs / 1000;
@@ -313,7 +417,6 @@ static const char *trace_options[] = {
        "bin",
        "block",
        "stacktrace",
-       "sched-tree",
        "trace_printk",
        "ftrace_preempt",
        "branch",
@@ -323,49 +426,125 @@ static const char *trace_options[] = {
        "printk-msg-only",
        "context-info",
        "latency-format",
-       "global-clock",
        "sleep-time",
        "graph-time",
        NULL
 };
 
+static struct {
+       u64 (*func)(void);
+       const char *name;
+} trace_clocks[] = {
+       { trace_clock_local,    "local" },
+       { trace_clock_global,   "global" },
+};
+
+int trace_clock_id;
+
 /*
- * ftrace_max_lock is used to protect the swapping of buffers
- * when taking a max snapshot. The buffers themselves are
- * protected by per_cpu spinlocks. But the action of the swap
- * needs its own lock.
- *
- * This is defined as a raw_spinlock_t in order to help
- * with performance when lockdep debugging is enabled.
+ * trace_parser_get_init - allocates the buffer for the trace parser
  */
-static raw_spinlock_t ftrace_max_lock =
-       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+int trace_parser_get_init(struct trace_parser *parser, int size)
+{
+       memset(parser, 0, sizeof(*parser));
+
+       parser->buffer = kmalloc(size, GFP_KERNEL);
+       if (!parser->buffer)
+               return 1;
+
+       parser->size = size;
+       return 0;
+}
 
 /*
- * Copy the new maximum trace into the separate maximum-trace
- * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
+ * trace_parser_put - frees the buffer for the trace parser
  */
-static void
-__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+void trace_parser_put(struct trace_parser *parser)
 {
-       struct trace_array_cpu *data = tr->data[cpu];
+       kfree(parser->buffer);
+}
 
-       max_tr.cpu = cpu;
-       max_tr.time_start = data->preempt_timestamp;
+/*
+ * trace_get_user - reads the user input string separated by space
+ * (matched by isspace(ch))
+ *
+ * For each string found the 'struct trace_parser' is updated,
+ * and the function returns.
+ *
+ * Returns number of bytes read.
+ *
+ * See kernel/trace/trace.h for 'struct trace_parser' details.
+ */
+int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
+       size_t cnt, loff_t *ppos)
+{
+       char ch;
+       size_t read = 0;
+       ssize_t ret;
+
+       if (!*ppos)
+               trace_parser_clear(parser);
 
-       data = max_tr.data[cpu];
-       data->saved_latency = tracing_max_latency;
+       ret = get_user(ch, ubuf++);
+       if (ret)
+               goto out;
 
-       memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
-       data->pid = tsk->pid;
-       data->uid = task_uid(tsk);
-       data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
-       data->policy = tsk->policy;
-       data->rt_priority = tsk->rt_priority;
+       read++;
+       cnt--;
 
-       /* record this tasks comm */
-       tracing_record_cmdline(tsk);
+       /*
+        * The parser is not finished with the last write,
+        * continue reading the user input without skipping spaces.
+        */
+       if (!parser->cont) {
+               /* skip white space */
+               while (cnt && isspace(ch)) {
+                       ret = get_user(ch, ubuf++);
+                       if (ret)
+                               goto out;
+                       read++;
+                       cnt--;
+               }
+
+               /* only spaces were written */
+               if (isspace(ch)) {
+                       *ppos += read;
+                       ret = read;
+                       goto out;
+               }
+
+               parser->idx = 0;
+       }
+
+       /* read the non-space input */
+       while (cnt && !isspace(ch)) {
+               if (parser->idx < parser->size - 1)
+                       parser->buffer[parser->idx++] = ch;
+               else {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               ret = get_user(ch, ubuf++);
+               if (ret)
+                       goto out;
+               read++;
+               cnt--;
+       }
+
+       /* We either got finished input or we have to wait for another call. */
+       if (isspace(ch)) {
+               parser->buffer[parser->idx] = 0;
+               parser->cont = false;
+       } else {
+               parser->cont = true;
+               parser->buffer[parser->idx++] = ch;
+       }
+
+       *ppos += read;
+       ret = read;
+
+out:
+       return ret;
 }
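trace_get_user() pulls one whitespace-delimited token per call out of the user buffer, and uses parser->cont to carry a partial token over to the next call when the input runs out mid-word. A simplified user-space sketch of the same state machine, reading from an ordinary string instead of via get_user() (struct token_parser and the demo input are invented for illustration, and error handling is omitted):

#include <ctype.h>
#include <stdio.h>

struct token_parser {
    char buf[32];
    int  idx;
    int  cont;  /* token continues in the next chunk */
};

/* Consume one space-separated token (possibly partial) from chunk. */
static size_t get_token(struct token_parser *p, const char *chunk, size_t cnt)
{
    size_t read = 0;

    if (!p->cont) {
        /* new token: skip leading whitespace first */
        while (read < cnt && isspace((unsigned char)chunk[read]))
            read++;
        p->idx = 0;
    }

    /* copy non-space characters, leaving room for the terminator */
    while (read < cnt && !isspace((unsigned char)chunk[read])) {
        if (p->idx < (int)sizeof(p->buf) - 1)
            p->buf[p->idx++] = chunk[read];
        read++;
    }

    if (read < cnt) {
        p->buf[p->idx] = '\0';  /* hit a delimiter: token complete */
        p->cont = 0;
    } else {
        p->cont = 1;            /* ran out of input mid-token */
    }
    return read;
}

int main(void)
{
    struct token_parser p = { .cont = 0 };

    /* feed "sched_switch" split across two chunks */
    get_token(&p, "sched_sw", 8);
    get_token(&p, "itch irq", 8);
    printf("token: %s (cont=%d)\n", p.buf, p.cont);
    return 0;
}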
 
 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
@@ -411,6 +590,57 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
        return cnt;
 }
 
+/*
+ * ftrace_max_lock is used to protect the swapping of buffers
+ * when taking a max snapshot. The buffers themselves are
+ * protected by per_cpu spinlocks. But the action of the swap
+ * needs its own lock.
+ *
+ * This is defined as a arch_spinlock_t in order to help
+ * with performance when lockdep debugging is enabled.
+ *
+ * It is also used in other places outside the update_max_tr
+ * so it needs to be defined outside of the
+ * CONFIG_TRACER_MAX_TRACE.
+ */
+static arch_spinlock_t ftrace_max_lock =
+       (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+
+unsigned long __read_mostly    tracing_thresh;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+unsigned long __read_mostly    tracing_max_latency;
+
+/*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
+ */
+static void
+__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+       struct trace_array_cpu *data = tr->data[cpu];
+       struct trace_array_cpu *max_data;
+
+       max_tr.cpu = cpu;
+       max_tr.time_start = data->preempt_timestamp;
+
+       max_data = max_tr.data[cpu];
+       max_data->saved_latency = tracing_max_latency;
+       max_data->critical_start = data->critical_start;
+       max_data->critical_end = data->critical_end;
+
+       memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
+       max_data->pid = tsk->pid;
+       max_data->uid = task_uid(tsk);
+       max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
+       max_data->policy = tsk->policy;
+       max_data->rt_priority = tsk->rt_priority;
+
+       /* record this tasks comm */
+       tracing_record_cmdline(tsk);
+}
+
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -425,18 +655,17 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
        struct ring_buffer *buf = tr->buffer;
 
+       if (trace_stop_count)
+               return;
+
        WARN_ON_ONCE(!irqs_disabled());
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
 
        tr->buffer = max_tr.buffer;
        max_tr.buffer = buf;
 
-       ftrace_disable_cpu();
-       ring_buffer_reset(tr->buffer);
-       ftrace_enable_cpu();
-
        __update_max_tr(tr, tsk, cpu);
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
 }
 
 /**
@@ -452,21 +681,35 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
        int ret;
 
+       if (trace_stop_count)
+               return;
+
        WARN_ON_ONCE(!irqs_disabled());
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
 
        ftrace_disable_cpu();
 
-       ring_buffer_reset(max_tr.buffer);
        ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
+       if (ret == -EBUSY) {
+               /*
+                * We failed to swap the buffer due to a commit taking
+                * place on this CPU. We fail to record, but we reset
+                * the max trace buffer (no one writes directly to it)
+                * and flag that it failed.
+                */
+               trace_array_printk(&max_tr, _THIS_IP_,
+                       "Failed to swap buffers due to commit in progress\n");
+       }
+
        ftrace_enable_cpu();
 
-       WARN_ON_ONCE(ret && ret != -EAGAIN);
+       WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
 
        __update_max_tr(tr, tsk, cpu);
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
 }
+#endif /* CONFIG_TRACER_MAX_TRACE */
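update_max_tr() above preserves the max-latency snapshot by swapping the live and max buffer pointers under ftrace_max_lock instead of copying any data, and update_max_tr_single() now tolerates -EBUSY when a per-cpu swap races with a commit. A toy user-space illustration of the pointer-swap idea under a plain mutex (ring_t and its fields are invented for the sketch; the real code swaps ring buffers, not strings):

#include <pthread.h>
#include <stdio.h>

typedef struct {
    const char *tag;    /* stands in for a real ring buffer */
} ring_t;

static ring_t live_buf = { "live" };
static ring_t max_buf  = { "max" };

static struct {
    ring_t *buffer;
} global_trace = { &live_buf }, max_tr = { &max_buf };

static pthread_mutex_t ftrace_max_lock = PTHREAD_MUTEX_INITIALIZER;

/* Snapshot: exchange the buffer pointers rather than copying contents. */
static void update_max_tr(void)
{
    ring_t *buf;

    pthread_mutex_lock(&ftrace_max_lock);
    buf = global_trace.buffer;
    global_trace.buffer = max_tr.buffer;
    max_tr.buffer = buf;
    pthread_mutex_unlock(&ftrace_max_lock);
}

int main(void)
{
    update_max_tr();
    printf("global_trace now writes to: %s\n", global_trace.buffer->tag);
    printf("max_tr now holds:           %s\n", max_tr.buffer->tag);
    return 0;
}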
 
 /**
  * register_tracer - register a tracer with the ftrace system.
@@ -479,7 +722,6 @@ __releases(kernel_lock)
 __acquires(kernel_lock)
 {
        struct tracer *t;
-       int len;
        int ret = 0;
 
        if (!type->name) {
@@ -487,6 +729,11 @@ __acquires(kernel_lock)
                return -1;
        }
 
+       if (strlen(type->name) > MAX_TRACER_SIZE) {
+               pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
+               return -1;
+       }
+
        /*
         * When this gets called we hold the BKL which means that
         * preemption is disabled. Various trace selftests however
@@ -501,7 +748,7 @@ __acquires(kernel_lock)
        for (t = trace_types; t; t = t->next) {
                if (strcmp(type->name, t->name) == 0) {
                        /* already found */
-                       pr_info("Trace %s already registered\n",
+                       pr_info("Tracer %s already registered\n",
                                type->name);
                        ret = -1;
                        goto out;
@@ -523,7 +770,6 @@ __acquires(kernel_lock)
        if (type->selftest && !tracing_selftest_disabled) {
                struct tracer *saved_tracer = current_trace;
                struct trace_array *tr = &global_trace;
-               int i;
 
                /*
                 * Run a selftest on this tracer.
@@ -532,8 +778,7 @@ __acquires(kernel_lock)
                 * internal tracing to verify that everything is in order.
                 * If we fail, we do not register this tracer.
                 */
-               for_each_tracing_cpu(i)
-                       tracing_reset(tr, i);
+               tracing_reset_online_cpus(tr);
 
                current_trace = type;
                /* the test is responsible for initializing and enabling */
@@ -546,8 +791,7 @@ __acquires(kernel_lock)
                        goto out;
                }
                /* Only reset on passing, to avoid touching corrupted buffers */
-               for_each_tracing_cpu(i)
-                       tracing_reset(tr, i);
+               tracing_reset_online_cpus(tr);
 
                printk(KERN_CONT "PASSED\n");
        }
@@ -555,9 +799,6 @@ __acquires(kernel_lock)
 
        type->next = trace_types;
        trace_types = type;
-       len = strlen(type->name);
-       if (len > max_tracer_type_len)
-               max_tracer_type_len = len;
 
  out:
        tracing_selftest_running = false;
@@ -566,7 +807,7 @@ __acquires(kernel_lock)
        if (ret || !default_bootup_tracer)
                goto out_unlock;
 
-       if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
+       if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
                goto out_unlock;
 
        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -588,14 +829,13 @@ __acquires(kernel_lock)
 void unregister_tracer(struct tracer *type)
 {
        struct tracer **t;
-       int len;
 
        mutex_lock(&trace_types_lock);
        for (t = &trace_types; *t; t = &(*t)->next) {
                if (*t == type)
                        goto found;
        }
-       pr_info("Trace %s not registered\n", type->name);
+       pr_info("Tracer %s not registered\n", type->name);
        goto out;
 
  found:
@@ -608,35 +848,46 @@ void unregister_tracer(struct tracer *type)
                        current_trace->stop(&global_trace);
                current_trace = &nop_trace;
        }
-
-       if (strlen(type->name) != max_tracer_type_len)
-               goto out;
-
-       max_tracer_type_len = 0;
-       for (t = &trace_types; *t; t = &(*t)->next) {
-               len = strlen((*t)->name);
-               if (len > max_tracer_type_len)
-                       max_tracer_type_len = len;
-       }
- out:
+out:
        mutex_unlock(&trace_types_lock);
 }
 
-void tracing_reset(struct trace_array *tr, int cpu)
+static void __tracing_reset(struct ring_buffer *buffer, int cpu)
 {
        ftrace_disable_cpu();
-       ring_buffer_reset_cpu(tr->buffer, cpu);
+       ring_buffer_reset_cpu(buffer, cpu);
        ftrace_enable_cpu();
 }
 
+void tracing_reset(struct trace_array *tr, int cpu)
+{
+       struct ring_buffer *buffer = tr->buffer;
+
+       ring_buffer_record_disable(buffer);
+
+       /* Make sure all commits have finished */
+       synchronize_sched();
+       __tracing_reset(buffer, cpu);
+
+       ring_buffer_record_enable(buffer);
+}
+
 void tracing_reset_online_cpus(struct trace_array *tr)
 {
+       struct ring_buffer *buffer = tr->buffer;
        int cpu;
 
+       ring_buffer_record_disable(buffer);
+
+       /* Make sure all commits have finished */
+       synchronize_sched();
+
        tr->time_start = ftrace_now(tr->cpu);
 
        for_each_online_cpu(cpu)
-               tracing_reset(tr, cpu);
+               __tracing_reset(buffer, cpu);
+
+       ring_buffer_record_enable(buffer);
 }
 
 void tracing_reset_current(int cpu)
@@ -655,7 +906,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
 static int cmdline_idx;
-static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 /* temporary disable recording */
 static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -667,8 +918,10 @@ static void trace_init_cmdlines(void)
        cmdline_idx = 0;
 }
 
-static int trace_stop_count;
-static DEFINE_SPINLOCK(tracing_start_lock);
+int is_tracing_stopped(void)
+{
+       return trace_stop_count;
+}
 
 /**
  * ftrace_off_permanent - disable all ftrace code permanently
@@ -709,6 +962,8 @@ void tracing_start(void)
                goto out;
        }
 
+       /* Prevent the buffers from switching */
+       arch_spin_lock(&ftrace_max_lock);
 
        buffer = global_trace.buffer;
        if (buffer)
@@ -718,6 +973,8 @@ void tracing_start(void)
        if (buffer)
                ring_buffer_record_enable(buffer);
 
+       arch_spin_unlock(&ftrace_max_lock);
+
        ftrace_start();
  out:
        spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -739,6 +996,9 @@ void tracing_stop(void)
        if (trace_stop_count++)
                goto out;
 
+       /* Prevent the buffers from switching */
+       arch_spin_lock(&ftrace_max_lock);
+
        buffer = global_trace.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
@@ -747,6 +1007,8 @@ void tracing_stop(void)
        if (buffer)
                ring_buffer_record_disable(buffer);
 
+       arch_spin_unlock(&ftrace_max_lock);
+
  out:
        spin_unlock_irqrestore(&tracing_start_lock, flags);
 }
@@ -766,7 +1028,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
         * nor do we want to disable interrupts,
         * so if we miss here, then better luck next time.
         */
-       if (!__raw_spin_trylock(&trace_cmdline_lock))
+       if (!arch_spin_trylock(&trace_cmdline_lock))
                return;
 
        idx = map_pid_to_cmdline[tsk->pid];
@@ -791,7 +1053,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
 
        memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
 
-       __raw_spin_unlock(&trace_cmdline_lock);
+       arch_spin_unlock(&trace_cmdline_lock);
 }
 
 void trace_find_cmdline(int pid, char comm[])
@@ -803,20 +1065,25 @@ void trace_find_cmdline(int pid, char comm[])
                return;
        }
 
+       if (WARN_ON_ONCE(pid < 0)) {
+               strcpy(comm, "<XXX>");
+               return;
+       }
+
        if (pid > PID_MAX_DEFAULT) {
                strcpy(comm, "<...>");
                return;
        }
 
        preempt_disable();
-       __raw_spin_lock(&trace_cmdline_lock);
+       arch_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
        if (map != NO_CMDLINE_MAP)
                strcpy(comm, saved_cmdlines[map]);
        else
                strcpy(comm, "<...>");
 
-       __raw_spin_unlock(&trace_cmdline_lock);
+       arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
 }
 
@@ -837,7 +1104,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
        entry->preempt_count            = pc & 0xff;
        entry->pid                      = (tsk) ? tsk->pid : 0;
-       entry->tgid                     = (tsk) ? tsk->tgid : 0;
+       entry->lock_depth               = (tsk) ? tsk->lock_depth : 0;
        entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -848,15 +1115,17 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
                ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
                (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
 }
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
-struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-                                                   int type,
-                                                   unsigned long len,
-                                                   unsigned long flags, int pc)
+struct ring_buffer_event *
+trace_buffer_lock_reserve(struct ring_buffer *buffer,
+                         int type,
+                         unsigned long len,
+                         unsigned long flags, int pc)
 {
        struct ring_buffer_event *event;
 
-       event = ring_buffer_lock_reserve(tr->buffer, len);
+       event = ring_buffer_lock_reserve(buffer, len);
        if (event != NULL) {
                struct trace_entry *ent = ring_buffer_event_data(event);
 
@@ -866,58 +1135,60 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
 
        return event;
 }
-static void ftrace_trace_stack(struct trace_array *tr,
-                              unsigned long flags, int skip, int pc);
-static void ftrace_trace_userstack(struct trace_array *tr,
-                                  unsigned long flags, int pc);
 
-static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
-                                       struct ring_buffer_event *event,
-                                       unsigned long flags, int pc,
-                                       int wake)
+static inline void
+__trace_buffer_unlock_commit(struct ring_buffer *buffer,
+                            struct ring_buffer_event *event,
+                            unsigned long flags, int pc,
+                            int wake)
 {
-       ring_buffer_unlock_commit(tr->buffer, event);
+       ring_buffer_unlock_commit(buffer, event);
 
-       ftrace_trace_stack(tr, flags, 6, pc);
-       ftrace_trace_userstack(tr, flags, pc);
+       ftrace_trace_stack(buffer, flags, 6, pc);
+       ftrace_trace_userstack(buffer, flags, pc);
 
        if (wake)
                trace_wake_up();
 }
 
-void trace_buffer_unlock_commit(struct trace_array *tr,
-                                       struct ring_buffer_event *event,
-                                       unsigned long flags, int pc)
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
+                               struct ring_buffer_event *event,
+                               unsigned long flags, int pc)
 {
-       __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
+       __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(int type, unsigned long len,
+trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
+                                 int type, unsigned long len,
                                  unsigned long flags, int pc)
 {
-       return trace_buffer_lock_reserve(&global_trace,
+       *current_rb = global_trace.buffer;
+       return trace_buffer_lock_reserve(*current_rb,
                                         type, len, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
 
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
+                                       struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
 {
-       __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+       __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
-                                       unsigned long flags, int pc)
+void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
+                                      struct ring_buffer_event *event,
+                                      unsigned long flags, int pc)
 {
-       __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+       __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
 }
 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
 
-void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
+                                        struct ring_buffer_event *event)
 {
-       ring_buffer_discard_commit(global_trace.buffer, event);
+       ring_buffer_discard_commit(buffer, event);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
@@ -927,14 +1198,15 @@ trace_function(struct trace_array *tr,
               int pc)
 {
        struct ftrace_event_call *call = &event_function;
+       struct ring_buffer *buffer = tr->buffer;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
 
        /* If we are reading the ring buffer, don't trace */
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
                return;
 
-       event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
+       event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
                                          flags, pc);
        if (!event)
                return;
@@ -942,58 +1214,10 @@ trace_function(struct trace_array *tr,
        entry->ip                       = ip;
        entry->parent_ip                = parent_ip;
 
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, buffer, event))
+               ring_buffer_unlock_commit(buffer, event);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int __trace_graph_entry(struct trace_array *tr,
-                               struct ftrace_graph_ent *trace,
-                               unsigned long flags,
-                               int pc)
-{
-       struct ftrace_event_call *call = &event_funcgraph_entry;
-       struct ring_buffer_event *event;
-       struct ftrace_graph_ent_entry *entry;
-
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
-               return 0;
-
-       event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return 0;
-       entry   = ring_buffer_event_data(event);
-       entry->graph_ent                        = *trace;
-       if (!filter_current_check_discard(call, entry, event))
-               ring_buffer_unlock_commit(global_trace.buffer, event);
-
-       return 1;
-}
-
-static void __trace_graph_return(struct trace_array *tr,
-                               struct ftrace_graph_ret *trace,
-                               unsigned long flags,
-                               int pc)
-{
-       struct ftrace_event_call *call = &event_funcgraph_exit;
-       struct ring_buffer_event *event;
-       struct ftrace_graph_ret_entry *entry;
-
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
-               return;
-
-       event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->ret                              = *trace;
-       if (!filter_current_check_discard(call, entry, event))
-               ring_buffer_unlock_commit(global_trace.buffer, event);
-}
-#endif
-
 void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
        unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -1003,17 +1227,17 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
                trace_function(tr, ip, parent_ip, flags, pc);
 }
 
-static void __ftrace_trace_stack(struct trace_array *tr,
+#ifdef CONFIG_STACKTRACE
+static void __ftrace_trace_stack(struct ring_buffer *buffer,
                                 unsigned long flags,
                                 int skip, int pc)
 {
-#ifdef CONFIG_STACKTRACE
        struct ftrace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;
 
-       event = trace_buffer_lock_reserve(tr, TRACE_STACK,
+       event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
@@ -1026,32 +1250,44 @@ static void __ftrace_trace_stack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace(&trace);
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-#endif
+       if (!filter_check_discard(call, entry, buffer, event))
+               ring_buffer_unlock_commit(buffer, event);
 }
 
-static void ftrace_trace_stack(struct trace_array *tr,
-                              unsigned long flags,
-                              int skip, int pc)
+void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
+                       int skip, int pc)
 {
        if (!(trace_flags & TRACE_ITER_STACKTRACE))
                return;
 
-       __ftrace_trace_stack(tr, flags, skip, pc);
+       __ftrace_trace_stack(buffer, flags, skip, pc);
 }
 
-void __trace_stack(struct trace_array *tr,
-                  unsigned long flags,
-                  int skip, int pc)
+void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
+                  int pc)
 {
-       __ftrace_trace_stack(tr, flags, skip, pc);
+       __ftrace_trace_stack(tr->buffer, flags, skip, pc);
 }
 
-static void ftrace_trace_userstack(struct trace_array *tr,
-                                  unsigned long flags, int pc)
+/**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+void trace_dump_stack(void)
+{
+       unsigned long flags;
+
+       if (tracing_disabled || tracing_selftest_running)
+               return;
+
+       local_save_flags(flags);
+
+       /* Skipping 3 traces seems to get us to the caller of this function */
+       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+}
+
+void
+ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
-#ifdef CONFIG_STACKTRACE
        struct ftrace_event_call *call = &event_user_stack;
        struct ring_buffer_event *event;
        struct userstack_entry *entry;
@@ -1060,12 +1296,20 @@ static void ftrace_trace_userstack(struct trace_array *tr,
        if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
                return;
 
-       event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
+       /*
+        * NMIs cannot handle page faults, even with fixups.
+        * Saving the user stack can (and often does) fault.
+        */
+       if (unlikely(in_nmi()))
+               return;
+
+       event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
 
+       entry->tgid             = current->tgid;
        memset(&entry->caller, 0, sizeof(entry->caller));
 
        trace.nr_entries        = 0;
@@ -1074,9 +1318,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace_user(&trace);
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-#endif
+       if (!filter_check_discard(call, entry, buffer, event))
+               ring_buffer_unlock_commit(buffer, event);
 }
 
 #ifdef UNUSED
@@ -1086,16 +1329,20 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 }
 #endif /* UNUSED */
 
+#endif /* CONFIG_STACKTRACE */
+
 static void
 ftrace_trace_special(void *__tr,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
                     int pc)
 {
+       struct ftrace_event_call *call = &event_special;
        struct ring_buffer_event *event;
        struct trace_array *tr = __tr;
+       struct ring_buffer *buffer = tr->buffer;
        struct special_entry *entry;
 
-       event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
+       event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
                                          sizeof(*entry), 0, pc);
        if (!event)
                return;
@@ -1103,7 +1350,9 @@ ftrace_trace_special(void *__tr,
        entry->arg1                     = arg1;
        entry->arg2                     = arg2;
        entry->arg3                     = arg3;
-       trace_buffer_unlock_commit(tr, event, 0, pc);
+
+       if (!filter_check_discard(call, entry, buffer, event))
+               trace_buffer_unlock_commit(buffer, event, 0, pc);
 }
 
 void
@@ -1114,62 +1363,6 @@ __trace_special(void *__tr, void *__data,
 }
 
 void
-tracing_sched_switch_trace(struct trace_array *tr,
-                          struct task_struct *prev,
-                          struct task_struct *next,
-                          unsigned long flags, int pc)
-{
-       struct ftrace_event_call *call = &event_context_switch;
-       struct ring_buffer_event *event;
-       struct ctx_switch_entry *entry;
-
-       event = trace_buffer_lock_reserve(tr, TRACE_CTX,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->prev_pid                 = prev->pid;
-       entry->prev_prio                = prev->prio;
-       entry->prev_state               = prev->state;
-       entry->next_pid                 = next->pid;
-       entry->next_prio                = next->prio;
-       entry->next_state               = next->state;
-       entry->next_cpu = task_cpu(next);
-
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               trace_buffer_unlock_commit(tr, event, flags, pc);
-}
-
-void
-tracing_sched_wakeup_trace(struct trace_array *tr,
-                          struct task_struct *wakee,
-                          struct task_struct *curr,
-                          unsigned long flags, int pc)
-{
-       struct ftrace_event_call *call = &event_wakeup;
-       struct ring_buffer_event *event;
-       struct ctx_switch_entry *entry;
-
-       event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
-                                         sizeof(*entry), flags, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->prev_pid                 = curr->pid;
-       entry->prev_prio                = curr->prio;
-       entry->prev_state               = curr->state;
-       entry->next_pid                 = wakee->pid;
-       entry->next_prio                = wakee->prio;
-       entry->next_state               = wakee->state;
-       entry->next_cpu                 = task_cpu(wakee);
-
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-       ftrace_trace_stack(tr, flags, 6, pc);
-       ftrace_trace_userstack(tr, flags, pc);
-}
-
-void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
        struct trace_array *tr = &global_trace;
@@ -1193,80 +1386,19 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
        local_irq_restore(flags);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-int trace_graph_entry(struct ftrace_graph_ent *trace)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int ret;
-       int cpu;
-       int pc;
-
-       if (!ftrace_trace_task(current))
-               return 0;
-
-       if (!ftrace_graph_addr(trace->func))
-               return 0;
-
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-       if (likely(disabled == 1)) {
-               pc = preempt_count();
-               ret = __trace_graph_entry(tr, trace, flags, pc);
-       } else {
-               ret = 0;
-       }
-       /* Only do the atomic if it is not already set */
-       if (!test_tsk_trace_graph(current))
-               set_tsk_trace_graph(current);
-
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-
-       return ret;
-}
-
-void trace_graph_return(struct ftrace_graph_ret *trace)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int cpu;
-       int pc;
-
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-       if (likely(disabled == 1)) {
-               pc = preempt_count();
-               __trace_graph_return(tr, trace, flags, pc);
-       }
-       if (!trace->depth)
-               clear_tsk_trace_graph(current);
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-}
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
  */
 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
-       static raw_spinlock_t trace_buf_lock =
-               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t trace_buf_lock =
+               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
 
        struct ftrace_event_call *call = &event_bprint;
        struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        struct bprint_entry *entry;
@@ -1292,14 +1424,16 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 
        /* Lockdep uses trace_printk for lock tracing */
        local_irq_save(flags);
-       __raw_spin_lock(&trace_buf_lock);
+       arch_spin_lock(&trace_buf_lock);
        len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
        if (len > TRACE_BUF_SIZE || len < 0)
                goto out_unlock;
 
        size = sizeof(*entry) + sizeof(u32) * len;
-       event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc);
+       buffer = tr->buffer;
+       event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
+                                         flags, pc);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
@@ -1307,11 +1441,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        entry->fmt                      = fmt;
 
        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, buffer, event)) {
+               ring_buffer_unlock_commit(buffer, event);
+               ftrace_trace_stack(buffer, flags, 6, pc);
+       }
 
 out_unlock:
-       __raw_spin_unlock(&trace_buf_lock);
+       arch_spin_unlock(&trace_buf_lock);
        local_irq_restore(flags);
 
 out:
@@ -1321,16 +1457,32 @@ out:
 
        return len;
 }
-EXPORT_SYMBOL_GPL(trace_vbprintk);
+EXPORT_SYMBOL_GPL(trace_vbprintk);
+
+int trace_array_printk(struct trace_array *tr,
+                      unsigned long ip, const char *fmt, ...)
+{
+       int ret;
+       va_list ap;
+
+       if (!(trace_flags & TRACE_ITER_PRINTK))
+               return 0;
+
+       va_start(ap, fmt);
+       ret = trace_array_vprintk(tr, ip, fmt, ap);
+       va_end(ap);
+       return ret;
+}
 
-int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+int trace_array_vprintk(struct trace_array *tr,
+                       unsigned long ip, const char *fmt, va_list args)
 {
-       static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];
 
        struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
-       struct trace_array *tr = &global_trace;
+       struct ring_buffer *buffer;
        struct trace_array_cpu *data;
        int cpu, len = 0, size, pc;
        struct print_entry *entry;
@@ -1351,26 +1503,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 
        pause_graph_tracing();
        raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&trace_buf_lock);
+       arch_spin_lock(&trace_buf_lock);
        len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
-       len = min(len, TRACE_BUF_SIZE-1);
-       trace_buf[len] = 0;
-
        size = sizeof(*entry) + len + 1;
-       event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
+       buffer = tr->buffer;
+       event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+                                         irq_flags, pc);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
-       entry->ip                       = ip;
+       entry->ip = ip;
 
        memcpy(&entry->buf, trace_buf, len);
-       entry->buf[len] = 0;
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
+       entry->buf[len] = '\0';
+       if (!filter_check_discard(call, entry, buffer, event)) {
+               ring_buffer_unlock_commit(buffer, event);
+               ftrace_trace_stack(buffer, irq_flags, 6, pc);
+       }
 
  out_unlock:
-       __raw_spin_unlock(&trace_buf_lock);
+       arch_spin_unlock(&trace_buf_lock);
        raw_local_irq_restore(irq_flags);
        unpause_graph_tracing();
  out:
@@ -1379,6 +1532,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 
        return len;
 }
+
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+{
+       return trace_array_vprintk(&global_trace, ip, fmt, args);
+}
 EXPORT_SYMBOL_GPL(trace_vprintk);
 
 enum trace_file_type {
@@ -1399,7 +1557,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
 }
 
 static struct trace_entry *
-peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
+               unsigned long *lost_events)
 {
        struct ring_buffer_event *event;
        struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1410,7 +1569,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
        if (buf_iter)
                event = ring_buffer_iter_peek(buf_iter, ts);
        else
-               event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+               event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
+                                        lost_events);
 
        ftrace_enable_cpu();
 
@@ -1418,10 +1578,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 }
 
 static struct trace_entry *
-__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
+                 unsigned long *missing_events, u64 *ent_ts)
 {
        struct ring_buffer *buffer = iter->tr->buffer;
        struct trace_entry *ent, *next = NULL;
+       unsigned long lost_events = 0, next_lost = 0;
        int cpu_file = iter->cpu_file;
        u64 next_ts = 0, ts;
        int next_cpu = -1;
@@ -1434,7 +1596,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
        if (cpu_file > TRACE_PIPE_ALL_CPU) {
                if (ring_buffer_empty_cpu(buffer, cpu_file))
                        return NULL;
-               ent = peek_next_entry(iter, cpu_file, ent_ts);
+               ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
                if (ent_cpu)
                        *ent_cpu = cpu_file;
 
@@ -1446,7 +1608,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
                if (ring_buffer_empty_cpu(buffer, cpu))
                        continue;
 
-               ent = peek_next_entry(iter, cpu, &ts);
+               ent = peek_next_entry(iter, cpu, &ts, &lost_events);
 
                /*
                 * Pick the entry with the smallest timestamp:
@@ -1455,6 +1617,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
                        next = ent;
                        next_cpu = cpu;
                        next_ts = ts;
+                       next_lost = lost_events;
                }
        }
 
@@ -1464,6 +1627,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
        if (ent_ts)
                *ent_ts = next_ts;
 
+       if (missing_events)
+               *missing_events = next_lost;
+
        return next;
 }
 
@@ -1471,13 +1637,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
                                          int *ent_cpu, u64 *ent_ts)
 {
-       return __find_next_entry(iter, ent_cpu, ent_ts);
+       return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
 }
 
 /* Find the next real entry, and increment the iterator to the next entry */
 static void *find_next_entry_inc(struct trace_iterator *iter)
 {
-       iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
+       iter->ent = __find_next_entry(iter, &iter->cpu,
+                                     &iter->lost_events, &iter->ts);
 
        if (iter->ent)
                trace_iterator_increment(iter);
@@ -1489,7 +1656,8 @@ static void trace_consume(struct trace_iterator *iter)
 {
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();
-       ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+       ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
+                           &iter->lost_events);
        ftrace_enable_cpu();
 }
 
@@ -1499,6 +1667,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
        int i = (int)*pos;
        void *ent;
 
+       WARN_ON_ONCE(iter->leftover);
+
        (*pos)++;
 
        /* can't go backwards */
@@ -1518,13 +1688,38 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
        return ent;
 }
 
+static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
+{
+       struct trace_array *tr = iter->tr;
+       struct ring_buffer_event *event;
+       struct ring_buffer_iter *buf_iter;
+       unsigned long entries = 0;
+       u64 ts;
+
+       tr->data[cpu]->skipped_entries = 0;
+
+       if (!iter->buffer_iter[cpu])
+               return;
+
+       buf_iter = iter->buffer_iter[cpu];
+       ring_buffer_iter_reset(buf_iter);
+
+       /*
+        * With the max latency tracers, a reset may never have taken
+        * place on a cpu. This is evident from the timestamps being
+        * before the start of the buffer.
+        */
+       while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
+               if (ts >= iter->tr->time_start)
+                       break;
+               entries++;
+               ring_buffer_read(buf_iter, NULL);
+       }
+
+       tr->data[cpu]->skipped_entries = entries;
+}
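tracing_iter_reset() rewinds a cpu's iterator and then discards any events stamped before tr->time_start, remembering how many it dropped in skipped_entries so the entry counts printed in the header stay correct. A condensed user-space analog over a plain array (the event and iter structures and the sample timestamps are made up):

#include <stdio.h>

struct event {
    unsigned long long ts;  /* timestamp */
    int payload;
};

struct iter {
    const struct event *ev;
    int nr;                 /* number of events */
    int pos;                /* current index */
    int skipped_entries;
};

/* Position the iterator at the first event not older than time_start. */
static void iter_reset(struct iter *it, unsigned long long time_start)
{
    it->pos = 0;
    it->skipped_entries = 0;

    while (it->pos < it->nr && it->ev[it->pos].ts < time_start) {
        it->pos++;
        it->skipped_entries++;
    }
}

int main(void)
{
    static const struct event ev[] = {
        { 100, 1 }, { 200, 2 }, { 300, 3 }, { 400, 4 },
    };
    struct iter it = { ev, 4, 0, 0 };

    iter_reset(&it, 250);
    printf("skipped %d entries, first ts %llu\n",
           it.skipped_entries, it.ev[it.pos].ts);
    return 0;
}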
+
 /*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
  * The current tracer is copied to avoid a global locking
  * all around.
  */
@@ -1556,28 +1751,40 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 
                if (cpu_file == TRACE_PIPE_ALL_CPU) {
                        for_each_tracing_cpu(cpu)
-                               ring_buffer_iter_reset(iter->buffer_iter[cpu]);
+                               tracing_iter_reset(iter, cpu);
                } else
-                       ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
-
+                       tracing_iter_reset(iter, cpu_file);
 
                ftrace_enable_cpu();
 
+               iter->leftover = 0;
                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
                        ;
 
        } else {
-               l = *pos - 1;
-               p = s_next(m, p, &l);
+               /*
+                * If we overflowed the seq_file before, then we want
+                * to just reuse the trace_seq buffer again.
+                */
+               if (iter->leftover)
+                       p = iter;
+               else {
+                       l = *pos - 1;
+                       p = s_next(m, p, &l);
+               }
        }
 
        trace_event_read_lock();
+       trace_access_lock(cpu_file);
        return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
+       struct trace_iterator *iter = m->private;
+
        atomic_dec(&trace_record_cmdline_disabled);
+       trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 }
 
@@ -1588,10 +1795,10 @@ static void print_lat_help_header(struct seq_file *m)
        seq_puts(m, "#                | / _----=> need-resched    \n");
        seq_puts(m, "#                || / _---=> hardirq/softirq \n");
        seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
-       seq_puts(m, "#                |||| /                      \n");
-       seq_puts(m, "#                |||||     delay             \n");
-       seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
-       seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
+       seq_puts(m, "#                |||| /_--=> lock-depth       \n");
+       seq_puts(m, "#                |||||/     delay             \n");
+       seq_puts(m, "#  cmd     pid   |||||| time  |   caller      \n");
+       seq_puts(m, "#     \\   /      ||||||   \\   |   /           \n");
 }
 
 static void print_func_help_header(struct seq_file *m)
@@ -1601,23 +1808,39 @@ static void print_func_help_header(struct seq_file *m)
 }
 
 
-static void
+void
 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 {
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
        struct trace_array *tr = iter->tr;
        struct trace_array_cpu *data = tr->data[tr->cpu];
        struct tracer *type = current_trace;
-       unsigned long total;
-       unsigned long entries;
+       unsigned long entries = 0;
+       unsigned long total = 0;
+       unsigned long count;
        const char *name = "preemption";
+       int cpu;
 
        if (type)
                name = type->name;
 
-       entries = ring_buffer_entries(iter->tr->buffer);
-       total = entries +
-               ring_buffer_overruns(iter->tr->buffer);
+
+       for_each_tracing_cpu(cpu) {
+               count = ring_buffer_entries_cpu(tr->buffer, cpu);
+               /*
+                * If this buffer has skipped entries, then we hold all
+                * entries for the trace and we need to ignore the
+                * ones before the time stamp.
+                */
+               if (tr->data[cpu]->skipped_entries) {
+                       count -= tr->data[cpu]->skipped_entries;
+                       /* total is the same as the entries */
+                       total += count;
+               } else
+                       total += count +
+                               ring_buffer_overrun_cpu(tr->buffer, cpu);
+               entries += count;
+       }
 
        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
                   name, UTS_RELEASE);
@@ -1659,7 +1882,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
                seq_puts(m, "\n#  => ended at:   ");
                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
                trace_print_seq(m, &iter->seq);
-               seq_puts(m, "#\n");
+               seq_puts(m, "\n#\n");
        }
 
        seq_puts(m, "#\n");
@@ -1678,6 +1901,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
        if (cpumask_test_cpu(iter->cpu, iter->started))
                return;
 
+       if (iter->tr->data[iter->cpu]->skipped_entries)
+               return;
+
        cpumask_set_cpu(iter->cpu, iter->started);
 
        /* Don't print started cpu buffer for the first entry of the trace */
@@ -1710,7 +1936,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
        }
 
        if (event)
-               return event->trace(iter, sym_flags);
+               return event->funcs->trace(iter, sym_flags, event);
 
        if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
                goto partial;
@@ -1736,7 +1962,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 
        event = ftrace_find_event(entry->type);
        if (event)
-               return event->raw(iter, 0);
+               return event->funcs->raw(iter, 0, event);
 
        if (!trace_seq_printf(s, "%d ?\n", entry->type))
                goto partial;
@@ -1763,7 +1989,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 
        event = ftrace_find_event(entry->type);
        if (event) {
-               enum print_line_t ret = event->hex(iter, 0);
+               enum print_line_t ret = event->funcs->hex(iter, 0, event);
                if (ret != TRACE_TYPE_HANDLED)
                        return ret;
        }
@@ -1788,10 +2014,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
        }
 
        event = ftrace_find_event(entry->type);
-       return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
+       return event ? event->funcs->binary(iter, 0, event) :
+               TRACE_TYPE_HANDLED;
 }
 
-static int trace_empty(struct trace_iterator *iter)
+int trace_empty(struct trace_iterator *iter)
 {
        int cpu;
 
@@ -1826,6 +2053,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
        enum print_line_t ret;
 
+       if (iter->lost_events)
+               trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
+                                iter->cpu, iter->lost_events);
+
        if (iter->trace && iter->trace->print_line) {
                ret = iter->trace->print_line(iter);
                if (ret != TRACE_TYPE_UNHANDLED)
@@ -1854,9 +2085,27 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
        return print_trace_fmt(iter);
 }
 
+void trace_default_header(struct seq_file *m)
+{
+       struct trace_iterator *iter = m->private;
+
+       if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+               /* print nothing if the buffers are empty */
+               if (trace_empty(iter))
+                       return;
+               print_trace_header(m, iter);
+               if (!(trace_flags & TRACE_ITER_VERBOSE))
+                       print_lat_help_header(m);
+       } else {
+               if (!(trace_flags & TRACE_ITER_VERBOSE))
+                       print_func_help_header(m);
+       }
+}
+
 static int s_show(struct seq_file *m, void *v)
 {
        struct trace_iterator *iter = v;
+       int ret;
 
        if (iter->ent == NULL) {
                if (iter->tr) {
@@ -1865,26 +2114,36 @@ static int s_show(struct seq_file *m, void *v)
                }
                if (iter->trace && iter->trace->print_header)
                        iter->trace->print_header(m);
-               else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
-                       /* print nothing if the buffers are empty */
-                       if (trace_empty(iter))
-                               return 0;
-                       print_trace_header(m, iter);
-                       if (!(trace_flags & TRACE_ITER_VERBOSE))
-                               print_lat_help_header(m);
-               } else {
-                       if (!(trace_flags & TRACE_ITER_VERBOSE))
-                               print_func_help_header(m);
-               }
+               else
+                       trace_default_header(m);
+
+       } else if (iter->leftover) {
+               /*
+                * If we filled the seq_file buffer earlier, we
+                * want to just show it now.
+                */
+               ret = trace_print_seq(m, &iter->seq);
+
+               /* ret should this time be zero, but you never know */
+               iter->leftover = ret;
+
        } else {
                print_trace_line(iter);
-               trace_print_seq(m, &iter->seq);
+               ret = trace_print_seq(m, &iter->seq);
+               /*
+                * If we overflow the seq_file buffer, then it will
+                * ask us for this data again at start up.
+                * Use that instead.
+                *  ret is 0 if seq_file write succeeded.
+                *        -1 otherwise.
+                */
+               iter->leftover = ret;
        }
 
        return 0;
 }
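
s_show() now remembers, in iter->leftover, whether trace_print_seq() failed because the seq_file page was already full; on the next ->start/->show round the cached trace_seq is emitted again instead of advancing the iterator and losing a line. A minimal userspace sketch of that overflow-and-retry pattern (hypothetical names; a fixed-size buffer stands in for the seq_file page, and any single line is assumed to fit an empty page):

#include <stdio.h>
#include <string.h>

#define PAGE 32

static char page[PAGE];
static size_t used;
static char leftover[64];       /* line that did not fit last round */
static int have_leftover;

/* Try to copy a line into the page; return 0 on success, -1 on overflow. */
static int page_put(const char *line)
{
        size_t len = strlen(line);

        if (used + len > PAGE)
                return -1;
        memcpy(page + used, line, len);
        used += len;
        return 0;
}

static void show(const char *line)
{
        if (page_put(line) < 0) {
                /* keep the line; the next round must retry it first */
                snprintf(leftover, sizeof(leftover), "%s", line);
                have_leftover = 1;
        }
}

int main(void)
{
        const char *lines[] = { "line one\n", "line two\n",
                                "a rather long line three\n" };
        size_t i = 0;

        while (i < 3 || have_leftover) {
                used = 0;
                if (have_leftover) {            /* retry the leftover line first */
                        have_leftover = 0;
                        show(leftover);
                }
                while (i < 3 && !have_leftover)
                        show(lines[i++]);
                fwrite(page, 1, used, stdout);  /* "flush" the page */
        }
        return 0;
}
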
 
-static struct seq_operations tracer_seq_ops = {
+static const struct seq_operations tracer_seq_ops = {
        .start          = s_start,
        .next           = s_next,
        .stop           = s_stop,
@@ -1919,11 +2178,9 @@ __tracing_open(struct inode *inode, struct file *file)
        if (current_trace)
                *iter->trace = *current_trace;
 
-       if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+       if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
                goto fail;
 
-       cpumask_clear(iter->started);
-
        if (current_trace && current_trace->print_max)
                iter->tr = &max_tr;
        else
@@ -1940,19 +2197,28 @@ __tracing_open(struct inode *inode, struct file *file)
        if (ring_buffer_overruns(iter->tr->buffer))
                iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
+       /* stop the trace while dumping */
+       tracing_stop();
+
        if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
                for_each_tracing_cpu(cpu) {
-
                        iter->buffer_iter[cpu] =
-                               ring_buffer_read_start(iter->tr->buffer, cpu);
+                               ring_buffer_read_prepare(iter->tr->buffer, cpu);
+               }
+               ring_buffer_read_prepare_sync();
+               for_each_tracing_cpu(cpu) {
+                       ring_buffer_read_start(iter->buffer_iter[cpu]);
+                       tracing_iter_reset(iter, cpu);
                }
        } else {
                cpu = iter->cpu_file;
                iter->buffer_iter[cpu] =
-                               ring_buffer_read_start(iter->tr->buffer, cpu);
+                       ring_buffer_read_prepare(iter->tr->buffer, cpu);
+               ring_buffer_read_prepare_sync();
+               ring_buffer_read_start(iter->buffer_iter[cpu]);
+               tracing_iter_reset(iter, cpu);
        }
 
-       /* TODO stop tracer */
        ret = seq_open(file, &tracer_seq_ops);
        if (ret < 0) {
                fail_ret = ERR_PTR(ret);
@@ -1962,9 +2228,6 @@ __tracing_open(struct inode *inode, struct file *file)
        m = file->private_data;
        m->private = iter;
 
-       /* stop the trace while dumping */
-       tracing_stop();
-
        mutex_unlock(&trace_types_lock);
 
        return iter;
@@ -1975,6 +2238,7 @@ __tracing_open(struct inode *inode, struct file *file)
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
        }
        free_cpumask_var(iter->started);
+       tracing_start();
  fail:
        mutex_unlock(&trace_types_lock);
        kfree(iter->trace);
@@ -2031,7 +2295,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 
        /* If this file was open for write, then erase contents */
        if ((file->f_mode & FMODE_WRITE) &&
-           !(file->f_flags & O_APPEND)) {
+           (file->f_flags & O_TRUNC)) {
                long cpu = (long) inode->i_private;
 
                if (cpu == TRACE_PIPE_ALL_CPU)
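
The open-for-write path now clears the buffer only when the caller asked for O_TRUNC (shell '>' redirection), rather than on any non-append open. A sketch of resetting the trace from a program, assuming debugfs is mounted at /sys/kernel/debug:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* O_TRUNC is what tells the kernel to reset the ring buffer */
        int fd = open("/sys/kernel/debug/tracing/trace", O_WRONLY | O_TRUNC);

        if (fd < 0) {
                perror("open trace");
                return 1;
        }
        close(fd);
        return 0;
}
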
@@ -2096,7 +2360,7 @@ static int t_show(struct seq_file *m, void *v)
        return 0;
 }
 
-static struct seq_operations show_traces_seq_ops = {
+static const struct seq_operations show_traces_seq_ops = {
        .start          = t_start,
        .next           = t_next,
        .stop           = t_stop,
@@ -2188,7 +2452,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        mutex_lock(&tracing_cpumask_update_lock);
 
        local_irq_disable();
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
                /*
                 * Increase/decrease the disabled counter if we are
@@ -2203,7 +2467,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
                        atomic_dec(&global_trace.data[cpu]->disabled);
                }
        }
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
        local_irq_enable();
 
        cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2225,101 +2489,67 @@ static const struct file_operations tracing_cpumask_fops = {
        .write          = tracing_cpumask_write,
 };
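
tracing_cpumask_write() switches to the arch_spin_lock() API around the per-cpu enable/disable loop; the file format is unchanged and still takes a hex cpumask. A sketch that limits recording to CPUs 0 and 1 (path assumed, root and a debugfs mount required):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/debug/tracing/tracing_cpumask", O_WRONLY);
        const char *mask = "3\n";               /* hex mask: CPUs 0 and 1 */

        if (fd < 0) {
                perror("open tracing_cpumask");
                return 1;
        }
        if (write(fd, mask, strlen(mask)) < 0)
                perror("write");
        close(fd);
        return 0;
}
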
 
-static ssize_t
-tracing_trace_options_read(struct file *filp, char __user *ubuf,
-                      size_t cnt, loff_t *ppos)
+static int tracing_trace_options_show(struct seq_file *m, void *v)
 {
        struct tracer_opt *trace_opts;
        u32 tracer_flags;
-       int len = 0;
-       char *buf;
-       int r = 0;
        int i;
 
-
-       /* calculate max size */
-       for (i = 0; trace_options[i]; i++) {
-               len += strlen(trace_options[i]);
-               len += 3; /* "no" and newline */
-       }
-
        mutex_lock(&trace_types_lock);
        tracer_flags = current_trace->flags->val;
        trace_opts = current_trace->flags->opts;
 
-       /*
-        * Increase the size with names of options specific
-        * of the current tracer.
-        */
-       for (i = 0; trace_opts[i].name; i++) {
-               len += strlen(trace_opts[i].name);
-               len += 3; /* "no" and newline */
-       }
-
-       /* +2 for \n and \0 */
-       buf = kmalloc(len + 2, GFP_KERNEL);
-       if (!buf) {
-               mutex_unlock(&trace_types_lock);
-               return -ENOMEM;
-       }
-
        for (i = 0; trace_options[i]; i++) {
                if (trace_flags & (1 << i))
-                       r += sprintf(buf + r, "%s\n", trace_options[i]);
+                       seq_printf(m, "%s\n", trace_options[i]);
                else
-                       r += sprintf(buf + r, "no%s\n", trace_options[i]);
+                       seq_printf(m, "no%s\n", trace_options[i]);
        }
 
        for (i = 0; trace_opts[i].name; i++) {
                if (tracer_flags & trace_opts[i].bit)
-                       r += sprintf(buf + r, "%s\n",
-                               trace_opts[i].name);
+                       seq_printf(m, "%s\n", trace_opts[i].name);
                else
-                       r += sprintf(buf + r, "no%s\n",
-                               trace_opts[i].name);
+                       seq_printf(m, "no%s\n", trace_opts[i].name);
        }
        mutex_unlock(&trace_types_lock);
 
-       WARN_ON(r >= len + 2);
+       return 0;
+}
 
-       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+static int __set_tracer_option(struct tracer *trace,
+                              struct tracer_flags *tracer_flags,
+                              struct tracer_opt *opts, int neg)
+{
+       int ret;
 
-       kfree(buf);
-       return r;
+       ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+       if (ret)
+               return ret;
+
+       if (neg)
+               tracer_flags->val &= ~opts->bit;
+       else
+               tracer_flags->val |= opts->bit;
+       return 0;
 }
 
 /* Try to assign a tracer specific option */
 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
 {
-       struct tracer_flags *trace_flags = trace->flags;
+       struct tracer_flags *tracer_flags = trace->flags;
        struct tracer_opt *opts = NULL;
-       int ret = 0, i = 0;
-       int len;
+       int i;
 
-       for (i = 0; trace_flags->opts[i].name; i++) {
-               opts = &trace_flags->opts[i];
-               len = strlen(opts->name);
+       for (i = 0; tracer_flags->opts[i].name; i++) {
+               opts = &tracer_flags->opts[i];
 
-               if (strncmp(cmp, opts->name, len) == 0) {
-                       ret = trace->set_flag(trace_flags->val,
-                               opts->bit, !neg);
-                       break;
-               }
+               if (strcmp(cmp, opts->name) == 0)
+                       return __set_tracer_option(trace, trace->flags,
+                                                  opts, neg);
        }
-       /* Not found */
-       if (!trace_flags->opts[i].name)
-               return -EINVAL;
-
-       /* Refused to handle */
-       if (ret)
-               return ret;
-
-       if (neg)
-               trace_flags->val &= ~opts->bit;
-       else
-               trace_flags->val |= opts->bit;
 
-       return 0;
+       return -EINVAL;
 }
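
set_tracer_option() now matches the option name with strcmp() and delegates the update to __set_tracer_option(), which lets the tracer's ->set_flag() callback veto the change before the bit is set or cleared according to 'neg'. A tiny standalone sketch of just that last step (hypothetical struct, not the kernel's tracer_flags):

#include <stdio.h>

struct opts { unsigned int val; };

/* Mirror of the set/clear step: neg clears the bit, otherwise it is set. */
static void apply_option(struct opts *o, unsigned int bit, int neg)
{
        if (neg)
                o->val &= ~bit;
        else
                o->val |= bit;
}

int main(void)
{
        struct opts o = { .val = 0x1 };

        apply_option(&o, 0x4, 0);       /* "option"   -> set   */
        apply_option(&o, 0x1, 1);       /* "nooption" -> clear */
        printf("val = 0x%x\n", o.val);  /* prints 0x4 */
        return 0;
}
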
 
 static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2332,22 +2562,6 @@ static void set_tracer_flags(unsigned int mask, int enabled)
                trace_flags |= mask;
        else
                trace_flags &= ~mask;
-
-       if (mask == TRACE_ITER_GLOBAL_CLK) {
-               u64 (*func)(void);
-
-               if (enabled)
-                       func = trace_clock_global;
-               else
-                       func = trace_clock_local;
-
-               mutex_lock(&trace_types_lock);
-               ring_buffer_set_clock(global_trace.buffer, func);
-
-               if (max_tr.buffer)
-                       ring_buffer_set_clock(max_tr.buffer, func);
-               mutex_unlock(&trace_types_lock);
-       }
 }
 
 static ssize_t
@@ -2355,7 +2569,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
 {
        char buf[64];
-       char *cmp = buf;
+       char *cmp;
        int neg = 0;
        int ret;
        int i;
@@ -2367,16 +2581,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                return -EFAULT;
 
        buf[cnt] = 0;
+       cmp = strstrip(buf);
 
-       if (strncmp(buf, "no", 2) == 0) {
+       if (strncmp(cmp, "no", 2) == 0) {
                neg = 1;
                cmp += 2;
        }
 
        for (i = 0; trace_options[i]; i++) {
-               int len = strlen(trace_options[i]);
-
-               if (strncmp(cmp, trace_options[i], len) == 0) {
+               if (strcmp(cmp, trace_options[i]) == 0) {
                        set_tracer_flags(1 << i, !neg);
                        break;
                }
@@ -2391,14 +2604,23 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        return ret;
        }
 
-       filp->f_pos += cnt;
+       *ppos += cnt;
 
        return cnt;
 }
 
+static int tracing_trace_options_open(struct inode *inode, struct file *file)
+{
+       if (tracing_disabled)
+               return -ENODEV;
+       return single_open(file, tracing_trace_options_show, NULL);
+}
+
 static const struct file_operations tracing_iter_fops = {
-       .open           = tracing_open_generic,
-       .read           = tracing_trace_options_read,
+       .open           = tracing_trace_options_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
        .write          = tracing_trace_options_write,
 };
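
trace_options is now backed by seq_file for reading, and the write path strips surrounding whitespace before comparing names, so a trailing newline from echo is harmless. The strings themselves are unchanged: "option" enables, "nooption" disables. A userspace sketch that dumps the current state and then enables one flag, assuming the common 'print-parent' option name and the usual debugfs mount point:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define OPTS "/sys/kernel/debug/tracing/trace_options"

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open(OPTS, O_RDONLY);

        if (fd < 0) {
                perror("open trace_options");
                return 1;
        }
        while ((n = read(fd, buf, sizeof(buf))) > 0)    /* one "option"/"nooption" per line */
                fwrite(buf, 1, n, stdout);
        close(fd);

        fd = open(OPTS, O_WRONLY);
        if (fd >= 0) {
                if (write(fd, "print-parent\n", 13) < 0)  /* "noprint-parent" would clear it */
                        perror("write");
                close(fd);
        }
        return 0;
}
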
 
@@ -2533,7 +2755,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
        }
        mutex_unlock(&trace_types_lock);
 
-       filp->f_pos += cnt;
+       *ppos += cnt;
 
        return cnt;
 }
@@ -2542,7 +2764,7 @@ static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
                       size_t cnt, loff_t *ppos)
 {
-       char buf[max_tracer_type_len+2];
+       char buf[MAX_TRACER_SIZE+2];
        int r;
 
        mutex_lock(&trace_types_lock);
@@ -2692,15 +2914,15 @@ static ssize_t
 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
 {
-       char buf[max_tracer_type_len+1];
+       char buf[MAX_TRACER_SIZE+1];
        int i;
        size_t ret;
        int err;
 
        ret = cnt;
 
-       if (cnt > max_tracer_type_len)
-               cnt = max_tracer_type_len;
+       if (cnt > MAX_TRACER_SIZE)
+               cnt = MAX_TRACER_SIZE;
 
        if (copy_from_user(&buf, ubuf, cnt))
                return -EFAULT;
@@ -2715,7 +2937,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
        if (err)
                return err;
 
-       filp->f_pos += ret;
+       *ppos += ret;
 
        return ret;
 }
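
tracing_set_trace_write() sizes its buffer with MAX_TRACER_SIZE and advances *ppos instead of writing filp->f_pos directly; the user-visible interface is still just writing a tracer name into current_tracer. A sketch that selects the always-built-in 'nop' tracer (path assumed):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/debug/tracing/current_tracer", O_WRONLY);

        if (fd < 0) {
                perror("open current_tracer");
                return 1;
        }
        /* "nop" is built in; other names depend on the kernel config */
        if (write(fd, "nop\n", 4) != 4)
                perror("write");
        close(fd);
        return 0;
}
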
@@ -2772,22 +2994,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
        mutex_lock(&trace_types_lock);
 
-       /* We only allow one reader per cpu */
-       if (cpu_file == TRACE_PIPE_ALL_CPU) {
-               if (!cpumask_empty(tracing_reader_cpumask)) {
-                       ret = -EBUSY;
-                       goto out;
-               }
-               cpumask_setall(tracing_reader_cpumask);
-       } else {
-               if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
-                       cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
-               else {
-                       ret = -EBUSY;
-                       goto out;
-               }
-       }
-
        /* create a buffer to store the information to pass to userspace */
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter) {
@@ -2843,10 +3049,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
        mutex_lock(&trace_types_lock);
 
-       if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
-               cpumask_clear(tracing_reader_cpumask);
-       else
-               cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
+       if (iter->trace->pipe_close)
+               iter->trace->pipe_close(iter);
 
        mutex_unlock(&trace_types_lock);
 
@@ -3006,6 +3210,7 @@ waitagain:
        iter->pos = -1;
 
        trace_event_read_lock();
+       trace_access_lock(iter->cpu_file);
        while (find_next_entry_inc(iter) != NULL) {
                enum print_line_t ret;
                int len = iter->seq.len;
@@ -3022,6 +3227,7 @@ waitagain:
                if (iter->seq.len >= cnt)
                        break;
        }
+       trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 
        /* Now copy what we have to the user */
@@ -3054,7 +3260,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
        __free_page(spd->pages[idx]);
 }
 
-static struct pipe_buf_operations tracing_pipe_buf_ops = {
+static const struct pipe_buf_operations tracing_pipe_buf_ops = {
        .can_merge              = 0,
        .map                    = generic_pipe_buf_map,
        .unmap                  = generic_pipe_buf_unmap,
@@ -3104,12 +3310,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
                                        size_t len,
                                        unsigned int flags)
 {
-       struct page *pages[PIPE_BUFFERS];
-       struct partial_page partial[PIPE_BUFFERS];
+       struct page *pages_def[PIPE_DEF_BUFFERS];
+       struct partial_page partial_def[PIPE_DEF_BUFFERS];
        struct trace_iterator *iter = filp->private_data;
        struct splice_pipe_desc spd = {
-               .pages          = pages,
-               .partial        = partial,
+               .pages          = pages_def,
+               .partial        = partial_def,
                .nr_pages       = 0, /* This gets updated below. */
                .flags          = flags,
                .ops            = &tracing_pipe_buf_ops,
@@ -3120,6 +3326,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
        size_t rem;
        unsigned int i;
 
+       if (splice_grow_spd(pipe, &spd))
+               return -ENOMEM;
+
        /* copy the tracer to avoid using a global lock all around */
        mutex_lock(&trace_types_lock);
        if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3147,40 +3356,44 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
        }
 
        trace_event_read_lock();
+       trace_access_lock(iter->cpu_file);
 
        /* Fill as many pages as possible. */
-       for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
-               pages[i] = alloc_page(GFP_KERNEL);
-               if (!pages[i])
+       for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+               spd.pages[i] = alloc_page(GFP_KERNEL);
+               if (!spd.pages[i])
                        break;
 
                rem = tracing_fill_pipe_page(rem, iter);
 
                /* Copy the data into the page, so we can start over. */
                ret = trace_seq_to_buffer(&iter->seq,
-                                         page_address(pages[i]),
+                                         page_address(spd.pages[i]),
                                          iter->seq.len);
                if (ret < 0) {
-                       __free_page(pages[i]);
+                       __free_page(spd.pages[i]);
                        break;
                }
-               partial[i].offset = 0;
-               partial[i].len = iter->seq.len;
+               spd.partial[i].offset = 0;
+               spd.partial[i].len = iter->seq.len;
 
                trace_seq_init(&iter->seq);
        }
 
+       trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
        mutex_unlock(&iter->mutex);
 
        spd.nr_pages = i;
 
-       return splice_to_pipe(pipe, &spd);
+       ret = splice_to_pipe(pipe, &spd);
+out:
+       splice_shrink_spd(pipe, &spd);
+       return ret;
 
 out_err:
        mutex_unlock(&iter->mutex);
-
-       return ret;
+       goto out;
 }
 
 static ssize_t
@@ -3250,7 +3463,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
                }
        }
 
-       filp->f_pos += cnt;
+       *ppos += cnt;
 
        /* If check pages failed, return ENOMEM */
        if (tracing_disabled)
@@ -3285,7 +3498,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
                                        size_t cnt, loff_t *fpos)
 {
        char *buf;
-       char *end;
 
        if (tracing_disabled)
                return -EINVAL;
@@ -3293,7 +3505,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        if (cnt > TRACE_BUF_SIZE)
                cnt = TRACE_BUF_SIZE;
 
-       buf = kmalloc(cnt + 1, GFP_KERNEL);
+       buf = kmalloc(cnt + 2, GFP_KERNEL);
        if (buf == NULL)
                return -ENOMEM;
 
@@ -3301,20 +3513,79 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
                kfree(buf);
                return -EFAULT;
        }
+       if (buf[cnt-1] != '\n') {
+               buf[cnt] = '\n';
+               buf[cnt+1] = '\0';
+       } else
+               buf[cnt] = '\0';
 
-       /* Cut from the first nil or newline. */
-       buf[cnt] = '\0';
-       end = strchr(buf, '\n');
-       if (end)
-               *end = '\0';
-
-       cnt = mark_printk("%s\n", buf);
+       cnt = mark_printk("%s", buf);
        kfree(buf);
        *fpos += cnt;
 
        return cnt;
 }
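
tracing_mark_write() allocates one extra byte and appends the terminating newline itself when the caller omitted it, instead of cutting the message at the first newline. A sketch that injects a marker from a program (path assumed):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
        const char *msg = "hello from userspace";  /* no '\n'; the kernel adds one */

        if (fd < 0) {
                perror("open trace_marker");
                return 1;
        }
        if (write(fd, msg, strlen(msg)) < 0)
                perror("write");
        close(fd);
        return 0;
}
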
 
+static int tracing_clock_show(struct seq_file *m, void *v)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
+               seq_printf(m,
+                       "%s%s%s%s", i ? " " : "",
+                       i == trace_clock_id ? "[" : "", trace_clocks[i].name,
+                       i == trace_clock_id ? "]" : "");
+       seq_putc(m, '\n');
+
+       return 0;
+}
+
+static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
+                                  size_t cnt, loff_t *fpos)
+{
+       char buf[64];
+       const char *clockstr;
+       int i;
+
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(&buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       clockstr = strstrip(buf);
+
+       for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
+               if (strcmp(trace_clocks[i].name, clockstr) == 0)
+                       break;
+       }
+       if (i == ARRAY_SIZE(trace_clocks))
+               return -EINVAL;
+
+       trace_clock_id = i;
+
+       mutex_lock(&trace_types_lock);
+
+       ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
+       if (max_tr.buffer)
+               ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
+
+       mutex_unlock(&trace_types_lock);
+
+       *fpos += cnt;
+
+       return cnt;
+}
+
+static int tracing_clock_open(struct inode *inode, struct file *file)
+{
+       if (tracing_disabled)
+               return -ENODEV;
+       return single_open(file, tracing_clock_show, NULL);
+}
+
 static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
@@ -3352,6 +3623,14 @@ static const struct file_operations tracing_mark_fops = {
        .write          = tracing_mark_write,
 };
 
+static const struct file_operations trace_clock_fops = {
+       .open           = tracing_clock_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .write          = tracing_clock_write,
+};
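
The new trace_clock file lists the available clocks with the active one in brackets and switches the ring-buffer clock when a name is written back, replacing the old TRACE_ITER_GLOBAL_CLK trace option removed above. A sketch of reading and changing it, assuming the debugfs mount point and that a clock named 'global' is present:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define CLOCK "/sys/kernel/debug/tracing/trace_clock"

int main(void)
{
        char buf[256];
        ssize_t n;
        int fd = open(CLOCK, O_RDONLY);

        if (fd < 0) {
                perror("open trace_clock");
                return 1;
        }
        n = read(fd, buf, sizeof(buf) - 1);             /* e.g. "[local] global\n" */
        if (n > 0) {
                buf[n] = '\0';
                fputs(buf, stdout);
        }
        close(fd);

        fd = open(CLOCK, O_WRONLY);
        if (fd >= 0) {
                if (write(fd, "global\n", 7) < 0)       /* switch both buffers to this clock */
                        perror("write");
                close(fd);
        }
        return 0;
}
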
+
 struct ftrace_buffer_info {
        struct trace_array      *tr;
        void                    *spare;
@@ -3387,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
                     size_t count, loff_t *ppos)
 {
        struct ftrace_buffer_info *info = filp->private_data;
-       unsigned int pos;
        ssize_t ret;
        size_t size;
 
@@ -3405,18 +3683,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 
        info->read = 0;
 
+       trace_access_lock(info->cpu);
        ret = ring_buffer_read_page(info->tr->buffer,
                                    &info->spare,
                                    count,
                                    info->cpu, 0);
+       trace_access_unlock(info->cpu);
        if (ret < 0)
                return 0;
 
-       pos = ring_buffer_page_len(info->spare);
-
-       if (pos < PAGE_SIZE)
-               memset(info->spare + pos, 0, PAGE_SIZE - pos);
-
 read:
        size = PAGE_SIZE - info->read;
        if (size > count)
@@ -3478,7 +3753,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 }
 
 /* Pipe buffer operations for a buffer. */
-static struct pipe_buf_operations buffer_pipe_buf_ops = {
+static const struct pipe_buf_operations buffer_pipe_buf_ops = {
        .can_merge              = 0,
        .map                    = generic_pipe_buf_map,
        .unmap                  = generic_pipe_buf_unmap,
@@ -3511,11 +3786,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                            unsigned int flags)
 {
        struct ftrace_buffer_info *info = file->private_data;
-       struct partial_page partial[PIPE_BUFFERS];
-       struct page *pages[PIPE_BUFFERS];
+       struct partial_page partial_def[PIPE_DEF_BUFFERS];
+       struct page *pages_def[PIPE_DEF_BUFFERS];
        struct splice_pipe_desc spd = {
-               .pages          = pages,
-               .partial        = partial,
+               .pages          = pages_def,
+               .partial        = partial_def,
                .flags          = flags,
                .ops            = &buffer_pipe_buf_ops,
                .spd_release    = buffer_spd_release,
@@ -3524,21 +3799,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
        int entries, size, i;
        size_t ret;
 
+       if (splice_grow_spd(pipe, &spd))
+               return -ENOMEM;
+
        if (*ppos & (PAGE_SIZE - 1)) {
                WARN_ONCE(1, "Ftrace: previous read must page-align\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        if (len & (PAGE_SIZE - 1)) {
                WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
-               if (len < PAGE_SIZE)
-                       return -EINVAL;
+               if (len < PAGE_SIZE) {
+                       ret = -EINVAL;
+                       goto out;
+               }
                len &= PAGE_MASK;
        }
 
+       trace_access_lock(info->cpu);
        entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 
-       for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
+       for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                int r;
 
@@ -3583,6 +3865,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
        }
 
+       trace_access_unlock(info->cpu);
        spd.nr_pages = i;
 
        /* did we read anything? */
@@ -3592,11 +3875,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                else
                        ret = 0;
                /* TODO: block */
-               return ret;
+               goto out;
        }
 
        ret = splice_to_pipe(pipe, &spd);
-
+       splice_shrink_spd(pipe, &spd);
+out:
        return ret;
 }
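
tracing_buffers_splice_read() serves the per-cpu trace_pipe_raw files and now sizes the splice descriptor from pipe->buffers via splice_grow_spd()/splice_shrink_spd() instead of the fixed PIPE_BUFFERS arrays. A rough userspace sketch of pulling raw ring-buffer pages through splice() — the path and 4K page size are assumptions, and the spliced data is binary ring-buffer pages rather than formatted text:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* per-cpu raw buffer; path assumes debugfs at /sys/kernel/debug */
        int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
                      O_RDONLY);
        int pfd[2];
        ssize_t n;
        char page[4096];                        /* assumes 4K pages */

        if (fd < 0 || pipe(pfd) < 0) {
                perror("setup");
                return 1;
        }
        /* length should be page aligned; SPLICE_F_NONBLOCK avoids waiting for data */
        n = splice(fd, NULL, pfd[1], NULL, sizeof(page), SPLICE_F_NONBLOCK);
        if (n > 0) {
                n = read(pfd[0], page, sizeof(page));
                printf("got %zd bytes of raw ring-buffer data\n", n);
        }
        close(fd);
        return 0;
}
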
 
@@ -3619,7 +3903,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
-               return ENOMEM;
+               return -ENOMEM;
 
        trace_seq_init(s);
 
@@ -3632,9 +3916,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
        cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
 
-       cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
-       trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
-
        count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
        kfree(s);
@@ -3812,39 +4093,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
        if (ret < 0)
                return ret;
 
-       ret = 0;
-       switch (val) {
-       case 0:
-               /* do nothing if already cleared */
-               if (!(topt->flags->val & topt->opt->bit))
-                       break;
-
-               mutex_lock(&trace_types_lock);
-               if (current_trace->set_flag)
-                       ret = current_trace->set_flag(topt->flags->val,
-                                                     topt->opt->bit, 0);
-               mutex_unlock(&trace_types_lock);
-               if (ret)
-                       return ret;
-               topt->flags->val &= ~topt->opt->bit;
-               break;
-       case 1:
-               /* do nothing if already set */
-               if (topt->flags->val & topt->opt->bit)
-                       break;
+       if (val != 0 && val != 1)
+               return -EINVAL;
 
+       if (!!(topt->flags->val & topt->opt->bit) != val) {
                mutex_lock(&trace_types_lock);
-               if (current_trace->set_flag)
-                       ret = current_trace->set_flag(topt->flags->val,
-                                                     topt->opt->bit, 1);
+               ret = __set_tracer_option(current_trace, topt->flags,
+                                         topt->opt, !val);
                mutex_unlock(&trace_types_lock);
                if (ret)
                        return ret;
-               topt->flags->val |= topt->opt->bit;
-               break;
-
-       default:
-               return -EINVAL;
        }
 
        *ppos += cnt;
@@ -3895,17 +4153,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
        if (ret < 0)
                return ret;
 
-       switch (val) {
-       case 0:
-               trace_flags &= ~(1 << index);
-               break;
-       case 1:
-               trace_flags |= 1 << index;
-               break;
-
-       default:
+       if (val != 0 && val != 1)
                return -EINVAL;
-       }
+       set_tracer_flags(1 << index, val);
 
        *ppos += cnt;
 
@@ -4053,6 +4303,8 @@ static __init int tracer_init_debugfs(void)
        struct dentry *d_tracer;
        int cpu;
 
+       trace_access_lock_init();
+
        d_tracer = tracing_init_dentry();
 
        trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4073,8 +4325,10 @@ static __init int tracer_init_debugfs(void)
        trace_create_file("current_tracer", 0644, d_tracer,
                        &global_trace, &set_tracer_fops);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
        trace_create_file("tracing_max_latency", 0644, d_tracer,
                        &tracing_max_latency, &tracing_max_lat_fops);
+#endif
 
        trace_create_file("tracing_thresh", 0644, d_tracer,
                        &tracing_thresh, &tracing_max_lat_fops);
@@ -4094,6 +4348,9 @@ static __init int tracer_init_debugfs(void)
        trace_create_file("saved_cmdlines", 0444, d_tracer,
                        NULL, &tracing_saved_cmdlines_fops);
 
+       trace_create_file("trace_clock", 0644, d_tracer, NULL,
+                         &trace_clock_fops);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4114,7 +4371,7 @@ static int trace_panic_handler(struct notifier_block *this,
                               unsigned long event, void *unused)
 {
        if (ftrace_dump_on_oops)
-               ftrace_dump();
+               ftrace_dump(ftrace_dump_on_oops);
        return NOTIFY_OK;
 }
 
@@ -4131,7 +4388,7 @@ static int trace_die_handler(struct notifier_block *self,
        switch (val) {
        case DIE_OOPS:
                if (ftrace_dump_on_oops)
-                       ftrace_dump();
+                       ftrace_dump(ftrace_dump_on_oops);
                break;
        default:
                break;
@@ -4172,10 +4429,11 @@ trace_printk_seq(struct trace_seq *s)
        trace_seq_init(s);
 }
 
-static void __ftrace_dump(bool disable_tracing)
+static void
+__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 {
-       static raw_spinlock_t ftrace_dump_lock =
-               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t ftrace_dump_lock =
+               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
        unsigned int old_userobj;
@@ -4185,7 +4443,7 @@ static void __ftrace_dump(bool disable_tracing)
 
        /* only one dump */
        local_irq_save(flags);
-       __raw_spin_lock(&ftrace_dump_lock);
+       arch_spin_lock(&ftrace_dump_lock);
        if (dump_ran)
                goto out;
 
@@ -4205,12 +4463,25 @@ static void __ftrace_dump(bool disable_tracing)
        /* don't look at user memory in panic mode */
        trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 
-       printk(KERN_TRACE "Dumping ftrace buffer:\n");
-
        /* Simulate the iterator */
        iter.tr = &global_trace;
        iter.trace = current_trace;
-       iter.cpu_file = TRACE_PIPE_ALL_CPU;
+
+       switch (oops_dump_mode) {
+       case DUMP_ALL:
+               iter.cpu_file = TRACE_PIPE_ALL_CPU;
+               break;
+       case DUMP_ORIG:
+               iter.cpu_file = raw_smp_processor_id();
+               break;
+       case DUMP_NONE:
+               goto out_enable;
+       default:
+               printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
+               iter.cpu_file = TRACE_PIPE_ALL_CPU;
+       }
+
+       printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
        /*
         * We need to stop all tracing on all CPUS to read the
@@ -4249,6 +4520,7 @@ static void __ftrace_dump(bool disable_tracing)
        else
                printk(KERN_TRACE "---------------------------------\n");
 
+ out_enable:
        /* Re-enable tracing if requested */
        if (!disable_tracing) {
                trace_flags |= old_userobj;
@@ -4260,19 +4532,18 @@ static void __ftrace_dump(bool disable_tracing)
        }
 
  out:
-       __raw_spin_unlock(&ftrace_dump_lock);
+       arch_spin_unlock(&ftrace_dump_lock);
        local_irq_restore(flags);
 }
 
 /* By default: disable tracing after the dump */
-void ftrace_dump(void)
+void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 {
-       __ftrace_dump(true);
+       __ftrace_dump(true, oops_dump_mode);
 }
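
ftrace_dump() now takes an ftrace_dump_mode, and the oops/die notifiers pass ftrace_dump_on_oops straight through, so DUMP_ORIG (value 2) dumps only the buffer of the CPU that hit the oops. The mode is normally configured from userspace through the sysctl; a sketch that writes the procfs file directly, assuming the standard /proc/sys/kernel path:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);

        if (fd < 0) {
                perror("open ftrace_dump_on_oops");
                return 1;
        }
        /* 0 = off, 1 = dump all CPU buffers, 2 = dump only the oopsing CPU */
        if (write(fd, "2\n", 2) != 2)
                perror("write");
        close(fd);
        return 0;
}
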
 
 __init static int tracer_alloc_buffers(void)
 {
-       struct trace_array_cpu *data;
        int ring_buf_size;
        int i;
        int ret = -ENOMEM;
@@ -4283,9 +4554,6 @@ __init static int tracer_alloc_buffers(void)
        if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
                goto out_free_buffer_mask;
 
-       if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
-               goto out_free_tracing_cpumask;
-
        /* To save memory, keep the ring buffer size to its minimum */
        if (ring_buffer_expanded)
                ring_buf_size = trace_buf_size;
@@ -4294,7 +4562,6 @@ __init static int tracer_alloc_buffers(void)
 
        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
        cpumask_copy(tracing_cpumask, cpu_all_mask);
-       cpumask_clear(tracing_reader_cpumask);
 
        /* TODO: make the number of buffers hot pluggable with CPUS */
        global_trace.buffer = ring_buffer_alloc(ring_buf_size,
@@ -4322,8 +4589,8 @@ __init static int tracer_alloc_buffers(void)
 
        /* Allocate the first page for all buffers */
        for_each_tracing_cpu(i) {
-               data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-               max_tr.data[i] = &per_cpu(max_data, i);
+               global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+               max_tr.data[i] = &per_cpu(max_tr_data, i);
        }
 
        trace_init_cmdlines();
@@ -4344,8 +4611,6 @@ __init static int tracer_alloc_buffers(void)
        return 0;
 
 out_free_cpumask:
-       free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
        free_cpumask_var(tracing_cpumask);
 out_free_buffer_mask:
        free_cpumask_var(tracing_buffer_mask);