Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
[safe/jmp/linux-2.6] / kernel / trace / trace.c
index dc937e1..ed01fdb 100644 (file)
@@ -12,7 +12,7 @@
  *  Copyright (C) 2004 William Lee Irwin III
  */
 #include <linux/ring_buffer.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/stacktrace.h>
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
@@ -32,6 +32,7 @@
 #include <linux/splice.h>
 #include <linux/kdebug.h>
 #include <linux/string.h>
+#include <linux/rwsem.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -86,25 +87,22 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static int tracing_disabled = 1;
 
-DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+DEFINE_PER_CPU(int, ftrace_cpu_disabled);
 
 static inline void ftrace_disable_cpu(void)
 {
        preempt_disable();
-       local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_inc(ftrace_cpu_disabled);
 }
 
 static inline void ftrace_enable_cpu(void)
 {
-       local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_dec(ftrace_cpu_disabled);
        preempt_enable();
 }
 
 static cpumask_var_t __read_mostly     tracing_buffer_mask;
 
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t                   tracing_reader_cpumask;
-
 #define for_each_tracing_cpu(cpu)      \
        for_each_cpu(cpu, tracing_buffer_mask)
 
@@ -203,7 +201,7 @@ cycle_t ftrace_now(int cpu)
  */
 static struct trace_array      max_tr;
 
-static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
 
 /* tracer_enabled is used to toggle activation of a tracer */
 static int                     tracer_enabled = 1;
@@ -243,12 +241,91 @@ static struct tracer              *current_trace __read_mostly;
 
 /*
  * trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
  */
 static DEFINE_MUTEX(trace_types_lock);
 
+/*
+ * serialize the access of the ring buffer
+ *
+ * ring buffer serializes readers, but it is low level protection.
+ * The validity of the events (which returns by ring_buffer_peek() ..etc)
+ * are not protected by ring buffer.
+ *
+ * The content of events may become garbage if we allow other process consumes
+ * these events concurrently:
+ *   A) the page of the consumed events may become a normal page
+ *      (not reader page) in ring buffer, and this page will be rewrited
+ *      by events producer.
+ *   B) The page of the consumed events may become a page for splice_read,
+ *      and this page will be returned to system.
+ *
+ * These primitives allow multi process access to different cpu ring buffer
+ * concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multi read-only access are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+       if (cpu == TRACE_PIPE_ALL_CPU) {
+               /* gain it for accessing the whole ring buffer. */
+               down_write(&all_cpu_access_lock);
+       } else {
+               /* gain it for accessing a cpu ring buffer. */
+
+               /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+               down_read(&all_cpu_access_lock);
+
+               /* Secondly block other access to this @cpu ring buffer. */
+               mutex_lock(&per_cpu(cpu_access_lock, cpu));
+       }
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+       if (cpu == TRACE_PIPE_ALL_CPU) {
+               up_write(&all_cpu_access_lock);
+       } else {
+               mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+               up_read(&all_cpu_access_lock);
+       }
+}
+
+static inline void trace_access_lock_init(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+       (void)cpu;
+       mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+       (void)cpu;
+       mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
@@ -313,7 +390,6 @@ static const char *trace_options[] = {
        "bin",
        "block",
        "stacktrace",
-       "sched-tree",
        "trace_printk",
        "ftrace_preempt",
        "branch",
@@ -493,15 +569,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
  * protected by per_cpu spinlocks. But the action of the swap
  * needs its own lock.
  *
- * This is defined as a raw_spinlock_t in order to help
+ * This is defined as a arch_spinlock_t in order to help
  * with performance when lockdep debugging is enabled.
  *
  * It is also used in other places outside the update_max_tr
  * so it needs to be defined outside of the
  * CONFIG_TRACER_MAX_TRACE.
  */
-static raw_spinlock_t ftrace_max_lock =
-       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t ftrace_max_lock =
+       (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 unsigned long __read_mostly    tracing_max_latency;
@@ -555,13 +631,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
 
        tr->buffer = max_tr.buffer;
        max_tr.buffer = buf;
 
        __update_max_tr(tr, tsk, cpu);
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
 }
 
 /**
@@ -581,7 +657,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
 
        ftrace_disable_cpu();
 
@@ -603,7 +679,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
 
        __update_max_tr(tr, tsk, cpu);
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
@@ -802,7 +878,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
 static int cmdline_idx;
-static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 /* temporary disable recording */
 static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +991,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
         * nor do we want to disable interrupts,
         * so if we miss here, then better luck next time.
         */
-       if (!__raw_spin_trylock(&trace_cmdline_lock))
+       if (!arch_spin_trylock(&trace_cmdline_lock))
                return;
 
        idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +1016,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
 
        memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
 
-       __raw_spin_unlock(&trace_cmdline_lock);
+       arch_spin_unlock(&trace_cmdline_lock);
 }
 
 void trace_find_cmdline(int pid, char comm[])
@@ -952,20 +1028,25 @@ void trace_find_cmdline(int pid, char comm[])
                return;
        }
 
+       if (WARN_ON_ONCE(pid < 0)) {
+               strcpy(comm, "<XXX>");
+               return;
+       }
+
        if (pid > PID_MAX_DEFAULT) {
                strcpy(comm, "<...>");
                return;
        }
 
        preempt_disable();
-       __raw_spin_lock(&trace_cmdline_lock);
+       arch_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
        if (map != NO_CMDLINE_MAP)
                strcpy(comm, saved_cmdlines[map]);
        else
                strcpy(comm, "<...>");
 
-       __raw_spin_unlock(&trace_cmdline_lock);
+       arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
 }
 
@@ -1085,7 +1166,7 @@ trace_function(struct trace_array *tr,
        struct ftrace_entry *entry;
 
        /* If we are reading the ring buffer, don't trace */
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
                return;
 
        event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1232,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
        __ftrace_trace_stack(tr->buffer, flags, skip, pc);
 }
 
+/**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+void trace_dump_stack(void)
+{
+       unsigned long flags;
+
+       if (tracing_disabled || tracing_selftest_running)
+               return;
+
+       local_save_flags(flags);
+
+       /* skipping 3 traces, seems to get us at the caller of this function */
+       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+}
+
 void
 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
@@ -1251,8 +1348,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
  */
 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
-       static raw_spinlock_t trace_buf_lock =
-               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t trace_buf_lock =
+               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
 
        struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1380,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 
        /* Lockdep uses trace_printk for lock tracing */
        local_irq_save(flags);
-       __raw_spin_lock(&trace_buf_lock);
+       arch_spin_lock(&trace_buf_lock);
        len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
        if (len > TRACE_BUF_SIZE || len < 0)
@@ -1300,11 +1397,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        entry->fmt                      = fmt;
 
        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-       if (!filter_check_discard(call, entry, buffer, event))
+       if (!filter_check_discard(call, entry, buffer, event)) {
                ring_buffer_unlock_commit(buffer, event);
+               ftrace_trace_stack(buffer, flags, 6, pc);
+       }
 
 out_unlock:
-       __raw_spin_unlock(&trace_buf_lock);
+       arch_spin_unlock(&trace_buf_lock);
        local_irq_restore(flags);
 
 out:
@@ -1334,7 +1433,7 @@ int trace_array_printk(struct trace_array *tr,
 int trace_array_vprintk(struct trace_array *tr,
                        unsigned long ip, const char *fmt, va_list args)
 {
-       static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];
 
        struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1459,8 @@ int trace_array_vprintk(struct trace_array *tr,
 
        pause_graph_tracing();
        raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&trace_buf_lock);
-       if (args == NULL) {
-               strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
-               len = strlen(trace_buf);
-       } else
-               len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+       arch_spin_lock(&trace_buf_lock);
+       len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
        size = sizeof(*entry) + len + 1;
        buffer = tr->buffer;
@@ -1378,11 +1473,13 @@ int trace_array_vprintk(struct trace_array *tr,
 
        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = '\0';
-       if (!filter_check_discard(call, entry, buffer, event))
+       if (!filter_check_discard(call, entry, buffer, event)) {
                ring_buffer_unlock_commit(buffer, event);
+               ftrace_trace_stack(buffer, irq_flags, 6, pc);
+       }
 
  out_unlock:
-       __raw_spin_unlock(&trace_buf_lock);
+       arch_spin_unlock(&trace_buf_lock);
        raw_local_irq_restore(irq_flags);
        unpause_graph_tracing();
  out:
@@ -1516,6 +1613,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
        int i = (int)*pos;
        void *ent;
 
+       WARN_ON_ONCE(iter->leftover);
+
        (*pos)++;
 
        /* can't go backwards */
@@ -1567,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 }
 
 /*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
  * The current tracer is copied to avoid a global locking
  * all around.
  */
@@ -1614,17 +1707,29 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                        ;
 
        } else {
-               l = *pos - 1;
-               p = s_next(m, p, &l);
+               /*
+                * If we overflowed the seq_file before, then we want
+                * to just reuse the trace_seq buffer again.
+                */
+               if (iter->leftover)
+                       p = iter;
+               else {
+                       l = *pos - 1;
+                       p = s_next(m, p, &l);
+               }
        }
 
        trace_event_read_lock();
+       trace_access_lock(cpu_file);
        return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
+       struct trace_iterator *iter = m->private;
+
        atomic_dec(&trace_record_cmdline_disabled);
+       trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 }
 
@@ -1923,6 +2028,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
 static int s_show(struct seq_file *m, void *v)
 {
        struct trace_iterator *iter = v;
+       int ret;
 
        if (iter->ent == NULL) {
                if (iter->tr) {
@@ -1942,9 +2048,27 @@ static int s_show(struct seq_file *m, void *v)
                        if (!(trace_flags & TRACE_ITER_VERBOSE))
                                print_func_help_header(m);
                }
+       } else if (iter->leftover) {
+               /*
+                * If we filled the seq_file buffer earlier, we
+                * want to just show it now.
+                */
+               ret = trace_print_seq(m, &iter->seq);
+
+               /* ret should this time be zero, but you never know */
+               iter->leftover = ret;
+
        } else {
                print_trace_line(iter);
-               trace_print_seq(m, &iter->seq);
+               ret = trace_print_seq(m, &iter->seq);
+               /*
+                * If we overflow the seq_file buffer, then it will
+                * ask us for this data again at start up.
+                * Use that instead.
+                *  ret is 0 if seq_file write succeeded.
+                *        -1 otherwise.
+                */
+               iter->leftover = ret;
        }
 
        return 0;
@@ -2254,7 +2378,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        mutex_lock(&tracing_cpumask_update_lock);
 
        local_irq_disable();
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
                /*
                 * Increase/decrease the disabled counter if we are
@@ -2269,7 +2393,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
                        atomic_dec(&global_trace.data[cpu]->disabled);
                }
        }
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
        local_irq_enable();
 
        cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2291,67 +2415,49 @@ static const struct file_operations tracing_cpumask_fops = {
        .write          = tracing_cpumask_write,
 };
 
-static ssize_t
-tracing_trace_options_read(struct file *filp, char __user *ubuf,
-                      size_t cnt, loff_t *ppos)
+static int tracing_trace_options_show(struct seq_file *m, void *v)
 {
        struct tracer_opt *trace_opts;
        u32 tracer_flags;
-       int len = 0;
-       char *buf;
-       int r = 0;
        int i;
 
-
-       /* calculate max size */
-       for (i = 0; trace_options[i]; i++) {
-               len += strlen(trace_options[i]);
-               len += 3; /* "no" and newline */
-       }
-
        mutex_lock(&trace_types_lock);
        tracer_flags = current_trace->flags->val;
        trace_opts = current_trace->flags->opts;
 
-       /*
-        * Increase the size with names of options specific
-        * of the current tracer.
-        */
-       for (i = 0; trace_opts[i].name; i++) {
-               len += strlen(trace_opts[i].name);
-               len += 3; /* "no" and newline */
-       }
-
-       /* +1 for \0 */
-       buf = kmalloc(len + 1, GFP_KERNEL);
-       if (!buf) {
-               mutex_unlock(&trace_types_lock);
-               return -ENOMEM;
-       }
-
        for (i = 0; trace_options[i]; i++) {
                if (trace_flags & (1 << i))
-                       r += sprintf(buf + r, "%s\n", trace_options[i]);
+                       seq_printf(m, "%s\n", trace_options[i]);
                else
-                       r += sprintf(buf + r, "no%s\n", trace_options[i]);
+                       seq_printf(m, "no%s\n", trace_options[i]);
        }
 
        for (i = 0; trace_opts[i].name; i++) {
                if (tracer_flags & trace_opts[i].bit)
-                       r += sprintf(buf + r, "%s\n",
-                               trace_opts[i].name);
+                       seq_printf(m, "%s\n", trace_opts[i].name);
                else
-                       r += sprintf(buf + r, "no%s\n",
-                               trace_opts[i].name);
+                       seq_printf(m, "no%s\n", trace_opts[i].name);
        }
        mutex_unlock(&trace_types_lock);
 
-       WARN_ON(r >= len + 1);
+       return 0;
+}
 
-       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+static int __set_tracer_option(struct tracer *trace,
+                              struct tracer_flags *tracer_flags,
+                              struct tracer_opt *opts, int neg)
+{
+       int ret;
 
-       kfree(buf);
-       return r;
+       ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+       if (ret)
+               return ret;
+
+       if (neg)
+               tracer_flags->val &= ~opts->bit;
+       else
+               tracer_flags->val |= opts->bit;
+       return 0;
 }
 
 /* Try to assign a tracer specific option */
@@ -2359,33 +2465,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
 {
        struct tracer_flags *tracer_flags = trace->flags;
        struct tracer_opt *opts = NULL;
-       int ret = 0, i = 0;
-       int len;
+       int i;
 
        for (i = 0; tracer_flags->opts[i].name; i++) {
                opts = &tracer_flags->opts[i];
-               len = strlen(opts->name);
 
-               if (strncmp(cmp, opts->name, len) == 0) {
-                       ret = trace->set_flag(tracer_flags->val,
-                               opts->bit, !neg);
-                       break;
-               }
+               if (strcmp(cmp, opts->name) == 0)
+                       return __set_tracer_option(trace, trace->flags,
+                                                  opts, neg);
        }
-       /* Not found */
-       if (!tracer_flags->opts[i].name)
-               return -EINVAL;
-
-       /* Refused to handle */
-       if (ret)
-               return ret;
-
-       if (neg)
-               tracer_flags->val &= ~opts->bit;
-       else
-               tracer_flags->val |= opts->bit;
 
-       return 0;
+       return -EINVAL;
 }
 
 static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2405,7 +2495,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
 {
        char buf[64];
-       char *cmp = buf;
+       char *cmp;
        int neg = 0;
        int ret;
        int i;
@@ -2417,16 +2507,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                return -EFAULT;
 
        buf[cnt] = 0;
+       cmp = strstrip(buf);
 
-       if (strncmp(buf, "no", 2) == 0) {
+       if (strncmp(cmp, "no", 2) == 0) {
                neg = 1;
                cmp += 2;
        }
 
        for (i = 0; trace_options[i]; i++) {
-               int len = strlen(trace_options[i]);
-
-               if (strncmp(cmp, trace_options[i], len) == 0) {
+               if (strcmp(cmp, trace_options[i]) == 0) {
                        set_tracer_flags(1 << i, !neg);
                        break;
                }
@@ -2446,9 +2535,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
        return cnt;
 }
 
+static int tracing_trace_options_open(struct inode *inode, struct file *file)
+{
+       if (tracing_disabled)
+               return -ENODEV;
+       return single_open(file, tracing_trace_options_show, NULL);
+}
+
 static const struct file_operations tracing_iter_fops = {
-       .open           = tracing_open_generic,
-       .read           = tracing_trace_options_read,
+       .open           = tracing_trace_options_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
        .write          = tracing_trace_options_write,
 };
 
@@ -2822,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
        mutex_lock(&trace_types_lock);
 
-       /* We only allow one reader per cpu */
-       if (cpu_file == TRACE_PIPE_ALL_CPU) {
-               if (!cpumask_empty(tracing_reader_cpumask)) {
-                       ret = -EBUSY;
-                       goto out;
-               }
-               cpumask_setall(tracing_reader_cpumask);
-       } else {
-               if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
-                       cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
-               else {
-                       ret = -EBUSY;
-                       goto out;
-               }
-       }
-
        /* create a buffer to store the information to pass to userspace */
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter) {
@@ -2893,12 +2975,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
        mutex_lock(&trace_types_lock);
 
-       if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
-               cpumask_clear(tracing_reader_cpumask);
-       else
-               cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
        if (iter->trace->pipe_close)
                iter->trace->pipe_close(iter);
 
@@ -3060,6 +3136,7 @@ waitagain:
        iter->pos = -1;
 
        trace_event_read_lock();
+       trace_access_lock(iter->cpu_file);
        while (find_next_entry_inc(iter) != NULL) {
                enum print_line_t ret;
                int len = iter->seq.len;
@@ -3076,6 +3153,7 @@ waitagain:
                if (iter->seq.len >= cnt)
                        break;
        }
+       trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 
        /* Now copy what we have to the user */
@@ -3108,7 +3186,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
        __free_page(spd->pages[idx]);
 }
 
-static struct pipe_buf_operations tracing_pipe_buf_ops = {
+static const struct pipe_buf_operations tracing_pipe_buf_ops = {
        .can_merge              = 0,
        .map                    = generic_pipe_buf_map,
        .unmap                  = generic_pipe_buf_unmap,
@@ -3201,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
        }
 
        trace_event_read_lock();
+       trace_access_lock(iter->cpu_file);
 
        /* Fill as many pages as possible. */
        for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3224,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
                trace_seq_init(&iter->seq);
        }
 
+       trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
        mutex_unlock(&iter->mutex);
 
@@ -3324,6 +3404,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
        return cnt;
 }
 
+static int mark_printk(const char *fmt, ...)
+{
+       int ret;
+       va_list args;
+       va_start(args, fmt);
+       ret = trace_vprintk(0, fmt, args);
+       va_end(args);
+       return ret;
+}
+
 static ssize_t
 tracing_mark_write(struct file *filp, const char __user *ubuf,
                                        size_t cnt, loff_t *fpos)
@@ -3350,28 +3440,25 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        } else
                buf[cnt] = '\0';
 
-       cnt = trace_vprintk(0, buf, NULL);
+       cnt = mark_printk("%s", buf);
        kfree(buf);
        *fpos += cnt;
 
        return cnt;
 }
 
-static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
-                                 size_t cnt, loff_t *ppos)
+static int tracing_clock_show(struct seq_file *m, void *v)
 {
-       char buf[64];
-       int bufiter = 0;
        int i;
 
        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
-               bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
+               seq_printf(m,
                        "%s%s%s%s", i ? " " : "",
                        i == trace_clock_id ? "[" : "", trace_clocks[i].name,
                        i == trace_clock_id ? "]" : "");
-       bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
+       seq_putc(m, '\n');
 
-       return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
+       return 0;
 }
 
 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3413,6 +3500,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
        return cnt;
 }
 
+static int tracing_clock_open(struct inode *inode, struct file *file)
+{
+       if (tracing_disabled)
+               return -ENODEV;
+       return single_open(file, tracing_clock_show, NULL);
+}
+
 static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
@@ -3451,8 +3545,10 @@ static const struct file_operations tracing_mark_fops = {
 };
 
 static const struct file_operations trace_clock_fops = {
-       .open           = tracing_open_generic,
-       .read           = tracing_clock_read,
+       .open           = tracing_clock_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
        .write          = tracing_clock_write,
 };
 
@@ -3509,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 
        info->read = 0;
 
+       trace_access_lock(info->cpu);
        ret = ring_buffer_read_page(info->tr->buffer,
                                    &info->spare,
                                    count,
                                    info->cpu, 0);
+       trace_access_unlock(info->cpu);
        if (ret < 0)
                return 0;
 
@@ -3582,7 +3680,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 }
 
 /* Pipe buffer operations for a buffer. */
-static struct pipe_buf_operations buffer_pipe_buf_ops = {
+static const struct pipe_buf_operations buffer_pipe_buf_ops = {
        .can_merge              = 0,
        .map                    = generic_pipe_buf_map,
        .unmap                  = generic_pipe_buf_unmap,
@@ -3640,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                len &= PAGE_MASK;
        }
 
+       trace_access_lock(info->cpu);
        entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 
        for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3687,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
        }
 
+       trace_access_unlock(info->cpu);
        spd.nr_pages = i;
 
        /* did we read anything? */
@@ -3913,39 +4013,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
        if (ret < 0)
                return ret;
 
-       ret = 0;
-       switch (val) {
-       case 0:
-               /* do nothing if already cleared */
-               if (!(topt->flags->val & topt->opt->bit))
-                       break;
-
-               mutex_lock(&trace_types_lock);
-               if (current_trace->set_flag)
-                       ret = current_trace->set_flag(topt->flags->val,
-                                                     topt->opt->bit, 0);
-               mutex_unlock(&trace_types_lock);
-               if (ret)
-                       return ret;
-               topt->flags->val &= ~topt->opt->bit;
-               break;
-       case 1:
-               /* do nothing if already set */
-               if (topt->flags->val & topt->opt->bit)
-                       break;
+       if (val != 0 && val != 1)
+               return -EINVAL;
 
+       if (!!(topt->flags->val & topt->opt->bit) != val) {
                mutex_lock(&trace_types_lock);
-               if (current_trace->set_flag)
-                       ret = current_trace->set_flag(topt->flags->val,
-                                                     topt->opt->bit, 1);
+               ret = __set_tracer_option(current_trace, topt->flags,
+                                         topt->opt, !val);
                mutex_unlock(&trace_types_lock);
                if (ret)
                        return ret;
-               topt->flags->val |= topt->opt->bit;
-               break;
-
-       default:
-               return -EINVAL;
        }
 
        *ppos += cnt;
@@ -4146,6 +4223,8 @@ static __init int tracer_init_debugfs(void)
        struct dentry *d_tracer;
        int cpu;
 
+       trace_access_lock_init();
+
        d_tracer = tracing_init_dentry();
 
        trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4272,8 +4351,8 @@ trace_printk_seq(struct trace_seq *s)
 
 static void __ftrace_dump(bool disable_tracing)
 {
-       static raw_spinlock_t ftrace_dump_lock =
-               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t ftrace_dump_lock =
+               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
        unsigned int old_userobj;
@@ -4283,7 +4362,7 @@ static void __ftrace_dump(bool disable_tracing)
 
        /* only one dump */
        local_irq_save(flags);
-       __raw_spin_lock(&ftrace_dump_lock);
+       arch_spin_lock(&ftrace_dump_lock);
        if (dump_ran)
                goto out;
 
@@ -4358,7 +4437,7 @@ static void __ftrace_dump(bool disable_tracing)
        }
 
  out:
-       __raw_spin_unlock(&ftrace_dump_lock);
+       arch_spin_unlock(&ftrace_dump_lock);
        local_irq_restore(flags);
 }
 
@@ -4380,9 +4459,6 @@ __init static int tracer_alloc_buffers(void)
        if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
                goto out_free_buffer_mask;
 
-       if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
-               goto out_free_tracing_cpumask;
-
        /* To save memory, keep the ring buffer size to its minimum */
        if (ring_buffer_expanded)
                ring_buf_size = trace_buf_size;
@@ -4419,7 +4495,7 @@ __init static int tracer_alloc_buffers(void)
        /* Allocate the first page for all buffers */
        for_each_tracing_cpu(i) {
                global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-               max_tr.data[i] = &per_cpu(max_data, i);
+               max_tr.data[i] = &per_cpu(max_tr_data, i);
        }
 
        trace_init_cmdlines();
@@ -4440,8 +4516,6 @@ __init static int tracer_alloc_buffers(void)
        return 0;
 
 out_free_cpumask:
-       free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
        free_cpumask_var(tracing_cpumask);
 out_free_buffer_mask:
        free_cpumask_var(tracing_buffer_mask);