Update /debug/tracing/README
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8c6a902..9d28476 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
 #include <linux/percpu.h>
 #include <linux/splice.h>
 #include <linux/kdebug.h>
+#include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -45,6 +46,12 @@ unsigned long __read_mostly  tracing_max_latency;
 unsigned long __read_mostly    tracing_thresh;
 
 /*
+ * On boot up, the ring buffer is set to the minimum size, so that
+ * we do not waste memory on systems that are not using tracing.
+ */
+static int ring_buffer_expanded;
+
+/*
  * We need to change this state when a selftest is running.
  * A selftest will lurk into the ring-buffer to count the
  * entries inserted during the selftest although some concurrent
@@ -128,6 +135,8 @@ static int __init set_ftrace(char *str)
 {
        strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
+       /* We are using ftrace early, expand it */
+       ring_buffer_expanded = 1;
        return 1;
 }
 __setup("ftrace=", set_ftrace);
@@ -139,21 +148,13 @@ static int __init set_ftrace_dump_on_oops(char *str)
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
 {
        nsec += 500;
        do_div(nsec, 1000);
        return nsec;
 }
 
-cycle_t ftrace_now(int cpu)
-{
-       u64 ts = ring_buffer_time_stamp(cpu);
-       ring_buffer_normalize_time_stamp(cpu, &ts);
-       return ts;
-}
-
 /*
  * The global_trace is the descriptor that holds the tracing
  * buffers for the live tracing. For each CPU, it contains
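
As a quick illustration of what the re-typed ns2usecs() above computes, here is a minimal stand-alone sketch (not part of the patch): nanoseconds become microseconds rounded to the nearest microsecond, which is what the "+ 500" before the divide is for; the kernel only uses do_div() because 64-bit division needs a helper on 32-bit architectures.

#include <stdio.h>

/* Stand-alone model of ns2usecs(): round-to-nearest ns -> us conversion. */
static unsigned long long ns2usecs_demo(unsigned long long nsec)
{
	nsec += 500;		/* round half up */
	return nsec / 1000;	/* the kernel does this with do_div() */
}

int main(void)
{
	printf("%llu\n", ns2usecs_demo(1499));	/* prints 1 (1999 / 1000) */
	printf("%llu\n", ns2usecs_demo(1500));	/* prints 2 (2000 / 1000) */
	return 0;
}
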
@@ -170,6 +171,20 @@ static struct trace_array  global_trace;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
+cycle_t ftrace_now(int cpu)
+{
+       u64 ts;
+
+       /* Early boot up does not have a buffer yet */
+       if (!global_trace.buffer)
+               return trace_clock_local();
+
+       ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
+       ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
+
+       return ts;
+}
+
 /*
  * The max_tr is used to snapshot the global_trace when a maximum
  * latency is reached. Some tracers will use this to store a maximum
@@ -240,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-       TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
+       TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -300,6 +315,8 @@ static const char *trace_options[] = {
        "printk-msg-only",
        "context-info",
        "latency-format",
+       "global-clock",
+       "sleep-time",
        NULL
 };
 
@@ -366,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
        return cnt;
 }
 
-ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
+static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 {
        int len;
        void *ret;
@@ -625,6 +642,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 }
 
 #define SAVED_CMDLINES 128
+#define NO_CMDLINE_MAP UINT_MAX
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
@@ -636,8 +654,8 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
 
 static void trace_init_cmdlines(void)
 {
-       memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
-       memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
+       memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
+       memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
        cmdline_idx = 0;
 }
 
@@ -729,8 +747,7 @@ void trace_stop_cmdline_recording(void);
 
 static void trace_save_cmdline(struct task_struct *tsk)
 {
-       unsigned map;
-       unsigned idx;
+       unsigned pid, idx;
 
        if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
                return;
@@ -745,13 +762,20 @@ static void trace_save_cmdline(struct task_struct *tsk)
                return;
 
        idx = map_pid_to_cmdline[tsk->pid];
-       if (idx >= SAVED_CMDLINES) {
+       if (idx == NO_CMDLINE_MAP) {
                idx = (cmdline_idx + 1) % SAVED_CMDLINES;
 
-               map = map_cmdline_to_pid[idx];
-               if (map <= PID_MAX_DEFAULT)
-                       map_pid_to_cmdline[map] = (unsigned)-1;
+               /*
+                * Check whether the cmdline buffer at idx has a pid
+                * mapped. We are going to overwrite that entry so we
+                * need to clear the map_pid_to_cmdline. Otherwise we
+                * would read the new comm for the old pid.
+                */
+               pid = map_cmdline_to_pid[idx];
+               if (pid != NO_CMDLINE_MAP)
+                       map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
 
+               map_cmdline_to_pid[idx] = tsk->pid;
                map_pid_to_cmdline[tsk->pid] = idx;
 
                cmdline_idx = idx;
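
The hunk above is easier to follow with the surrounding data structures in mind: map_pid_to_cmdline[] maps a pid to a slot in saved_cmdlines[], map_cmdline_to_pid[] is the reverse map, and the fix keeps the two in sync when a slot is recycled so a stale pid can no longer pick up the new owner's comm. A simplified user-space sketch of that bookkeeping (smaller sizes, no locking, purely illustrative):

#include <limits.h>
#include <stdio.h>
#include <string.h>

#define SLOTS	4		/* stands in for SAVED_CMDLINES */
#define MAX_PID	32		/* stands in for PID_MAX_DEFAULT */
#define NO_MAP	UINT_MAX	/* stands in for NO_CMDLINE_MAP */

static unsigned pid_to_slot[MAX_PID + 1];	/* pid -> slot */
static unsigned slot_to_pid[SLOTS];		/* slot -> owning pid */
static char comms[SLOTS][16];
static unsigned next_slot;

static void init_maps(void)
{
	/* memset with 0xff makes every entry NO_MAP, as the patch does */
	memset(pid_to_slot, 0xff, sizeof(pid_to_slot));
	memset(slot_to_pid, 0xff, sizeof(slot_to_pid));
}

static void save_comm(unsigned pid, const char *comm)
{
	unsigned slot = pid_to_slot[pid];

	if (slot == NO_MAP) {		/* pid not cached yet: recycle a slot */
		unsigned old;

		slot = next_slot = (next_slot + 1) % SLOTS;
		old = slot_to_pid[slot];
		if (old != NO_MAP)
			pid_to_slot[old] = NO_MAP;	/* evict the previous owner */
		slot_to_pid[slot] = pid;
		pid_to_slot[pid] = slot;
	}
	snprintf(comms[slot], sizeof(comms[slot]), "%s", comm);
}

static const char *find_comm(unsigned pid)
{
	unsigned slot = pid_to_slot[pid];

	return slot == NO_MAP ? "<...>" : comms[slot];
}

int main(void)
{
	init_maps();
	save_comm(10, "bash");
	save_comm(11, "sshd");
	printf("%s %s\n", find_comm(10), find_comm(11));	/* bash sshd */
	return 0;
}
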
@@ -762,30 +786,34 @@ static void trace_save_cmdline(struct task_struct *tsk)
        __raw_spin_unlock(&trace_cmdline_lock);
 }
 
-char *trace_find_cmdline(int pid)
+void trace_find_cmdline(int pid, char comm[])
 {
-       char *cmdline = "<...>";
        unsigned map;
 
-       if (!pid)
-               return "<idle>";
+       if (!pid) {
+               strcpy(comm, "<idle>");
+               return;
+       }
 
-       if (pid > PID_MAX_DEFAULT)
-               goto out;
+       if (pid > PID_MAX_DEFAULT) {
+               strcpy(comm, "<...>");
+               return;
+       }
 
+       __raw_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
-       if (map >= SAVED_CMDLINES)
-               goto out;
-
-       cmdline = saved_cmdlines[map];
+       if (map != NO_CMDLINE_MAP)
+               strcpy(comm, saved_cmdlines[map]);
+       else
+               strcpy(comm, "<...>");
 
- out:
-       return cmdline;
+       __raw_spin_unlock(&trace_cmdline_lock);
 }
 
 void tracing_record_cmdline(struct task_struct *tsk)
 {
-       if (atomic_read(&trace_record_cmdline_disabled))
+       if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
+           !tracing_is_on())
                return;
 
        trace_save_cmdline(tsk);
@@ -833,15 +861,25 @@ static void ftrace_trace_stack(struct trace_array *tr,
 static void ftrace_trace_userstack(struct trace_array *tr,
                                   unsigned long flags, int pc);
 
-void trace_buffer_unlock_commit(struct trace_array *tr,
-                               struct ring_buffer_event *event,
-                               unsigned long flags, int pc)
+static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
+                                       struct ring_buffer_event *event,
+                                       unsigned long flags, int pc,
+                                       int wake)
 {
        ring_buffer_unlock_commit(tr->buffer, event);
 
        ftrace_trace_stack(tr, flags, 6, pc);
        ftrace_trace_userstack(tr, flags, pc);
-       trace_wake_up();
+
+       if (wake)
+               trace_wake_up();
+}
+
+void trace_buffer_unlock_commit(struct trace_array *tr,
+                                       struct ring_buffer_event *event,
+                                       unsigned long flags, int pc)
+{
+       __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
 }
 
 struct ring_buffer_event *
@@ -855,7 +893,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
 {
-       return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
+       return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+}
+
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+                                       unsigned long flags, int pc)
+{
+       return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
 }
 
 void
@@ -881,7 +925,7 @@ trace_function(struct trace_array *tr,
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void __trace_graph_entry(struct trace_array *tr,
+static int __trace_graph_entry(struct trace_array *tr,
                                struct ftrace_graph_ent *trace,
                                unsigned long flags,
                                int pc)
@@ -890,15 +934,17 @@ static void __trace_graph_entry(struct trace_array *tr,
        struct ftrace_graph_ent_entry *entry;
 
        if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
-               return;
+               return 0;
 
        event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
                                          sizeof(*entry), flags, pc);
        if (!event)
-               return;
+               return 0;
        entry   = ring_buffer_event_data(event);
        entry->graph_ent                        = *trace;
        ring_buffer_unlock_commit(global_trace.buffer, event);
+
+       return 1;
 }
 
 static void __trace_graph_return(struct trace_array *tr,
@@ -1119,6 +1165,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
+       int ret;
        int cpu;
        int pc;
 
@@ -1134,15 +1181,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
        disabled = atomic_inc_return(&data->disabled);
        if (likely(disabled == 1)) {
                pc = preempt_count();
-               __trace_graph_entry(tr, trace, flags, pc);
+               ret = __trace_graph_entry(tr, trace, flags, pc);
+       } else {
+               ret = 0;
        }
        /* Only do the atomic if it is not already set */
        if (!test_tsk_trace_graph(current))
                set_tsk_trace_graph(current);
+
        atomic_dec(&data->disabled);
        local_irq_restore(flags);
 
-       return 1;
+       return ret;
 }
 
 void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -1171,18 +1221,19 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 
 
 /**
- * trace_vprintk - write binary msg to tracing buffer
+ * trace_vbprintk - write binary msg to tracing buffer
  *
  */
-int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
-       static DEFINE_SPINLOCK(trace_buf_lock);
+       static raw_spinlock_t trace_buf_lock =
+               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
 
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
-       struct print_entry *entry;
+       struct bprint_entry *entry;
        unsigned long flags;
        int resched;
        int cpu, len = 0, size, pc;
@@ -1201,26 +1252,28 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
        if (unlikely(atomic_read(&data->disabled)))
                goto out;
 
-       spin_lock_irqsave(&trace_buf_lock, flags);
+       /* Lockdep uses trace_printk for lock tracing */
+       local_irq_save(flags);
+       __raw_spin_lock(&trace_buf_lock);
        len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
        if (len > TRACE_BUF_SIZE || len < 0)
                goto out_unlock;
 
        size = sizeof(*entry) + sizeof(u32) * len;
-       event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc);
+       event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
        entry->ip                       = ip;
-       entry->depth                    = depth;
        entry->fmt                      = fmt;
 
        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
        ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
-       spin_unlock_irqrestore(&trace_buf_lock, flags);
+       __raw_spin_unlock(&trace_buf_lock);
+       local_irq_restore(flags);
 
 out:
        ftrace_preempt_enable(resched);
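
The renamed trace_vbprintk() above defers all string formatting: at trace time it records only the format pointer plus the arguments packed into u32 words by vbin_printf(), and the text is rendered when the trace is read back (the msg-only printers call trace_seq_bprintf() on the stored words, as in the function removed further down). A kernel-context sketch of that split, assuming TRACE_BUF_SIZE as defined in this file and CONFIG_BINARY_PRINTF for the binary printf helpers:

#include <linux/kernel.h>
#include <linux/string.h>

static u32 packed[TRACE_BUF_SIZE];

static int pack_args(const char *fmt, ...)
{
	va_list ap;
	int len;

	va_start(ap, fmt);
	/* store only the arguments, in binary form; no formatting yet */
	len = vbin_printf(packed, TRACE_BUF_SIZE, fmt, ap);
	va_end(ap);
	return len;
}

static int render(char *out, size_t size, const char *fmt)
{
	/* expand the packed arguments against the same format string */
	return bstr_printf(out, size, fmt, packed);
}

The format string has to be persistent (in practice a literal in the kernel image), since only its address is stored in the ring buffer entry.
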
@@ -1228,6 +1281,59 @@ out:
 
        return len;
 }
+EXPORT_SYMBOL_GPL(trace_vbprintk);
+
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+{
+       static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+       static char trace_buf[TRACE_BUF_SIZE];
+
+       struct ring_buffer_event *event;
+       struct trace_array *tr = &global_trace;
+       struct trace_array_cpu *data;
+       int cpu, len = 0, size, pc;
+       struct print_entry *entry;
+       unsigned long irq_flags;
+
+       if (tracing_disabled || tracing_selftest_running)
+               return 0;
+
+       pc = preempt_count();
+       preempt_disable_notrace();
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+
+       if (unlikely(atomic_read(&data->disabled)))
+               goto out;
+
+       pause_graph_tracing();
+       raw_local_irq_save(irq_flags);
+       __raw_spin_lock(&trace_buf_lock);
+       len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+       len = min(len, TRACE_BUF_SIZE-1);
+       trace_buf[len] = 0;
+
+       size = sizeof(*entry) + len + 1;
+       event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
+       if (!event)
+               goto out_unlock;
+       entry = ring_buffer_event_data(event);
+       entry->ip                       = ip;
+
+       memcpy(&entry->buf, trace_buf, len);
+       entry->buf[len] = 0;
+       ring_buffer_unlock_commit(tr->buffer, event);
+
+ out_unlock:
+       __raw_spin_unlock(&trace_buf_lock);
+       raw_local_irq_restore(irq_flags);
+       unpause_graph_tracing();
+ out:
+       preempt_enable_notrace();
+
+       return len;
+}
 EXPORT_SYMBOL_GPL(trace_vprintk);
 
 enum trace_file_type {
@@ -1466,11 +1572,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
        total = entries +
                ring_buffer_overruns(iter->tr->buffer);
 
-       seq_printf(m, "%s latency trace v1.1.5 on %s\n",
+       seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
                   name, UTS_RELEASE);
-       seq_puts(m, "-----------------------------------"
+       seq_puts(m, "# -----------------------------------"
                 "---------------------------------\n");
-       seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
+       seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
                   nsecs_to_usecs(data->saved_latency),
                   entries,
@@ -1492,24 +1598,24 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 #else
        seq_puts(m, ")\n");
 #endif
-       seq_puts(m, "    -----------------\n");
-       seq_printf(m, "    | task: %.16s-%d "
+       seq_puts(m, "#    -----------------\n");
+       seq_printf(m, "#    | task: %.16s-%d "
                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
                   data->comm, data->pid, data->uid, data->nice,
                   data->policy, data->rt_priority);
-       seq_puts(m, "    -----------------\n");
+       seq_puts(m, "#    -----------------\n");
 
        if (data->critical_start) {
-               seq_puts(m, " => started at: ");
+               seq_puts(m, "#  => started at: ");
                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
                trace_print_seq(m, &iter->seq);
-               seq_puts(m, "\n => ended at:   ");
+               seq_puts(m, "\n#  => ended at:   ");
                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
                trace_print_seq(m, &iter->seq);
-               seq_puts(m, "\n");
+               seq_puts(m, "\n#\n");
        }
 
-       seq_puts(m, "\n");
+       seq_puts(m, "#\n");
 }
 
 static void test_cpu_buff_start(struct trace_iterator *iter)
@@ -1526,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
                return;
 
        cpumask_set_cpu(iter->cpu, iter->started);
-       trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+       /* Don't print started cpu buffer for the first entry of the trace */
+       if (iter->idx > 1)
+               trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+                               iter->cpu);
 }
 
 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1616,22 +1726,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
        return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
-{
-       struct trace_seq *s = &iter->seq;
-       struct trace_entry *entry = iter->ent;
-       struct print_entry *field;
-       int ret;
-
-       trace_assign_type(field, entry);
-
-       ret = trace_seq_bprintf(s, field->fmt, field->buf);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
@@ -1654,6 +1748,19 @@ static int trace_empty(struct trace_iterator *iter)
 {
        int cpu;
 
+       /* If we are looking at one CPU buffer, only check that one */
+       if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
+               cpu = iter->cpu_file;
+               if (iter->buffer_iter[cpu]) {
+                       if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+                               return 0;
+               } else {
+                       if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                               return 0;
+               }
+               return 1;
+       }
+
        for_each_tracing_cpu(cpu) {
                if (iter->buffer_iter[cpu]) {
                        if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
@@ -1677,10 +1784,15 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
                        return ret;
        }
 
+       if (iter->ent->type == TRACE_BPRINT &&
+                       trace_flags & TRACE_ITER_PRINTK &&
+                       trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+               return trace_print_bprintk_msg_only(iter);
+
        if (iter->ent->type == TRACE_PRINT &&
                        trace_flags & TRACE_ITER_PRINTK &&
                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
-               return print_printk_msg_only(iter);
+               return trace_print_printk_msg_only(iter);
 
        if (trace_flags & TRACE_ITER_BIN)
                return print_bin_fmt(iter);
@@ -1759,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
        if (current_trace)
                *iter->trace = *current_trace;
 
+       if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+               goto fail;
+
+       cpumask_clear(iter->started);
+
        if (current_trace && current_trace->print_max)
                iter->tr = &max_tr;
        else
@@ -1780,17 +1897,11 @@ __tracing_open(struct inode *inode, struct file *file)
 
                        iter->buffer_iter[cpu] =
                                ring_buffer_read_start(iter->tr->buffer, cpu);
-
-                       if (!iter->buffer_iter[cpu])
-                               goto fail_buffer;
                }
        } else {
                cpu = iter->cpu_file;
                iter->buffer_iter[cpu] =
                                ring_buffer_read_start(iter->tr->buffer, cpu);
-
-               if (!iter->buffer_iter[cpu])
-                       goto fail;
        }
 
        /* TODO stop tracer */
@@ -1815,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
                if (iter->buffer_iter[cpu])
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
        }
+       free_cpumask_var(iter->started);
  fail:
        mutex_unlock(&trace_types_lock);
        kfree(iter->trace);
@@ -1835,9 +1947,14 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
 static int tracing_release(struct inode *inode, struct file *file)
 {
        struct seq_file *m = (struct seq_file *)file->private_data;
-       struct trace_iterator *iter = m->private;
+       struct trace_iterator *iter;
        int cpu;
 
+       if (!(file->f_mode & FMODE_READ))
+               return 0;
+
+       iter = m->private;
+
        mutex_lock(&trace_types_lock);
        for_each_tracing_cpu(cpu) {
                if (iter->buffer_iter[cpu])
@@ -1853,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 
        seq_release(inode, file);
        mutex_destroy(&iter->mutex);
+       free_cpumask_var(iter->started);
        kfree(iter->trace);
        kfree(iter);
        return 0;
@@ -1863,12 +1981,24 @@ static int tracing_open(struct inode *inode, struct file *file)
        struct trace_iterator *iter;
        int ret = 0;
 
-       iter = __tracing_open(inode, file);
-       if (IS_ERR(iter))
-               ret = PTR_ERR(iter);
-       else if (trace_flags & TRACE_ITER_LATENCY_FMT)
-               iter->iter_flags |= TRACE_FILE_LAT_FMT;
+       /* If this file was open for write, then erase contents */
+       if ((file->f_mode & FMODE_WRITE) &&
+           !(file->f_flags & O_APPEND)) {
+               long cpu = (long) inode->i_private;
+
+               if (cpu == TRACE_PIPE_ALL_CPU)
+                       tracing_reset_online_cpus(&global_trace);
+               else
+                       tracing_reset(&global_trace, cpu);
+       }
 
+       if (file->f_mode & FMODE_READ) {
+               iter = __tracing_open(inode, file);
+               if (IS_ERR(iter))
+                       ret = PTR_ERR(iter);
+               else if (trace_flags & TRACE_ITER_LATENCY_FMT)
+                       iter->iter_flags |= TRACE_FILE_LAT_FMT;
+       }
        return ret;
 }
 
@@ -1943,9 +2073,17 @@ static int show_traces_open(struct inode *inode, struct file *file)
        return ret;
 }
 
+static ssize_t
+tracing_write_stub(struct file *filp, const char __user *ubuf,
+                  size_t count, loff_t *ppos)
+{
+       return count;
+}
+
 static const struct file_operations tracing_fops = {
        .open           = tracing_open,
        .read           = seq_read,
+       .write          = tracing_write_stub,
        .llseek         = seq_lseek,
        .release        = tracing_release,
 };
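
Together with the FMODE_WRITE handling added to tracing_open() above, the write stub makes the trace files clearable from user space: a plain "echo > /debug/tracing/trace" resets all CPU buffers, and the per-cpu trace files reset only their own. A small user-space sketch of the same operation, assuming debugfs is mounted on /debug as in the README text updated by this patch:

#include <fcntl.h>
#include <unistd.h>

static int clear_trace(void)
{
	/* opening for write (without O_APPEND) resets the buffer */
	int fd = open("/debug/tracing/trace", O_WRONLY);

	if (fd < 0)
		return -1;
	close(fd);	/* any data written would just hit tracing_write_stub() */
	return 0;
}
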
@@ -2146,6 +2284,34 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
        return 0;
 }
 
+static void set_tracer_flags(unsigned int mask, int enabled)
+{
+       /* do nothing if flag is already set */
+       if (!!(trace_flags & mask) == !!enabled)
+               return;
+
+       if (enabled)
+               trace_flags |= mask;
+       else
+               trace_flags &= ~mask;
+
+       if (mask == TRACE_ITER_GLOBAL_CLK) {
+               u64 (*func)(void);
+
+               if (enabled)
+                       func = trace_clock_global;
+               else
+                       func = trace_clock_local;
+
+               mutex_lock(&trace_types_lock);
+               ring_buffer_set_clock(global_trace.buffer, func);
+
+               if (max_tr.buffer)
+                       ring_buffer_set_clock(max_tr.buffer, func);
+               mutex_unlock(&trace_types_lock);
+       }
+}
+
 static ssize_t
 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
@@ -2173,10 +2339,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                int len = strlen(trace_options[i]);
 
                if (strncmp(cmp, trace_options[i], len) == 0) {
-                       if (neg)
-                               trace_flags &= ~(1 << i);
-                       else
-                               trace_flags |= (1 << i);
+                       set_tracer_flags(1 << i, !neg);
                        break;
                }
        }
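
set_tracer_flags() gives option bits one place for side effects; for TRACE_ITER_GLOBAL_CLK that side effect is swapping the ring-buffer clock between trace_clock_local and trace_clock_global. From user space the toggle is simply a write of the option name, or its "no" form, into trace_options; a sketch, again assuming the /debug mount point:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_trace_option(const char *opt)
{
	int fd = open("/debug/tracing/trace_options", O_WRONLY);
	ssize_t n = -1;

	if (fd >= 0) {
		n = write(fd, opt, strlen(opt));
		close(fd);
	}
	return n < 0 ? -1 : 0;
}

/* e.g. set_trace_option("global-clock"); set_trace_option("noglobal-clock"); */
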
@@ -2206,9 +2369,9 @@ static const char readme_msg[] =
        "# mkdir /debug\n"
        "# mount -t debugfs nodev /debug\n\n"
        "# cat /debug/tracing/available_tracers\n"
-       "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+       "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
        "# cat /debug/tracing/current_tracer\n"
-       "none\n"
+       "nop\n"
        "# echo sched_switch > /debug/tracing/current_tracer\n"
        "# cat /debug/tracing/current_tracer\n"
        "sched_switch\n"
@@ -2311,6 +2474,75 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
        return t->init(tr);
 }
 
+static int tracing_resize_ring_buffer(unsigned long size)
+{
+       int ret;
+
+       /*
+        * If kernel or user changes the size of the ring buffer
+        * we use the size that was given, and we can forget about
+        * expanding it later.
+        */
+       ring_buffer_expanded = 1;
+
+       ret = ring_buffer_resize(global_trace.buffer, size);
+       if (ret < 0)
+               return ret;
+
+       ret = ring_buffer_resize(max_tr.buffer, size);
+       if (ret < 0) {
+               int r;
+
+               r = ring_buffer_resize(global_trace.buffer,
+                                      global_trace.entries);
+               if (r < 0) {
+                       /*
+                        * AARGH! We are left with different
+                        * size max buffer!!!!
+                        * The max buffer is our "snapshot" buffer.
+                        * When a tracer needs a snapshot (one of the
+                        * latency tracers), it swaps the max buffer
+                        * with the saved snap shot. We succeeded to
+                        * update the size of the main buffer, but failed to
+                        * update the size of the max buffer. But when we tried
+                        * to reset the main buffer to the original size, we
+                        * failed there too. This is very unlikely to
+                        * happen, but if it does, warn and kill all
+                        * tracing.
+                        */
+                       WARN_ON(1);
+                       tracing_disabled = 1;
+               }
+               return ret;
+       }
+
+       global_trace.entries = size;
+
+       return ret;
+}
+
+/**
+ * tracing_update_buffers - used by tracing facility to expand ring buffers
+ *
+ * To save on memory when the tracing is never used on a system with it
+ * configured in. The ring buffers are set to a minimum size. But once
+ * a user starts to use the tracing facility, then they need to grow
+ * to their default size.
+ *
+ * This function is to be called when a tracer is about to be used.
+ */
+int tracing_update_buffers(void)
+{
+       int ret = 0;
+
+       mutex_lock(&trace_types_lock);
+       if (!ring_buffer_expanded)
+               ret = tracing_resize_ring_buffer(trace_buf_size);
+       mutex_unlock(&trace_types_lock);
+
+       return ret;
+}
+
 struct trace_option_dentry;
 
 static struct trace_option_dentry *
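
tracing_update_buffers() pairs with the boot-time minimal buffer introduced at the top of this patch: anything that is about to record events grows the buffer to trace_buf_size exactly once. A hypothetical caller (not part of this patch, name invented) showing the intended pattern:

/* assumes kernel/trace context, i.e. "trace.h" for the declaration */
static int my_feature_enable(void)
{
	int ret = tracing_update_buffers();	/* expand ring buffer if still minimal */

	if (ret < 0)
		return ret;	/* could not expand; stay disabled */

	/* ... switch the tracer/event on, entries may now be written ... */
	return 0;
}
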
@@ -2327,6 +2559,14 @@ static int tracing_set_tracer(const char *buf)
        int ret = 0;
 
        mutex_lock(&trace_types_lock);
+
+       if (!ring_buffer_expanded) {
+               ret = tracing_resize_ring_buffer(trace_buf_size);
+               if (ret < 0)
+                       goto out;
+               ret = 0;
+       }
+
        for (t = trace_types; t; t = t->next) {
                if (strcmp(t->name, buf) == 0)
                        break;
@@ -2852,10 +3092,18 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
                     size_t cnt, loff_t *ppos)
 {
        struct trace_array *tr = filp->private_data;
-       char buf[64];
+       char buf[96];
        int r;
 
-       r = sprintf(buf, "%lu\n", tr->entries >> 10);
+       mutex_lock(&trace_types_lock);
+       if (!ring_buffer_expanded)
+               r = sprintf(buf, "%lu (expanded: %lu)\n",
+                           tr->entries >> 10,
+                           trace_buf_size >> 10);
+       else
+               r = sprintf(buf, "%lu\n", tr->entries >> 10);
+       mutex_unlock(&trace_types_lock);
+
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
@@ -2899,28 +3147,11 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
        val <<= 10;
 
        if (val != global_trace.entries) {
-               ret = ring_buffer_resize(global_trace.buffer, val);
+               ret = tracing_resize_ring_buffer(val);
                if (ret < 0) {
                        cnt = ret;
                        goto out;
                }
-
-               ret = ring_buffer_resize(max_tr.buffer, val);
-               if (ret < 0) {
-                       int r;
-                       cnt = ret;
-                       r = ring_buffer_resize(global_trace.buffer,
-                                              global_trace.entries);
-                       if (r < 0) {
-                               /* AARGH! We are left with different
-                                * size max buffer!!!! */
-                               WARN_ON(1);
-                               tracing_disabled = 1;
-                       }
-                       goto out;
-               }
-
-               global_trace.entries = val;
        }
 
        filp->f_pos += cnt;
@@ -2948,7 +3179,7 @@ static int mark_printk(const char *fmt, ...)
        int ret;
        va_list args;
        va_start(args, fmt);
-       ret = trace_vprintk(0, -1, fmt, args);
+       ret = trace_vprintk(0, fmt, args);
        va_end(args);
        return ret;
 }
@@ -3316,6 +3547,9 @@ struct dentry *tracing_init_dentry(void)
        if (d_tracer)
                return d_tracer;
 
+       if (!debugfs_initialized())
+               return NULL;
+
        d_tracer = debugfs_create_dir("tracing", NULL);
 
        if (!d_tracer && !once) {
@@ -3377,10 +3611,15 @@ static void tracing_init_debugfs_percpu(long cpu)
                pr_warning("Could not create debugfs 'trace_pipe' entry\n");
 
        /* per cpu trace */
-       entry = debugfs_create_file("trace", 0444, d_cpu,
+       entry = debugfs_create_file("trace", 0644, d_cpu,
                                (void *) cpu, &tracing_fops);
        if (!entry)
                pr_warning("Could not create debugfs 'trace' entry\n");
+
+       entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
+                                   (void *) cpu, &tracing_buffers_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -3664,7 +3903,6 @@ static __init void create_trace_options_dir(void)
 static __init int tracer_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       struct dentry *buffers;
        struct dentry *entry;
        int cpu;
 
@@ -3687,7 +3925,7 @@ static __init int tracer_init_debugfs(void)
        if (!entry)
                pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
 
-       entry = debugfs_create_file("trace", 0444, d_tracer,
+       entry = debugfs_create_file("trace", 0644, d_tracer,
                                 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
        if (!entry)
                pr_warning("Could not create debugfs 'trace' entry\n");
@@ -3737,26 +3975,6 @@ static __init int tracer_init_debugfs(void)
                pr_warning("Could not create debugfs "
                           "'trace_marker' entry\n");
 
-       buffers = debugfs_create_dir("binary_buffers", d_tracer);
-
-       if (!buffers)
-               pr_warning("Could not create buffers directory\n");
-       else {
-               int cpu;
-               char buf[64];
-
-               for_each_tracing_cpu(cpu) {
-                       sprintf(buf, "%d", cpu);
-
-                       entry = debugfs_create_file(buf, 0444, buffers,
-                                                   (void *)(long)cpu,
-                                                   &tracing_buffers_fops);
-                       if (!entry)
-                               pr_warning("Could not create debugfs buffers "
-                                          "'%s' entry\n", buf);
-               }
-       }
-
 #ifdef CONFIG_DYNAMIC_FTRACE
        entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
                                    &ftrace_update_tot_cnt,
@@ -3837,11 +4055,12 @@ trace_printk_seq(struct trace_seq *s)
        trace_seq_init(s);
 }
 
-void ftrace_dump(void)
+static void __ftrace_dump(bool disable_tracing)
 {
        static DEFINE_SPINLOCK(ftrace_dump_lock);
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
+       unsigned int old_userobj;
        static int dump_ran;
        unsigned long flags;
        int cnt = 0, cpu;
@@ -3853,14 +4072,17 @@ void ftrace_dump(void)
 
        dump_ran = 1;
 
-       /* No turning back! */
        tracing_off();
-       ftrace_kill();
+
+       if (disable_tracing)
+               ftrace_kill();
 
        for_each_tracing_cpu(cpu) {
                atomic_inc(&global_trace.data[cpu]->disabled);
        }
 
+       old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
+
        /* don't look at user memory in panic mode */
        trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 
@@ -3905,13 +4127,30 @@ void ftrace_dump(void)
        else
                printk(KERN_TRACE "---------------------------------\n");
 
+       /* Re-enable tracing if requested */
+       if (!disable_tracing) {
+               trace_flags |= old_userobj;
+
+               for_each_tracing_cpu(cpu) {
+                       atomic_dec(&global_trace.data[cpu]->disabled);
+               }
+               tracing_on();
+       }
+
  out:
        spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }
 
+/* By default: disable tracing after the dump */
+void ftrace_dump(void)
+{
+       __ftrace_dump(true);
+}
+
 __init static int tracer_alloc_buffers(void)
 {
        struct trace_array_cpu *data;
+       int ring_buf_size;
        int i;
        int ret = -ENOMEM;
 
@@ -3924,12 +4163,18 @@ __init static int tracer_alloc_buffers(void)
        if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
                goto out_free_tracing_cpumask;
 
+       /* To save memory, keep the ring buffer size to its minimum */
+       if (ring_buffer_expanded)
+               ring_buf_size = trace_buf_size;
+       else
+               ring_buf_size = 1;
+
        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
        cpumask_copy(tracing_cpumask, cpu_all_mask);
        cpumask_clear(tracing_reader_cpumask);
 
        /* TODO: make the number of buffers hot pluggable with CPUS */
-       global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+       global_trace.buffer = ring_buffer_alloc(ring_buf_size,
                                                   TRACE_BUFFER_FLAGS);
        if (!global_trace.buffer) {
                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
@@ -3940,7 +4185,7 @@ __init static int tracer_alloc_buffers(void)
 
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-       max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+       max_tr.buffer = ring_buffer_alloc(ring_buf_size,
                                             TRACE_BUFFER_FLAGS);
        if (!max_tr.buffer) {
                printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
@@ -3972,7 +4217,8 @@ __init static int tracer_alloc_buffers(void)
                                       &trace_panic_notifier);
 
        register_die_notifier(&trace_die_notifier);
-       ret = 0;
+
+       return 0;
 
 out_free_cpumask:
        free_cpumask_var(tracing_reader_cpumask);