1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/ring_buffer.h>
15 #include <linux/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/smp_lock.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/ctype.h>
36 #include <linux/init.h>
37 #include <linux/poll.h>
38 #include <linux/gfp.h>
39 #include <linux/fs.h>
40
41 #include "trace.h"
42 #include "trace_output.h"
43
44 #define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 int ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest. Concurrent insertions
56  * into the ring buffer, such as trace_printk, could occur at the
57  * same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * Kill all tracing for good (never come back).
83  * It is initialized to 1, and is set back to zero only when the
84  * initialization of the tracer succeeds. That is the only place
85  * that clears it.
86  */
87 static int tracing_disabled = 1;
88
89 DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
90
91 static inline void ftrace_disable_cpu(void)
92 {
93         preempt_disable();
94         local_inc(&__get_cpu_var(ftrace_cpu_disabled));
95 }
96
97 static inline void ftrace_enable_cpu(void)
98 {
99         local_dec(&__get_cpu_var(ftrace_cpu_disabled));
100         preempt_enable();
101 }
102
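/*
 * Hedged editorial sketch (not from the original file): the two helpers
 * above are meant to bracket any direct ring-buffer access so that the
 * function tracer cannot recurse into the buffer while it is being
 * read, roughly:
 *
 *	ftrace_disable_cpu();
 *	event = ring_buffer_peek(global_trace.buffer, cpu, &ts);
 *	ftrace_enable_cpu();
 *
 * See peek_next_entry() and __tracing_reset() below for the real uses.
 */
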
103 static cpumask_var_t __read_mostly      tracing_buffer_mask;
104
105 /* Define which cpu buffers are currently read in trace_pipe */
106 static cpumask_var_t                    tracing_reader_cpumask;
107
108 #define for_each_tracing_cpu(cpu)       \
109         for_each_cpu(cpu, tracing_buffer_mask)
110
111 /*
112  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
113  *
114  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
115  * is set, then ftrace_dump is called. This will output the contents
116  * of the ftrace buffers to the console.  This is very useful for
117  * capturing traces that lead to crashes and outputting them to a
118  * serial console.
119  *
120  * It is off by default, but you can enable it either by specifying
121  * "ftrace_dump_on_oops" on the kernel command line, or by setting
122  * /proc/sys/kernel/ftrace_dump_on_oops to true.
123  */
124 int ftrace_dump_on_oops;
125
126 static int tracing_set_tracer(const char *buf);
127
128 #define BOOTUP_TRACER_SIZE              100
129 static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
130 static char *default_bootup_tracer;
131
132 static int __init set_ftrace(char *str)
133 {
134         strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
135         default_bootup_tracer = bootup_tracer_buf;
136         /* We are using ftrace early, expand it */
137         ring_buffer_expanded = 1;
138         return 1;
139 }
140 __setup("ftrace=", set_ftrace);
141
142 static int __init set_ftrace_dump_on_oops(char *str)
143 {
144         ftrace_dump_on_oops = 1;
145         return 1;
146 }
147 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
148
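/*
 * Editorial usage example (hedged): both hooks above are kernel command
 * line parameters.  For instance, booting with
 *
 *	ftrace=function ftrace_dump_on_oops
 *
 * selects the "function" tracer early and dumps the trace buffers to the
 * console if an oops happens.  ftrace_dump_on_oops can also be enabled
 * at run time through /proc/sys/kernel/ftrace_dump_on_oops.
 */
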
149 unsigned long long ns2usecs(cycle_t nsec)
150 {
151         nsec += 500;
152         do_div(nsec, 1000);
153         return nsec;
154 }
155
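/*
 * Editorial note: the "+ 500" above rounds to the nearest microsecond,
 * e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */
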
156 /*
157  * The global_trace is the descriptor that holds the tracing
158  * buffers for the live tracing. For each CPU, it contains
159  * a link list of pages that will store trace entries. The
160  * a linked list of pages that will store trace entries. The
161  * page descriptors of the pages in memory are used to hold
162  * the linked list by linking the lru item in the page descriptor
163  * to each of the pages in the buffer per CPU.
164  * For each active CPU there is a data field that holds the
165  * pages for the buffer for that CPU. Each CPU has the same number
166  * of pages allocated for its buffer.
167  */
168 static struct trace_array       global_trace;
169
170 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
171
172 int filter_current_check_discard(struct ring_buffer *buffer,
173                                  struct ftrace_event_call *call, void *rec,
174                                  struct ring_buffer_event *event)
175 {
176         return filter_check_discard(call, rec, buffer, event);
177 }
178 EXPORT_SYMBOL_GPL(filter_current_check_discard);
179
180 cycle_t ftrace_now(int cpu)
181 {
182         u64 ts;
183
184         /* Early boot up does not have a buffer yet */
185         if (!global_trace.buffer)
186                 return trace_clock_local();
187
188         ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
189         ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
190
191         return ts;
192 }
193
194 /*
195  * The max_tr is used to snapshot the global_trace when a maximum
196  * latency is reached. Some tracers will use this to store a maximum
197  * trace while they continue examining live traces.
198  *
199  * The buffers for the max_tr are set up the same as the global_trace.
200  * When a snapshot is taken, the linked list of the max_tr is swapped
201  * with the linked list of the global_trace and the buffers are reset for
202  * the global_trace so the tracing can continue.
203  */
204 static struct trace_array       max_tr;
205
206 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
207
208 /* tracer_enabled is used to toggle activation of a tracer */
209 static int                      tracer_enabled = 1;
210
211 /**
212  * tracing_is_enabled - return tracer_enabled status
213  *
214  * This function is used by other tracers to know the status
215  * of the tracer_enabled flag.  Tracers may use this function
216  * to know if they should enable their features when starting
217  * up. See irqsoff tracer for an example (start_irqsoff_tracer).
218  */
219 int tracing_is_enabled(void)
220 {
221         return tracer_enabled;
222 }
223
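/*
 * Hedged editorial sketch: a tracer's start path would typically gate
 * its own machinery on this flag, roughly:
 *
 *	static void example_tracer_start(struct trace_array *tr)
 *	{
 *		if (tracing_is_enabled())
 *			example_arm(tr);	(hypothetical helper)
 *	}
 *
 * See start_irqsoff_tracer() in trace_irqsoff.c for the real pattern.
 */
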
224 /*
225  * trace_buf_size is the size in bytes that is allocated
226  * for a buffer. Note, the number of bytes is always rounded
227  * to page size.
228  *
229  * This number is purposely set to a low value (16384 entries):
230  * if a dump on oops happens, it is much appreciated not to have
231  * to wait for all that output. In any case, this is configurable
232  * at both boot time and run time.
233  */
234 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
235
236 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
237
238 /* trace_types holds a linked list of available tracers. */
239 static struct tracer            *trace_types __read_mostly;
240
241 /* current_trace points to the tracer that is currently active */
242 static struct tracer            *current_trace __read_mostly;
243
244 /*
245  * max_tracer_type_len is used to simplify the allocation of
246  * buffers used to read tracer names from userspace. We keep track of
247  * the longest tracer name registered.
248  */
249 static int                      max_tracer_type_len;
250
251 /*
252  * trace_types_lock is used to protect the trace_types list.
253  * This lock is also used to keep user access serialized.
254  * Accesses from userspace will grab this lock while userspace
255  * activities happen inside the kernel.
256  */
257 static DEFINE_MUTEX(trace_types_lock);
258
259 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
260 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
261
262 /* trace_flags holds trace_options default values */
263 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
264         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
265         TRACE_ITER_GRAPH_TIME;
266
267 static int trace_stop_count;
268 static DEFINE_SPINLOCK(tracing_start_lock);
269
270 /**
271  * trace_wake_up - wake up tasks waiting for trace input
272  *
273  * Simply wakes up any task that is blocked on the trace_wait
274  * queue. This is used with trace_poll for tasks polling the trace.
275  */
276 void trace_wake_up(void)
277 {
278         int cpu;
279
280         if (trace_flags & TRACE_ITER_BLOCK)
281                 return;
282         /*
283          * The runqueue_is_locked() can fail, but this is the best we
284          * have for now:
285          */
286         cpu = get_cpu();
287         if (!runqueue_is_locked(cpu))
288                 wake_up(&trace_wait);
289         put_cpu();
290 }
291
292 static int __init set_buf_size(char *str)
293 {
294         unsigned long buf_size;
295
296         if (!str)
297                 return 0;
298         buf_size = memparse(str, &str);
299         /* nr_entries cannot be zero */
300         if (buf_size == 0)
301                 return 0;
302         trace_buf_size = buf_size;
303         return 1;
304 }
305 __setup("trace_buf_size=", set_buf_size);
306
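/*
 * Editorial usage example (hedged): the default can be overridden at
 * boot, e.g. "trace_buf_size=1048576" on the kernel command line.
 * memparse() also accepts size suffixes, so "trace_buf_size=1M" works
 * as well.
 */
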
307 unsigned long nsecs_to_usecs(unsigned long nsecs)
308 {
309         return nsecs / 1000;
310 }
311
312 /* These must match the bit positions in trace_iterator_flags */
313 static const char *trace_options[] = {
314         "print-parent",
315         "sym-offset",
316         "sym-addr",
317         "verbose",
318         "raw",
319         "hex",
320         "bin",
321         "block",
322         "stacktrace",
323         "sched-tree",
324         "trace_printk",
325         "ftrace_preempt",
326         "branch",
327         "annotate",
328         "userstacktrace",
329         "sym-userobj",
330         "printk-msg-only",
331         "context-info",
332         "latency-format",
333         "sleep-time",
334         "graph-time",
335         NULL
336 };
337
338 static struct {
339         u64 (*func)(void);
340         const char *name;
341 } trace_clocks[] = {
342         { trace_clock_local,    "local" },
343         { trace_clock_global,   "global" },
344 };
345
346 int trace_clock_id;
347
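/*
 * Editorial note (hedged): the names above ("local", "global") are what
 * user space writes to the debugfs trace_clock control file to pick the
 * clock used for time stamping events; trace_clock_id indexes this array.
 */
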
348 /*
349  * trace_parser_get_init - allocates the buffer for the trace parser
350  */
351 int trace_parser_get_init(struct trace_parser *parser, int size)
352 {
353         memset(parser, 0, sizeof(*parser));
354
355         parser->buffer = kmalloc(size, GFP_KERNEL);
356         if (!parser->buffer)
357                 return 1;
358
359         parser->size = size;
360         return 0;
361 }
362
363 /*
364  * trace_parser_put - frees the buffer for the trace parser
365  */
366 void trace_parser_put(struct trace_parser *parser)
367 {
368         kfree(parser->buffer);
369 }
370
371 /*
372  * trace_get_user - reads the user input string separated by space
373  * (matched by isspace(ch))
374  *
375  * For each string found the 'struct trace_parser' is updated,
376  * and the function returns.
377  *
378  * Returns number of bytes read.
379  *
380  * See kernel/trace/trace.h for 'struct trace_parser' details.
381  */
382 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
383         size_t cnt, loff_t *ppos)
384 {
385         char ch;
386         size_t read = 0;
387         ssize_t ret;
388
389         if (!*ppos)
390                 trace_parser_clear(parser);
391
392         ret = get_user(ch, ubuf++);
393         if (ret)
394                 goto out;
395
396         read++;
397         cnt--;
398
399         /*
400          * If the parser is not finished with the last write, keep
401          * reading the user input without skipping leading spaces.
402          */
403         if (!parser->cont) {
404                 /* skip white space */
405                 while (cnt && isspace(ch)) {
406                         ret = get_user(ch, ubuf++);
407                         if (ret)
408                                 goto out;
409                         read++;
410                         cnt--;
411                 }
412
413                 /* only spaces were written */
414                 if (isspace(ch)) {
415                         *ppos += read;
416                         ret = read;
417                         goto out;
418                 }
419
420                 parser->idx = 0;
421         }
422
423         /* read the non-space input */
424         while (cnt && !isspace(ch)) {
425                 if (parser->idx < parser->size)
426                         parser->buffer[parser->idx++] = ch;
427                 else {
428                         ret = -EINVAL;
429                         goto out;
430                 }
431                 ret = get_user(ch, ubuf++);
432                 if (ret)
433                         goto out;
434                 read++;
435                 cnt--;
436         }
437
438         /* We either got finished input or we have to wait for another call. */
439         if (isspace(ch)) {
440                 parser->buffer[parser->idx] = 0;
441                 parser->cont = false;
442         } else {
443                 parser->cont = true;
444                 parser->buffer[parser->idx++] = ch;
445         }
446
447         *ppos += read;
448         ret = read;
449
450 out:
451         return ret;
452 }
453
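/*
 * Hedged editorial sketch (not part of the original file): a typical
 * caller of the parser API above, such as a debugfs write handler,
 * would look roughly like this.  example_parse_write() and the 64-byte
 * token size are assumptions made purely for illustration.
 */
static ssize_t __maybe_unused
example_parse_write(struct file *filp, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read >= 0 && trace_parser_loaded(&parser))
		pr_info("parsed token: %s\n", parser.buffer);

	trace_parser_put(&parser);
	return read;
}
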
454 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
455 {
456         int len;
457         int ret;
458
459         if (!cnt)
460                 return 0;
461
462         if (s->len <= s->readpos)
463                 return -EBUSY;
464
465         len = s->len - s->readpos;
466         if (cnt > len)
467                 cnt = len;
468         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
469         if (ret == cnt)
470                 return -EFAULT;
471
472         cnt -= ret;
473
474         s->readpos += cnt;
475         return cnt;
476 }
477
478 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
479 {
480         int len;
481         void *ret;
482
483         if (s->len <= s->readpos)
484                 return -EBUSY;
485
486         len = s->len - s->readpos;
487         if (cnt > len)
488                 cnt = len;
489         ret = memcpy(buf, s->buffer + s->readpos, cnt);
490         if (!ret)
491                 return -EFAULT;
492
493         s->readpos += cnt;
494         return cnt;
495 }
496
497 /*
498  * ftrace_max_lock is used to protect the swapping of buffers
499  * when taking a max snapshot. The buffers themselves are
500  * protected by per_cpu spinlocks. But the action of the swap
501  * needs its own lock.
502  *
503  * This is defined as a raw_spinlock_t in order to help
504  * with performance when lockdep debugging is enabled.
505  *
506  * It is also used in other places outside of update_max_tr(),
507  * so it needs to be defined outside of the
508  * CONFIG_TRACER_MAX_TRACE #ifdef block.
509  */
510 static raw_spinlock_t ftrace_max_lock =
511         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
512
513 #ifdef CONFIG_TRACER_MAX_TRACE
514 unsigned long __read_mostly     tracing_max_latency;
515 unsigned long __read_mostly     tracing_thresh;
516
517 /*
518  * Copy the new maximum trace into the separate maximum-trace
519  * structure. (this way the maximum trace is permanently saved,
520  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
521  */
522 static void
523 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
524 {
525         struct trace_array_cpu *data = tr->data[cpu];
526         struct trace_array_cpu *max_data;
527
528         max_tr.cpu = cpu;
529         max_tr.time_start = data->preempt_timestamp;
530
531         max_data = max_tr.data[cpu];
532         max_data->saved_latency = tracing_max_latency;
533         max_data->critical_start = data->critical_start;
534         max_data->critical_end = data->critical_end;
535
536         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
537         max_data->pid = tsk->pid;
538         max_data->uid = task_uid(tsk);
539         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
540         max_data->policy = tsk->policy;
541         max_data->rt_priority = tsk->rt_priority;
542
543         /* record this task's comm */
544         tracing_record_cmdline(tsk);
545 }
546
547 /**
548  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
549  * @tr: tracer
550  * @tsk: the task with the latency
551  * @cpu: The cpu that initiated the trace.
552  *
553  * Flip the buffers between the @tr and the max_tr and record information
554  * about which task was the cause of this latency.
555  */
556 void
557 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
558 {
559         struct ring_buffer *buf = tr->buffer;
560
561         if (trace_stop_count)
562                 return;
563
564         WARN_ON_ONCE(!irqs_disabled());
565         __raw_spin_lock(&ftrace_max_lock);
566
567         tr->buffer = max_tr.buffer;
568         max_tr.buffer = buf;
569
570         __update_max_tr(tr, tsk, cpu);
571         __raw_spin_unlock(&ftrace_max_lock);
572 }
573
574 /**
575  * update_max_tr_single - only copy one trace over, and reset the rest
576  * @tr: tracer
577  * @tsk: task with the latency
578  * @cpu: the cpu of the buffer to copy.
579  *
580  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
581  */
582 void
583 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
584 {
585         int ret;
586
587         if (trace_stop_count)
588                 return;
589
590         WARN_ON_ONCE(!irqs_disabled());
591         __raw_spin_lock(&ftrace_max_lock);
592
593         ftrace_disable_cpu();
594
595         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
596
597         if (ret == -EBUSY) {
598                 /*
599                  * We failed to swap the buffer due to a commit taking
600                  * place on this CPU. We fail to record, but we reset
601                  * the max trace buffer (no one writes directly to it)
602                  * and flag that it failed.
603                  */
604                 trace_array_printk(&max_tr, _THIS_IP_,
605                         "Failed to swap buffers due to commit in progress\n");
606         }
607
608         ftrace_enable_cpu();
609
610         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
611
612         __update_max_tr(tr, tsk, cpu);
613         __raw_spin_unlock(&ftrace_max_lock);
614 }
615 #endif /* CONFIG_TRACER_MAX_TRACE */
616
617 /**
618  * register_tracer - register a tracer with the ftrace system.
619  * @type: the plugin for the tracer
620  *
621  * Register a new plugin tracer.
622  */
623 int register_tracer(struct tracer *type)
624 __releases(kernel_lock)
625 __acquires(kernel_lock)
626 {
627         struct tracer *t;
628         int len;
629         int ret = 0;
630
631         if (!type->name) {
632                 pr_info("Tracer must have a name\n");
633                 return -1;
634         }
635
636         /*
637          * When this gets called we hold the BKL which means that
638          * preemption is disabled. Various trace selftests however
639          * need to disable and enable preemption for successful tests.
640          * So we drop the BKL here and grab it after the tests again.
641          */
642         unlock_kernel();
643         mutex_lock(&trace_types_lock);
644
645         tracing_selftest_running = true;
646
647         for (t = trace_types; t; t = t->next) {
648                 if (strcmp(type->name, t->name) == 0) {
649                         /* already found */
650                         pr_info("Trace %s already registered\n",
651                                 type->name);
652                         ret = -1;
653                         goto out;
654                 }
655         }
656
657         if (!type->set_flag)
658                 type->set_flag = &dummy_set_flag;
659         if (!type->flags)
660                 type->flags = &dummy_tracer_flags;
661         else
662                 if (!type->flags->opts)
663                         type->flags->opts = dummy_tracer_opt;
664         if (!type->wait_pipe)
665                 type->wait_pipe = default_wait_pipe;
666
667
668 #ifdef CONFIG_FTRACE_STARTUP_TEST
669         if (type->selftest && !tracing_selftest_disabled) {
670                 struct tracer *saved_tracer = current_trace;
671                 struct trace_array *tr = &global_trace;
672
673                 /*
674                  * Run a selftest on this tracer.
675                  * Here we reset the trace buffer, and set the current
676                  * tracer to be this tracer. The tracer can then run some
677                  * internal tracing to verify that everything is in order.
678                  * If we fail, we do not register this tracer.
679                  */
680                 tracing_reset_online_cpus(tr);
681
682                 current_trace = type;
683                 /* the test is responsible for initializing and enabling */
684                 pr_info("Testing tracer %s: ", type->name);
685                 ret = type->selftest(type, tr);
686                 /* the test is responsible for resetting too */
687                 current_trace = saved_tracer;
688                 if (ret) {
689                         printk(KERN_CONT "FAILED!\n");
690                         goto out;
691                 }
692                 /* Only reset on passing, to avoid touching corrupted buffers */
693                 tracing_reset_online_cpus(tr);
694
695                 printk(KERN_CONT "PASSED\n");
696         }
697 #endif
698
699         type->next = trace_types;
700         trace_types = type;
701         len = strlen(type->name);
702         if (len > max_tracer_type_len)
703                 max_tracer_type_len = len;
704
705  out:
706         tracing_selftest_running = false;
707         mutex_unlock(&trace_types_lock);
708
709         if (ret || !default_bootup_tracer)
710                 goto out_unlock;
711
712         if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
713                 goto out_unlock;
714
715         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
716         /* Do we want this tracer to start on bootup? */
717         tracing_set_tracer(type->name);
718         default_bootup_tracer = NULL;
719         /* disable other selftests, since this will break them. */
720         tracing_selftest_disabled = 1;
721 #ifdef CONFIG_FTRACE_STARTUP_TEST
722         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
723                type->name);
724 #endif
725
726  out_unlock:
727         lock_kernel();
728         return ret;
729 }
730
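/*
 * Hedged editorial sketch (not part of the original file): a minimal
 * tracer registration would look roughly like this.  The "example"
 * tracer, its callbacks and the initcall are assumptions made for the
 * illustration only; see trace_nop.c for a real minimal tracer.
 */
static int example_tracer_init(struct trace_array *tr)
{
	tracing_reset_online_cpus(tr);
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	tracing_reset_online_cpus(tr);
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
device_initcall(init_example_tracer);
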
731 void unregister_tracer(struct tracer *type)
732 {
733         struct tracer **t;
734         int len;
735
736         mutex_lock(&trace_types_lock);
737         for (t = &trace_types; *t; t = &(*t)->next) {
738                 if (*t == type)
739                         goto found;
740         }
741         pr_info("Trace %s not registered\n", type->name);
742         goto out;
743
744  found:
745         *t = (*t)->next;
746
747         if (type == current_trace && tracer_enabled) {
748                 tracer_enabled = 0;
749                 tracing_stop();
750                 if (current_trace->stop)
751                         current_trace->stop(&global_trace);
752                 current_trace = &nop_trace;
753         }
754
755         if (strlen(type->name) != max_tracer_type_len)
756                 goto out;
757
758         max_tracer_type_len = 0;
759         for (t = &trace_types; *t; t = &(*t)->next) {
760                 len = strlen((*t)->name);
761                 if (len > max_tracer_type_len)
762                         max_tracer_type_len = len;
763         }
764  out:
765         mutex_unlock(&trace_types_lock);
766 }
767
768 static void __tracing_reset(struct trace_array *tr, int cpu)
769 {
770         ftrace_disable_cpu();
771         ring_buffer_reset_cpu(tr->buffer, cpu);
772         ftrace_enable_cpu();
773 }
774
775 void tracing_reset(struct trace_array *tr, int cpu)
776 {
777         struct ring_buffer *buffer = tr->buffer;
778
779         ring_buffer_record_disable(buffer);
780
781         /* Make sure all commits have finished */
782         synchronize_sched();
783         __tracing_reset(tr, cpu);
784
785         ring_buffer_record_enable(buffer);
786 }
787
788 void tracing_reset_online_cpus(struct trace_array *tr)
789 {
790         struct ring_buffer *buffer = tr->buffer;
791         int cpu;
792
793         ring_buffer_record_disable(buffer);
794
795         /* Make sure all commits have finished */
796         synchronize_sched();
797
798         tr->time_start = ftrace_now(tr->cpu);
799
800         for_each_online_cpu(cpu)
801                 __tracing_reset(tr, cpu);
802
803         ring_buffer_record_enable(buffer);
804 }
805
806 void tracing_reset_current(int cpu)
807 {
808         tracing_reset(&global_trace, cpu);
809 }
810
811 void tracing_reset_current_online_cpus(void)
812 {
813         tracing_reset_online_cpus(&global_trace);
814 }
815
816 #define SAVED_CMDLINES 128
817 #define NO_CMDLINE_MAP UINT_MAX
818 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
819 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
820 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
821 static int cmdline_idx;
822 static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
823
824 /* temporarily disable recording */
825 static atomic_t trace_record_cmdline_disabled __read_mostly;
826
827 static void trace_init_cmdlines(void)
828 {
829         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
830         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
831         cmdline_idx = 0;
832 }
833
834 int is_tracing_stopped(void)
835 {
836         return trace_stop_count;
837 }
838
839 /**
840  * ftrace_off_permanent - disable all ftrace code permanently
841  *
842  * This should only be called when a serious anomaly has
843  * been detected.  This will turn off function tracing,
844  * ring buffers, and other tracing utilities. It takes no
845  * locks and can be called from any context.
846  */
847 void ftrace_off_permanent(void)
848 {
849         tracing_disabled = 1;
850         ftrace_stop();
851         tracing_off_permanent();
852 }
853
854 /**
855  * tracing_start - quick start of the tracer
856  *
857  * If tracing is enabled but was stopped by tracing_stop,
858  * this will start the tracer back up.
859  */
860 void tracing_start(void)
861 {
862         struct ring_buffer *buffer;
863         unsigned long flags;
864
865         if (tracing_disabled)
866                 return;
867
868         spin_lock_irqsave(&tracing_start_lock, flags);
869         if (--trace_stop_count) {
870                 if (trace_stop_count < 0) {
871                         /* Someone screwed up their debugging */
872                         WARN_ON_ONCE(1);
873                         trace_stop_count = 0;
874                 }
875                 goto out;
876         }
877
878
879         buffer = global_trace.buffer;
880         if (buffer)
881                 ring_buffer_record_enable(buffer);
882
883         buffer = max_tr.buffer;
884         if (buffer)
885                 ring_buffer_record_enable(buffer);
886
887         ftrace_start();
888  out:
889         spin_unlock_irqrestore(&tracing_start_lock, flags);
890 }
891
892 /**
893  * tracing_stop - quick stop of the tracer
894  *
895  * Lightweight way to stop tracing. Use in conjunction with
896  * tracing_start.
897  */
898 void tracing_stop(void)
899 {
900         struct ring_buffer *buffer;
901         unsigned long flags;
902
903         ftrace_stop();
904         spin_lock_irqsave(&tracing_start_lock, flags);
905         if (trace_stop_count++)
906                 goto out;
907
908         buffer = global_trace.buffer;
909         if (buffer)
910                 ring_buffer_record_disable(buffer);
911
912         buffer = max_tr.buffer;
913         if (buffer)
914                 ring_buffer_record_disable(buffer);
915
916  out:
917         spin_unlock_irqrestore(&tracing_start_lock, flags);
918 }
919
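/*
 * Hedged editorial sketch: tracing_stop()/tracing_start() are meant to
 * bracket a region whose events should not be recorded, e.g.
 *
 *	tracing_stop();
 *	... inspect or dump the buffers ...
 *	tracing_start();
 *
 * The stop count nests, so paired calls may come from several places.
 */
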
920 void trace_stop_cmdline_recording(void);
921
922 static void trace_save_cmdline(struct task_struct *tsk)
923 {
924         unsigned pid, idx;
925
926         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
927                 return;
928
929         /*
930          * It's not the end of the world if we don't get
931          * the lock, but we also don't want to spin
932          * nor do we want to disable interrupts,
933          * so if we miss here, then better luck next time.
934          */
935         if (!__raw_spin_trylock(&trace_cmdline_lock))
936                 return;
937
938         idx = map_pid_to_cmdline[tsk->pid];
939         if (idx == NO_CMDLINE_MAP) {
940                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
941
942                 /*
943                  * Check whether the cmdline buffer at idx has a pid
944                  * mapped. We are going to overwrite that entry so we
945                  * need to clear the map_pid_to_cmdline. Otherwise we
946                  * would read the new comm for the old pid.
947                  */
948                 pid = map_cmdline_to_pid[idx];
949                 if (pid != NO_CMDLINE_MAP)
950                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
951
952                 map_cmdline_to_pid[idx] = tsk->pid;
953                 map_pid_to_cmdline[tsk->pid] = idx;
954
955                 cmdline_idx = idx;
956         }
957
958         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
959
960         __raw_spin_unlock(&trace_cmdline_lock);
961 }
962
963 void trace_find_cmdline(int pid, char comm[])
964 {
965         unsigned map;
966
967         if (!pid) {
968                 strcpy(comm, "<idle>");
969                 return;
970         }
971
972         if (pid > PID_MAX_DEFAULT) {
973                 strcpy(comm, "<...>");
974                 return;
975         }
976
977         preempt_disable();
978         __raw_spin_lock(&trace_cmdline_lock);
979         map = map_pid_to_cmdline[pid];
980         if (map != NO_CMDLINE_MAP)
981                 strcpy(comm, saved_cmdlines[map]);
982         else
983                 strcpy(comm, "<...>");
984
985         __raw_spin_unlock(&trace_cmdline_lock);
986         preempt_enable();
987 }
988
989 void tracing_record_cmdline(struct task_struct *tsk)
990 {
991         if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
992             !tracing_is_on())
993                 return;
994
995         trace_save_cmdline(tsk);
996 }
997
998 void
999 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1000                              int pc)
1001 {
1002         struct task_struct *tsk = current;
1003
1004         entry->preempt_count            = pc & 0xff;
1005         entry->pid                      = (tsk) ? tsk->pid : 0;
1006         entry->lock_depth               = (tsk) ? tsk->lock_depth : 0;
1007         entry->flags =
1008 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1009                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1010 #else
1011                 TRACE_FLAG_IRQS_NOSUPPORT |
1012 #endif
1013                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1014                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1015                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1016 }
1017 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1018
1019 struct ring_buffer_event *
1020 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1021                           int type,
1022                           unsigned long len,
1023                           unsigned long flags, int pc)
1024 {
1025         struct ring_buffer_event *event;
1026
1027         event = ring_buffer_lock_reserve(buffer, len);
1028         if (event != NULL) {
1029                 struct trace_entry *ent = ring_buffer_event_data(event);
1030
1031                 tracing_generic_entry_update(ent, flags, pc);
1032                 ent->type = type;
1033         }
1034
1035         return event;
1036 }
1037
1038 static inline void
1039 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1040                              struct ring_buffer_event *event,
1041                              unsigned long flags, int pc,
1042                              int wake)
1043 {
1044         ring_buffer_unlock_commit(buffer, event);
1045
1046         ftrace_trace_stack(buffer, flags, 6, pc);
1047         ftrace_trace_userstack(buffer, flags, pc);
1048
1049         if (wake)
1050                 trace_wake_up();
1051 }
1052
1053 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1054                                 struct ring_buffer_event *event,
1055                                 unsigned long flags, int pc)
1056 {
1057         __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1058 }
1059
1060 struct ring_buffer_event *
1061 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1062                                   int type, unsigned long len,
1063                                   unsigned long flags, int pc)
1064 {
1065         *current_rb = global_trace.buffer;
1066         return trace_buffer_lock_reserve(*current_rb,
1067                                          type, len, flags, pc);
1068 }
1069 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1070
1071 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1072                                         struct ring_buffer_event *event,
1073                                         unsigned long flags, int pc)
1074 {
1075         __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1076 }
1077 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1078
1079 void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
1080                                        struct ring_buffer_event *event,
1081                                        unsigned long flags, int pc)
1082 {
1083         __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
1084 }
1085 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
1086
1087 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1088                                          struct ring_buffer_event *event)
1089 {
1090         ring_buffer_discard_commit(buffer, event);
1091 }
1092 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1093
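/*
 * Editorial note (hedged): the helpers above implement the usual event
 * recording sequence: reserve space in the ring buffer, fill in the
 * entry, run it through filter_check_discard(), then either commit or
 * discard it.  trace_function() just below is the canonical in-file
 * example of this flow.
 */
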
1094 void
1095 trace_function(struct trace_array *tr,
1096                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1097                int pc)
1098 {
1099         struct ftrace_event_call *call = &event_function;
1100         struct ring_buffer *buffer = tr->buffer;
1101         struct ring_buffer_event *event;
1102         struct ftrace_entry *entry;
1103
1104         /* If we are reading the ring buffer, don't trace */
1105         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
1106                 return;
1107
1108         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1109                                           flags, pc);
1110         if (!event)
1111                 return;
1112         entry   = ring_buffer_event_data(event);
1113         entry->ip                       = ip;
1114         entry->parent_ip                = parent_ip;
1115
1116         if (!filter_check_discard(call, entry, buffer, event))
1117                 ring_buffer_unlock_commit(buffer, event);
1118 }
1119
1120 void
1121 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1122        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1123        int pc)
1124 {
1125         if (likely(!atomic_read(&data->disabled)))
1126                 trace_function(tr, ip, parent_ip, flags, pc);
1127 }
1128
1129 #ifdef CONFIG_STACKTRACE
1130 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1131                                  unsigned long flags,
1132                                  int skip, int pc)
1133 {
1134         struct ftrace_event_call *call = &event_kernel_stack;
1135         struct ring_buffer_event *event;
1136         struct stack_entry *entry;
1137         struct stack_trace trace;
1138
1139         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1140                                           sizeof(*entry), flags, pc);
1141         if (!event)
1142                 return;
1143         entry   = ring_buffer_event_data(event);
1144         memset(&entry->caller, 0, sizeof(entry->caller));
1145
1146         trace.nr_entries        = 0;
1147         trace.max_entries       = FTRACE_STACK_ENTRIES;
1148         trace.skip              = skip;
1149         trace.entries           = entry->caller;
1150
1151         save_stack_trace(&trace);
1152         if (!filter_check_discard(call, entry, buffer, event))
1153                 ring_buffer_unlock_commit(buffer, event);
1154 }
1155
1156 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1157                         int skip, int pc)
1158 {
1159         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1160                 return;
1161
1162         __ftrace_trace_stack(buffer, flags, skip, pc);
1163 }
1164
1165 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1166                    int pc)
1167 {
1168         __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1169 }
1170
1171 void
1172 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1173 {
1174         struct ftrace_event_call *call = &event_user_stack;
1175         struct ring_buffer_event *event;
1176         struct userstack_entry *entry;
1177         struct stack_trace trace;
1178
1179         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1180                 return;
1181
1182         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1183                                           sizeof(*entry), flags, pc);
1184         if (!event)
1185                 return;
1186         entry   = ring_buffer_event_data(event);
1187
1188         entry->tgid             = current->tgid;
1189         memset(&entry->caller, 0, sizeof(entry->caller));
1190
1191         trace.nr_entries        = 0;
1192         trace.max_entries       = FTRACE_STACK_ENTRIES;
1193         trace.skip              = 0;
1194         trace.entries           = entry->caller;
1195
1196         save_stack_trace_user(&trace);
1197         if (!filter_check_discard(call, entry, buffer, event))
1198                 ring_buffer_unlock_commit(buffer, event);
1199 }
1200
1201 #ifdef UNUSED
1202 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1203 {
1204         ftrace_trace_userstack(tr->buffer, flags, preempt_count());
1205 }
1206 #endif /* UNUSED */
1207
1208 #endif /* CONFIG_STACKTRACE */
1209
1210 static void
1211 ftrace_trace_special(void *__tr,
1212                      unsigned long arg1, unsigned long arg2, unsigned long arg3,
1213                      int pc)
1214 {
1215         struct ftrace_event_call *call = &event_special;
1216         struct ring_buffer_event *event;
1217         struct trace_array *tr = __tr;
1218         struct ring_buffer *buffer = tr->buffer;
1219         struct special_entry *entry;
1220
1221         event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1222                                           sizeof(*entry), 0, pc);
1223         if (!event)
1224                 return;
1225         entry   = ring_buffer_event_data(event);
1226         entry->arg1                     = arg1;
1227         entry->arg2                     = arg2;
1228         entry->arg3                     = arg3;
1229
1230         if (!filter_check_discard(call, entry, buffer, event))
1231                 trace_buffer_unlock_commit(buffer, event, 0, pc);
1232 }
1233
1234 void
1235 __trace_special(void *__tr, void *__data,
1236                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1237 {
1238         ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1239 }
1240
1241 void
1242 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1243 {
1244         struct trace_array *tr = &global_trace;
1245         struct trace_array_cpu *data;
1246         unsigned long flags;
1247         int cpu;
1248         int pc;
1249
1250         if (tracing_disabled)
1251                 return;
1252
1253         pc = preempt_count();
1254         local_irq_save(flags);
1255         cpu = raw_smp_processor_id();
1256         data = tr->data[cpu];
1257
1258         if (likely(atomic_inc_return(&data->disabled) == 1))
1259                 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1260
1261         atomic_dec(&data->disabled);
1262         local_irq_restore(flags);
1263 }
1264
1265 /**
1266  * trace_vbprintk - write a binary message to the tracing buffer
1267  *
1268  */
1269 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1270 {
1271         static raw_spinlock_t trace_buf_lock =
1272                 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
1273         static u32 trace_buf[TRACE_BUF_SIZE];
1274
1275         struct ftrace_event_call *call = &event_bprint;
1276         struct ring_buffer_event *event;
1277         struct ring_buffer *buffer;
1278         struct trace_array *tr = &global_trace;
1279         struct trace_array_cpu *data;
1280         struct bprint_entry *entry;
1281         unsigned long flags;
1282         int disable;
1283         int resched;
1284         int cpu, len = 0, size, pc;
1285
1286         if (unlikely(tracing_selftest_running || tracing_disabled))
1287                 return 0;
1288
1289         /* Don't pollute graph traces with trace_vprintk internals */
1290         pause_graph_tracing();
1291
1292         pc = preempt_count();
1293         resched = ftrace_preempt_disable();
1294         cpu = raw_smp_processor_id();
1295         data = tr->data[cpu];
1296
1297         disable = atomic_inc_return(&data->disabled);
1298         if (unlikely(disable != 1))
1299                 goto out;
1300
1301         /* Lockdep uses trace_printk for lock tracing */
1302         local_irq_save(flags);
1303         __raw_spin_lock(&trace_buf_lock);
1304         len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1305
1306         if (len > TRACE_BUF_SIZE || len < 0)
1307                 goto out_unlock;
1308
1309         size = sizeof(*entry) + sizeof(u32) * len;
1310         buffer = tr->buffer;
1311         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1312                                           flags, pc);
1313         if (!event)
1314                 goto out_unlock;
1315         entry = ring_buffer_event_data(event);
1316         entry->ip                       = ip;
1317         entry->fmt                      = fmt;
1318
1319         memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1320         if (!filter_check_discard(call, entry, buffer, event))
1321                 ring_buffer_unlock_commit(buffer, event);
1322
1323 out_unlock:
1324         __raw_spin_unlock(&trace_buf_lock);
1325         local_irq_restore(flags);
1326
1327 out:
1328         atomic_dec_return(&data->disabled);
1329         ftrace_preempt_enable(resched);
1330         unpause_graph_tracing();
1331
1332         return len;
1333 }
1334 EXPORT_SYMBOL_GPL(trace_vbprintk);
1335
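/*
 * Editorial note (hedged): callers normally reach trace_vbprintk()
 * through the trace_printk() macro, e.g.
 *
 *	trace_printk("processing cpu=%d count=%lu\n", cpu, count);
 *
 * which stores the format pointer plus the binary arguments instead of
 * a fully formatted string.
 */
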
1336 int trace_array_printk(struct trace_array *tr,
1337                        unsigned long ip, const char *fmt, ...)
1338 {
1339         int ret;
1340         va_list ap;
1341
1342         if (!(trace_flags & TRACE_ITER_PRINTK))
1343                 return 0;
1344
1345         va_start(ap, fmt);
1346         ret = trace_array_vprintk(tr, ip, fmt, ap);
1347         va_end(ap);
1348         return ret;
1349 }
1350
1351 int trace_array_vprintk(struct trace_array *tr,
1352                         unsigned long ip, const char *fmt, va_list args)
1353 {
1354         static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1355         static char trace_buf[TRACE_BUF_SIZE];
1356
1357         struct ftrace_event_call *call = &event_print;
1358         struct ring_buffer_event *event;
1359         struct ring_buffer *buffer;
1360         struct trace_array_cpu *data;
1361         int cpu, len = 0, size, pc;
1362         struct print_entry *entry;
1363         unsigned long irq_flags;
1364         int disable;
1365
1366         if (tracing_disabled || tracing_selftest_running)
1367                 return 0;
1368
1369         pc = preempt_count();
1370         preempt_disable_notrace();
1371         cpu = raw_smp_processor_id();
1372         data = tr->data[cpu];
1373
1374         disable = atomic_inc_return(&data->disabled);
1375         if (unlikely(disable != 1))
1376                 goto out;
1377
1378         pause_graph_tracing();
1379         raw_local_irq_save(irq_flags);
1380         __raw_spin_lock(&trace_buf_lock);
1381         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1382
1383         len = min(len, TRACE_BUF_SIZE-1);
1384         trace_buf[len] = 0;
1385
1386         size = sizeof(*entry) + len + 1;
1387         buffer = tr->buffer;
1388         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1389                                           irq_flags, pc);
1390         if (!event)
1391                 goto out_unlock;
1392         entry = ring_buffer_event_data(event);
1393         entry->ip                       = ip;
1394
1395         memcpy(&entry->buf, trace_buf, len);
1396         entry->buf[len] = 0;
1397         if (!filter_check_discard(call, entry, buffer, event))
1398                 ring_buffer_unlock_commit(buffer, event);
1399
1400  out_unlock:
1401         __raw_spin_unlock(&trace_buf_lock);
1402         raw_local_irq_restore(irq_flags);
1403         unpause_graph_tracing();
1404  out:
1405         atomic_dec_return(&data->disabled);
1406         preempt_enable_notrace();
1407
1408         return len;
1409 }
1410
1411 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1412 {
1413         return trace_array_vprintk(&global_trace, ip, fmt, args);
1414 }
1415 EXPORT_SYMBOL_GPL(trace_vprintk);
1416
1417 enum trace_file_type {
1418         TRACE_FILE_LAT_FMT      = 1,
1419         TRACE_FILE_ANNOTATE     = 2,
1420 };
1421
1422 static void trace_iterator_increment(struct trace_iterator *iter)
1423 {
1424         /* Don't allow ftrace to trace into the ring buffers */
1425         ftrace_disable_cpu();
1426
1427         iter->idx++;
1428         if (iter->buffer_iter[iter->cpu])
1429                 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1430
1431         ftrace_enable_cpu();
1432 }
1433
1434 static struct trace_entry *
1435 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1436 {
1437         struct ring_buffer_event *event;
1438         struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1439
1440         /* Don't allow ftrace to trace into the ring buffers */
1441         ftrace_disable_cpu();
1442
1443         if (buf_iter)
1444                 event = ring_buffer_iter_peek(buf_iter, ts);
1445         else
1446                 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1447
1448         ftrace_enable_cpu();
1449
1450         return event ? ring_buffer_event_data(event) : NULL;
1451 }
1452
1453 static struct trace_entry *
1454 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1455 {
1456         struct ring_buffer *buffer = iter->tr->buffer;
1457         struct trace_entry *ent, *next = NULL;
1458         int cpu_file = iter->cpu_file;
1459         u64 next_ts = 0, ts;
1460         int next_cpu = -1;
1461         int cpu;
1462
1463         /*
1464          * If we are in a per-cpu trace file, don't bother iterating over
1465          * all the CPUs; peek at that CPU directly.
1466          */
1467         if (cpu_file > TRACE_PIPE_ALL_CPU) {
1468                 if (ring_buffer_empty_cpu(buffer, cpu_file))
1469                         return NULL;
1470                 ent = peek_next_entry(iter, cpu_file, ent_ts);
1471                 if (ent_cpu)
1472                         *ent_cpu = cpu_file;
1473
1474                 return ent;
1475         }
1476
1477         for_each_tracing_cpu(cpu) {
1478
1479                 if (ring_buffer_empty_cpu(buffer, cpu))
1480                         continue;
1481
1482                 ent = peek_next_entry(iter, cpu, &ts);
1483
1484                 /*
1485                  * Pick the entry with the smallest timestamp:
1486                  */
1487                 if (ent && (!next || ts < next_ts)) {
1488                         next = ent;
1489                         next_cpu = cpu;
1490                         next_ts = ts;
1491                 }
1492         }
1493
1494         if (ent_cpu)
1495                 *ent_cpu = next_cpu;
1496
1497         if (ent_ts)
1498                 *ent_ts = next_ts;
1499
1500         return next;
1501 }
1502
1503 /* Find the next real entry, without updating the iterator itself */
1504 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1505                                           int *ent_cpu, u64 *ent_ts)
1506 {
1507         return __find_next_entry(iter, ent_cpu, ent_ts);
1508 }
1509
1510 /* Find the next real entry, and increment the iterator to the next entry */
1511 static void *find_next_entry_inc(struct trace_iterator *iter)
1512 {
1513         iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1514
1515         if (iter->ent)
1516                 trace_iterator_increment(iter);
1517
1518         return iter->ent ? iter : NULL;
1519 }
1520
1521 static void trace_consume(struct trace_iterator *iter)
1522 {
1523         /* Don't allow ftrace to trace into the ring buffers */
1524         ftrace_disable_cpu();
1525         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1526         ftrace_enable_cpu();
1527 }
1528
1529 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1530 {
1531         struct trace_iterator *iter = m->private;
1532         int i = (int)*pos;
1533         void *ent;
1534
1535         (*pos)++;
1536
1537         /* can't go backwards */
1538         if (iter->idx > i)
1539                 return NULL;
1540
1541         if (iter->idx < 0)
1542                 ent = find_next_entry_inc(iter);
1543         else
1544                 ent = iter;
1545
1546         while (ent && iter->idx < i)
1547                 ent = find_next_entry_inc(iter);
1548
1549         iter->pos = *pos;
1550
1551         return ent;
1552 }
1553
1554 static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1555 {
1556         struct trace_array *tr = iter->tr;
1557         struct ring_buffer_event *event;
1558         struct ring_buffer_iter *buf_iter;
1559         unsigned long entries = 0;
1560         u64 ts;
1561
1562         tr->data[cpu]->skipped_entries = 0;
1563
1564         if (!iter->buffer_iter[cpu])
1565                 return;
1566
1567         buf_iter = iter->buffer_iter[cpu];
1568         ring_buffer_iter_reset(buf_iter);
1569
1570         /*
1571          * With the max latency tracers, we can have the case that
1572          * a reset never took place on a cpu. This is evident from
1573          * the timestamp being before the start of the buffer.
1574          */
1575         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1576                 if (ts >= iter->tr->time_start)
1577                         break;
1578                 entries++;
1579                 ring_buffer_read(buf_iter, NULL);
1580         }
1581
1582         tr->data[cpu]->skipped_entries = entries;
1583 }
1584
1585 /*
1586  * No locking is necessary here. The worst thing that can
1587  * happen is losing events consumed at the same time
1588  * by a trace_pipe reader.
1589  * Other than that, we don't risk crashing the ring buffer,
1590  * because it serializes the readers.
1591  *
1592  * The current tracer is copied to avoid global locking
1593  * all around.
1594  */
1595 static void *s_start(struct seq_file *m, loff_t *pos)
1596 {
1597         struct trace_iterator *iter = m->private;
1598         static struct tracer *old_tracer;
1599         int cpu_file = iter->cpu_file;
1600         void *p = NULL;
1601         loff_t l = 0;
1602         int cpu;
1603
1604         /* copy the tracer to avoid using a global lock all around */
1605         mutex_lock(&trace_types_lock);
1606         if (unlikely(old_tracer != current_trace && current_trace)) {
1607                 old_tracer = current_trace;
1608                 *iter->trace = *current_trace;
1609         }
1610         mutex_unlock(&trace_types_lock);
1611
1612         atomic_inc(&trace_record_cmdline_disabled);
1613
1614         if (*pos != iter->pos) {
1615                 iter->ent = NULL;
1616                 iter->cpu = 0;
1617                 iter->idx = -1;
1618
1619                 ftrace_disable_cpu();
1620
1621                 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1622                         for_each_tracing_cpu(cpu)
1623                                 tracing_iter_reset(iter, cpu);
1624                 } else
1625                         tracing_iter_reset(iter, cpu_file);
1626
1627                 ftrace_enable_cpu();
1628
1629                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1630                         ;
1631
1632         } else {
1633                 l = *pos - 1;
1634                 p = s_next(m, p, &l);
1635         }
1636
1637         trace_event_read_lock();
1638         return p;
1639 }
1640
1641 static void s_stop(struct seq_file *m, void *p)
1642 {
1643         atomic_dec(&trace_record_cmdline_disabled);
1644         trace_event_read_unlock();
1645 }
1646
1647 static void print_lat_help_header(struct seq_file *m)
1648 {
1649         seq_puts(m, "#                  _------=> CPU#            \n");
1650         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1651         seq_puts(m, "#                | / _----=> need-resched    \n");
1652         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1653         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1654         seq_puts(m, "#                |||| /_--=> lock-depth       \n");
1655         seq_puts(m, "#                |||||/     delay             \n");
1656         seq_puts(m, "#  cmd     pid   |||||| time  |   caller      \n");
1657         seq_puts(m, "#     \\   /      ||||||   \\   |   /           \n");
1658 }
1659
1660 static void print_func_help_header(struct seq_file *m)
1661 {
1662         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1663         seq_puts(m, "#              | |       |          |         |\n");
1664 }
1665
1666
1667 static void
1668 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1669 {
1670         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1671         struct trace_array *tr = iter->tr;
1672         struct trace_array_cpu *data = tr->data[tr->cpu];
1673         struct tracer *type = current_trace;
1674         unsigned long entries = 0;
1675         unsigned long total = 0;
1676         unsigned long count;
1677         const char *name = "preemption";
1678         int cpu;
1679
1680         if (type)
1681                 name = type->name;
1682
1683
1684         for_each_tracing_cpu(cpu) {
1685                 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1686                 /*
1687                  * If this buffer has skipped entries, then we hold all
1688                  * entries for the trace and we need to ignore the
1689                  * ones before the time stamp.
1690                  */
1691                 if (tr->data[cpu]->skipped_entries) {
1692                         count -= tr->data[cpu]->skipped_entries;
1693                         /* total is the same as the entries */
1694                         total += count;
1695                 } else
1696                         total += count +
1697                                 ring_buffer_overrun_cpu(tr->buffer, cpu);
1698                 entries += count;
1699         }
1700
1701         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1702                    name, UTS_RELEASE);
1703         seq_puts(m, "# -----------------------------------"
1704                  "---------------------------------\n");
1705         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
1706                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1707                    nsecs_to_usecs(data->saved_latency),
1708                    entries,
1709                    total,
1710                    tr->cpu,
1711 #if defined(CONFIG_PREEMPT_NONE)
1712                    "server",
1713 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1714                    "desktop",
1715 #elif defined(CONFIG_PREEMPT)
1716                    "preempt",
1717 #else
1718                    "unknown",
1719 #endif
1720                    /* These are reserved for later use */
1721                    0, 0, 0, 0);
1722 #ifdef CONFIG_SMP
1723         seq_printf(m, " #P:%d)\n", num_online_cpus());
1724 #else
1725         seq_puts(m, ")\n");
1726 #endif
1727         seq_puts(m, "#    -----------------\n");
1728         seq_printf(m, "#    | task: %.16s-%d "
1729                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1730                    data->comm, data->pid, data->uid, data->nice,
1731                    data->policy, data->rt_priority);
1732         seq_puts(m, "#    -----------------\n");
1733
1734         if (data->critical_start) {
1735                 seq_puts(m, "#  => started at: ");
1736                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1737                 trace_print_seq(m, &iter->seq);
1738                 seq_puts(m, "\n#  => ended at:   ");
1739                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1740                 trace_print_seq(m, &iter->seq);
1741                 seq_puts(m, "\n#\n");
1742         }
1743
1744         seq_puts(m, "#\n");
1745 }
1746
1747 static void test_cpu_buff_start(struct trace_iterator *iter)
1748 {
1749         struct trace_seq *s = &iter->seq;
1750
1751         if (!(trace_flags & TRACE_ITER_ANNOTATE))
1752                 return;
1753
1754         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1755                 return;
1756
1757         if (cpumask_test_cpu(iter->cpu, iter->started))
1758                 return;
1759
1760         if (iter->tr->data[iter->cpu]->skipped_entries)
1761                 return;
1762
1763         cpumask_set_cpu(iter->cpu, iter->started);
1764
1765         /* Don't print started cpu buffer for the first entry of the trace */
1766         if (iter->idx > 1)
1767                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
1768                                 iter->cpu);
1769 }
1770
1771 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1772 {
1773         struct trace_seq *s = &iter->seq;
1774         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1775         struct trace_entry *entry;
1776         struct trace_event *event;
1777
1778         entry = iter->ent;
1779
1780         test_cpu_buff_start(iter);
1781
1782         event = ftrace_find_event(entry->type);
1783
1784         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1785                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1786                         if (!trace_print_lat_context(iter))
1787                                 goto partial;
1788                 } else {
1789                         if (!trace_print_context(iter))
1790                                 goto partial;
1791                 }
1792         }
1793
1794         if (event)
1795                 return event->trace(iter, sym_flags);
1796
1797         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1798                 goto partial;
1799
1800         return TRACE_TYPE_HANDLED;
1801 partial:
1802         return TRACE_TYPE_PARTIAL_LINE;
1803 }
1804
1805 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1806 {
1807         struct trace_seq *s = &iter->seq;
1808         struct trace_entry *entry;
1809         struct trace_event *event;
1810
1811         entry = iter->ent;
1812
1813         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1814                 if (!trace_seq_printf(s, "%d %d %llu ",
1815                                       entry->pid, iter->cpu, iter->ts))
1816                         goto partial;
1817         }
1818
1819         event = ftrace_find_event(entry->type);
1820         if (event)
1821                 return event->raw(iter, 0);
1822
1823         if (!trace_seq_printf(s, "%d ?\n", entry->type))
1824                 goto partial;
1825
1826         return TRACE_TYPE_HANDLED;
1827 partial:
1828         return TRACE_TYPE_PARTIAL_LINE;
1829 }
1830
1831 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1832 {
1833         struct trace_seq *s = &iter->seq;
1834         unsigned char newline = '\n';
1835         struct trace_entry *entry;
1836         struct trace_event *event;
1837
1838         entry = iter->ent;
1839
1840         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1841                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1842                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1843                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1844         }
1845
1846         event = ftrace_find_event(entry->type);
1847         if (event) {
1848                 enum print_line_t ret = event->hex(iter, 0);
1849                 if (ret != TRACE_TYPE_HANDLED)
1850                         return ret;
1851         }
1852
1853         SEQ_PUT_FIELD_RET(s, newline);
1854
1855         return TRACE_TYPE_HANDLED;
1856 }
1857
1858 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1859 {
1860         struct trace_seq *s = &iter->seq;
1861         struct trace_entry *entry;
1862         struct trace_event *event;
1863
1864         entry = iter->ent;
1865
1866         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1867                 SEQ_PUT_FIELD_RET(s, entry->pid);
1868                 SEQ_PUT_FIELD_RET(s, iter->cpu);
1869                 SEQ_PUT_FIELD_RET(s, iter->ts);
1870         }
1871
1872         event = ftrace_find_event(entry->type);
1873         return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
1874 }
1875
1876 static int trace_empty(struct trace_iterator *iter)
1877 {
1878         int cpu;
1879
1880         /* If we are looking at one CPU buffer, only check that one */
1881         if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
1882                 cpu = iter->cpu_file;
1883                 if (iter->buffer_iter[cpu]) {
1884                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1885                                 return 0;
1886                 } else {
1887                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1888                                 return 0;
1889                 }
1890                 return 1;
1891         }
1892
1893         for_each_tracing_cpu(cpu) {
1894                 if (iter->buffer_iter[cpu]) {
1895                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1896                                 return 0;
1897                 } else {
1898                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1899                                 return 0;
1900                 }
1901         }
1902
1903         return 1;
1904 }
1905
1906 /*  Called with trace_event_read_lock() held. */
1907 static enum print_line_t print_trace_line(struct trace_iterator *iter)
1908 {
1909         enum print_line_t ret;
1910
1911         if (iter->trace && iter->trace->print_line) {
1912                 ret = iter->trace->print_line(iter);
1913                 if (ret != TRACE_TYPE_UNHANDLED)
1914                         return ret;
1915         }
1916
1917         if (iter->ent->type == TRACE_BPRINT &&
1918                         trace_flags & TRACE_ITER_PRINTK &&
1919                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1920                 return trace_print_bprintk_msg_only(iter);
1921
1922         if (iter->ent->type == TRACE_PRINT &&
1923                         trace_flags & TRACE_ITER_PRINTK &&
1924                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1925                 return trace_print_printk_msg_only(iter);
1926
1927         if (trace_flags & TRACE_ITER_BIN)
1928                 return print_bin_fmt(iter);
1929
1930         if (trace_flags & TRACE_ITER_HEX)
1931                 return print_hex_fmt(iter);
1932
1933         if (trace_flags & TRACE_ITER_RAW)
1934                 return print_raw_fmt(iter);
1935
1936         return print_trace_fmt(iter);
1937 }
1938
1939 static int s_show(struct seq_file *m, void *v)
1940 {
1941         struct trace_iterator *iter = v;
1942
1943         if (iter->ent == NULL) {
1944                 if (iter->tr) {
1945                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1946                         seq_puts(m, "#\n");
1947                 }
1948                 if (iter->trace && iter->trace->print_header)
1949                         iter->trace->print_header(m);
1950                 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1951                         /* print nothing if the buffers are empty */
1952                         if (trace_empty(iter))
1953                                 return 0;
1954                         print_trace_header(m, iter);
1955                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1956                                 print_lat_help_header(m);
1957                 } else {
1958                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1959                                 print_func_help_header(m);
1960                 }
1961         } else {
1962                 print_trace_line(iter);
1963                 trace_print_seq(m, &iter->seq);
1964         }
1965
1966         return 0;
1967 }
1968
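/*
 * seq_file hooks for the "trace" file: s_start() snapshots the current
 * tracer and positions the iterator, s_next() advances it one entry at
 * a time, s_show() emits either the headers or one formatted entry,
 * and s_stop() drops the cmdline-recording and event-read protection
 * taken in s_start().
 */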
1969 static struct seq_operations tracer_seq_ops = {
1970         .start          = s_start,
1971         .next           = s_next,
1972         .stop           = s_stop,
1973         .show           = s_show,
1974 };
1975
1976 static struct trace_iterator *
1977 __tracing_open(struct inode *inode, struct file *file)
1978 {
1979         long cpu_file = (long) inode->i_private;
1980         void *fail_ret = ERR_PTR(-ENOMEM);
1981         struct trace_iterator *iter;
1982         struct seq_file *m;
1983         int cpu, ret;
1984
1985         if (tracing_disabled)
1986                 return ERR_PTR(-ENODEV);
1987
1988         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1989         if (!iter)
1990                 return ERR_PTR(-ENOMEM);
1991
1992         /*
1993          * We make a copy of the current tracer to avoid concurrent
1994          * changes to it while we are reading.
1995          */
1996         mutex_lock(&trace_types_lock);
1997         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1998         if (!iter->trace)
1999                 goto fail;
2000
2001         if (current_trace)
2002                 *iter->trace = *current_trace;
2003
2004         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
2005                 goto fail;
2006
2007         cpumask_clear(iter->started);
2008
2009         if (current_trace && current_trace->print_max)
2010                 iter->tr = &max_tr;
2011         else
2012                 iter->tr = &global_trace;
2013         iter->pos = -1;
2014         mutex_init(&iter->mutex);
2015         iter->cpu_file = cpu_file;
2016
2017         /* Notify the tracer early; before we stop tracing. */
2018         if (iter->trace && iter->trace->open)
2019                 iter->trace->open(iter);
2020
2021         /* Annotate start of buffers if we had overruns */
2022         if (ring_buffer_overruns(iter->tr->buffer))
2023                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2024
2025         /* stop the trace while dumping */
2026         tracing_stop();
2027
2028         if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2029                 for_each_tracing_cpu(cpu) {
2030
2031                         iter->buffer_iter[cpu] =
2032                                 ring_buffer_read_start(iter->tr->buffer, cpu);
2033                         tracing_iter_reset(iter, cpu);
2034                 }
2035         } else {
2036                 cpu = iter->cpu_file;
2037                 iter->buffer_iter[cpu] =
2038                                 ring_buffer_read_start(iter->tr->buffer, cpu);
2039                 tracing_iter_reset(iter, cpu);
2040         }
2041
2042         ret = seq_open(file, &tracer_seq_ops);
2043         if (ret < 0) {
2044                 fail_ret = ERR_PTR(ret);
2045                 goto fail_buffer;
2046         }
2047
2048         m = file->private_data;
2049         m->private = iter;
2050
2051         mutex_unlock(&trace_types_lock);
2052
2053         return iter;
2054
2055  fail_buffer:
2056         for_each_tracing_cpu(cpu) {
2057                 if (iter->buffer_iter[cpu])
2058                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2059         }
2060         free_cpumask_var(iter->started);
2061         tracing_start();
2062  fail:
2063         mutex_unlock(&trace_types_lock);
2064         kfree(iter->trace);
2065         kfree(iter);
2066
2067         return fail_ret;
2068 }
2069
2070 int tracing_open_generic(struct inode *inode, struct file *filp)
2071 {
2072         if (tracing_disabled)
2073                 return -ENODEV;
2074
2075         filp->private_data = inode->i_private;
2076         return 0;
2077 }
2078
2079 static int tracing_release(struct inode *inode, struct file *file)
2080 {
2081         struct seq_file *m = (struct seq_file *)file->private_data;
2082         struct trace_iterator *iter;
2083         int cpu;
2084
2085         if (!(file->f_mode & FMODE_READ))
2086                 return 0;
2087
2088         iter = m->private;
2089
2090         mutex_lock(&trace_types_lock);
2091         for_each_tracing_cpu(cpu) {
2092                 if (iter->buffer_iter[cpu])
2093                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2094         }
2095
2096         if (iter->trace && iter->trace->close)
2097                 iter->trace->close(iter);
2098
2099         /* reenable tracing if it was previously enabled */
2100         tracing_start();
2101         mutex_unlock(&trace_types_lock);
2102
2103         seq_release(inode, file);
2104         mutex_destroy(&iter->mutex);
2105         free_cpumask_var(iter->started);
2106         kfree(iter->trace);
2107         kfree(iter);
2108         return 0;
2109 }
2110
2111 static int tracing_open(struct inode *inode, struct file *file)
2112 {
2113         struct trace_iterator *iter;
2114         int ret = 0;
2115
2116         /* If this file was opened for write, then erase its contents */
2117         if ((file->f_mode & FMODE_WRITE) &&
2118             (file->f_flags & O_TRUNC)) {
2119                 long cpu = (long) inode->i_private;
2120
2121                 if (cpu == TRACE_PIPE_ALL_CPU)
2122                         tracing_reset_online_cpus(&global_trace);
2123                 else
2124                         tracing_reset(&global_trace, cpu);
2125         }
2126
2127         if (file->f_mode & FMODE_READ) {
2128                 iter = __tracing_open(inode, file);
2129                 if (IS_ERR(iter))
2130                         ret = PTR_ERR(iter);
2131                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2132                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
2133         }
2134         return ret;
2135 }
2136
2137 static void *
2138 t_next(struct seq_file *m, void *v, loff_t *pos)
2139 {
2140         struct tracer *t = v;
2141
2142         (*pos)++;
2143
2144         if (t)
2145                 t = t->next;
2146
2147         return t;
2148 }
2149
2150 static void *t_start(struct seq_file *m, loff_t *pos)
2151 {
2152         struct tracer *t;
2153         loff_t l = 0;
2154
2155         mutex_lock(&trace_types_lock);
2156         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2157                 ;
2158
2159         return t;
2160 }
2161
2162 static void t_stop(struct seq_file *m, void *p)
2163 {
2164         mutex_unlock(&trace_types_lock);
2165 }
2166
2167 static int t_show(struct seq_file *m, void *v)
2168 {
2169         struct tracer *t = v;
2170
2171         if (!t)
2172                 return 0;
2173
2174         seq_printf(m, "%s", t->name);
2175         if (t->next)
2176                 seq_putc(m, ' ');
2177         else
2178                 seq_putc(m, '\n');
2179
2180         return 0;
2181 }
2182
2183 static struct seq_operations show_traces_seq_ops = {
2184         .start          = t_start,
2185         .next           = t_next,
2186         .stop           = t_stop,
2187         .show           = t_show,
2188 };
2189
2190 static int show_traces_open(struct inode *inode, struct file *file)
2191 {
2192         if (tracing_disabled)
2193                 return -ENODEV;
2194
2195         return seq_open(file, &show_traces_seq_ops);
2196 }
2197
2198 static ssize_t
2199 tracing_write_stub(struct file *filp, const char __user *ubuf,
2200                    size_t count, loff_t *ppos)
2201 {
2202         return count;
2203 }
2204
2205 static const struct file_operations tracing_fops = {
2206         .open           = tracing_open,
2207         .read           = seq_read,
2208         .write          = tracing_write_stub,
2209         .llseek         = seq_lseek,
2210         .release        = tracing_release,
2211 };
2212
2213 static const struct file_operations show_traces_fops = {
2214         .open           = show_traces_open,
2215         .read           = seq_read,
2216         .release        = seq_release,
2217 };
2218
2219 /*
2220  * Only trace on a CPU if the bitmask is set:
2221  */
2222 static cpumask_var_t tracing_cpumask;
2223
2224 /*
2225  * The tracer itself will not take this lock, but still we want
2226  * to provide a consistent cpumask to user-space:
2227  */
2228 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2229
2230 /*
2231  * Temporary storage for the character representation of the
2232  * CPU bitmask (and one more byte for the newline):
2233  */
2234 static char mask_str[NR_CPUS + 1];
2235
2236 static ssize_t
2237 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2238                      size_t count, loff_t *ppos)
2239 {
2240         int len;
2241
2242         mutex_lock(&tracing_cpumask_update_lock);
2243
2244         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2245         if (count - len < 2) {
2246                 count = -EINVAL;
2247                 goto out_err;
2248         }
2249         len += sprintf(mask_str + len, "\n");
2250         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2251
2252 out_err:
2253         mutex_unlock(&tracing_cpumask_update_lock);
2254
2255         return count;
2256 }
2257
2258 static ssize_t
2259 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2260                       size_t count, loff_t *ppos)
2261 {
2262         int err, cpu;
2263         cpumask_var_t tracing_cpumask_new;
2264
2265         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2266                 return -ENOMEM;
2267
2268         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2269         if (err)
2270                 goto err_unlock;
2271
2272         mutex_lock(&tracing_cpumask_update_lock);
2273
2274         local_irq_disable();
2275         __raw_spin_lock(&ftrace_max_lock);
2276         for_each_tracing_cpu(cpu) {
2277                 /*
2278                  * Increase/decrease the disabled counter if we are
2279                  * about to flip a bit in the cpumask:
2280                  */
2281                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2282                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2283                         atomic_inc(&global_trace.data[cpu]->disabled);
2284                 }
2285                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2286                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2287                         atomic_dec(&global_trace.data[cpu]->disabled);
2288                 }
2289         }
2290         __raw_spin_unlock(&ftrace_max_lock);
2291         local_irq_enable();
2292
2293         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2294
2295         mutex_unlock(&tracing_cpumask_update_lock);
2296         free_cpumask_var(tracing_cpumask_new);
2297
2298         return count;
2299
2300 err_unlock:
2301         free_cpumask_var(tracing_cpumask_new);
2302
2303         return err;
2304 }
2305
2306 static const struct file_operations tracing_cpumask_fops = {
2307         .open           = tracing_open_generic,
2308         .read           = tracing_cpumask_read,
2309         .write          = tracing_cpumask_write,
2310 };
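/*
 * Usage sketch (illustrative, not part of the original file): the
 * tracing_cpumask file exchanges a hexadecimal CPU mask with
 * user-space, in the spirit of the mini-HOWTO further down, e.g.
 *
 *   # cat /sys/kernel/debug/tracing/tracing_cpumask
 *   f
 *   # echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
 *
 * which would restrict tracing to CPUs 0 and 1 on a four-CPU box.
 */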
2311
2312 static ssize_t
2313 tracing_trace_options_read(struct file *filp, char __user *ubuf,
2314                        size_t cnt, loff_t *ppos)
2315 {
2316         struct tracer_opt *trace_opts;
2317         u32 tracer_flags;
2318         int len = 0;
2319         char *buf;
2320         int r = 0;
2321         int i;
2322
2323
2324         /* calculate max size */
2325         for (i = 0; trace_options[i]; i++) {
2326                 len += strlen(trace_options[i]);
2327                 len += 3; /* "no" and newline */
2328         }
2329
2330         mutex_lock(&trace_types_lock);
2331         tracer_flags = current_trace->flags->val;
2332         trace_opts = current_trace->flags->opts;
2333
2334         /*
2335          * Increase the size with the names of the options
2336          * specific to the current tracer.
2337          */
2338         for (i = 0; trace_opts[i].name; i++) {
2339                 len += strlen(trace_opts[i].name);
2340                 len += 3; /* "no" and newline */
2341         }
2342
2343         /* +1 for \0 */
2344         buf = kmalloc(len + 1, GFP_KERNEL);
2345         if (!buf) {
2346                 mutex_unlock(&trace_types_lock);
2347                 return -ENOMEM;
2348         }
2349
2350         for (i = 0; trace_options[i]; i++) {
2351                 if (trace_flags & (1 << i))
2352                         r += sprintf(buf + r, "%s\n", trace_options[i]);
2353                 else
2354                         r += sprintf(buf + r, "no%s\n", trace_options[i]);
2355         }
2356
2357         for (i = 0; trace_opts[i].name; i++) {
2358                 if (tracer_flags & trace_opts[i].bit)
2359                         r += sprintf(buf + r, "%s\n",
2360                                 trace_opts[i].name);
2361                 else
2362                         r += sprintf(buf + r, "no%s\n",
2363                                 trace_opts[i].name);
2364         }
2365         mutex_unlock(&trace_types_lock);
2366
2367         WARN_ON(r >= len + 1);
2368
2369         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2370
2371         kfree(buf);
2372         return r;
2373 }
2374
2375 /* Try to assign a tracer specific option */
2376 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2377 {
2378         struct tracer_flags *tracer_flags = trace->flags;
2379         struct tracer_opt *opts = NULL;
2380         int ret = 0, i = 0;
2381         int len;
2382
2383         for (i = 0; tracer_flags->opts[i].name; i++) {
2384                 opts = &tracer_flags->opts[i];
2385                 len = strlen(opts->name);
2386
2387                 if (strncmp(cmp, opts->name, len) == 0) {
2388                         ret = trace->set_flag(tracer_flags->val,
2389                                 opts->bit, !neg);
2390                         break;
2391                 }
2392         }
2393         /* Not found */
2394         if (!tracer_flags->opts[i].name)
2395                 return -EINVAL;
2396
2397         /* Refused to handle */
2398         if (ret)
2399                 return ret;
2400
2401         if (neg)
2402                 tracer_flags->val &= ~opts->bit;
2403         else
2404                 tracer_flags->val |= opts->bit;
2405
2406         return 0;
2407 }
2408
2409 static void set_tracer_flags(unsigned int mask, int enabled)
2410 {
2411         /* do nothing if flag is already set */
2412         if (!!(trace_flags & mask) == !!enabled)
2413                 return;
2414
2415         if (enabled)
2416                 trace_flags |= mask;
2417         else
2418                 trace_flags &= ~mask;
2419 }
2420
2421 static ssize_t
2422 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2423                         size_t cnt, loff_t *ppos)
2424 {
2425         char buf[64];
2426         char *cmp = buf;
2427         int neg = 0;
2428         int ret;
2429         int i;
2430
2431         if (cnt >= sizeof(buf))
2432                 return -EINVAL;
2433
2434         if (copy_from_user(&buf, ubuf, cnt))
2435                 return -EFAULT;
2436
2437         buf[cnt] = 0;
2438
2439         if (strncmp(buf, "no", 2) == 0) {
2440                 neg = 1;
2441                 cmp += 2;
2442         }
2443
2444         for (i = 0; trace_options[i]; i++) {
2445                 int len = strlen(trace_options[i]);
2446
2447                 if (strncmp(cmp, trace_options[i], len) == 0) {
2448                         set_tracer_flags(1 << i, !neg);
2449                         break;
2450                 }
2451         }
2452
2453         /* If no option could be set, test the specific tracer options */
2454         if (!trace_options[i]) {
2455                 mutex_lock(&trace_types_lock);
2456                 ret = set_tracer_option(current_trace, cmp, neg);
2457                 mutex_unlock(&trace_types_lock);
2458                 if (ret)
2459                         return ret;
2460         }
2461
2462         filp->f_pos += cnt;
2463
2464         return cnt;
2465 }
2466
2467 static const struct file_operations tracing_iter_fops = {
2468         .open           = tracing_open_generic,
2469         .read           = tracing_trace_options_read,
2470         .write          = tracing_trace_options_write,
2471 };
2472
2473 static const char readme_msg[] =
2474         "tracing mini-HOWTO:\n\n"
2475         "# mount -t debugfs nodev /sys/kernel/debug\n\n"
2476         "# cat /sys/kernel/debug/tracing/available_tracers\n"
2477         "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2478         "# cat /sys/kernel/debug/tracing/current_tracer\n"
2479         "nop\n"
2480         "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
2481         "# cat /sys/kernel/debug/tracing/current_tracer\n"
2482         "sched_switch\n"
2483         "# cat /sys/kernel/debug/tracing/trace_options\n"
2484         "noprint-parent nosym-offset nosym-addr noverbose\n"
2485         "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
2486         "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
2487         "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
2488         "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
2489 ;
2490
2491 static ssize_t
2492 tracing_readme_read(struct file *filp, char __user *ubuf,
2493                        size_t cnt, loff_t *ppos)
2494 {
2495         return simple_read_from_buffer(ubuf, cnt, ppos,
2496                                         readme_msg, strlen(readme_msg));
2497 }
2498
2499 static const struct file_operations tracing_readme_fops = {
2500         .open           = tracing_open_generic,
2501         .read           = tracing_readme_read,
2502 };
2503
2504 static ssize_t
2505 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2506                                 size_t cnt, loff_t *ppos)
2507 {
2508         char *buf_comm;
2509         char *file_buf;
2510         char *buf;
2511         int len = 0;
2512         int pid;
2513         int i;
2514
2515         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2516         if (!file_buf)
2517                 return -ENOMEM;
2518
2519         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2520         if (!buf_comm) {
2521                 kfree(file_buf);
2522                 return -ENOMEM;
2523         }
2524
2525         buf = file_buf;
2526
2527         for (i = 0; i < SAVED_CMDLINES; i++) {
2528                 int r;
2529
2530                 pid = map_cmdline_to_pid[i];
2531                 if (pid == -1 || pid == NO_CMDLINE_MAP)
2532                         continue;
2533
2534                 trace_find_cmdline(pid, buf_comm);
2535                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
2536                 buf += r;
2537                 len += r;
2538         }
2539
2540         len = simple_read_from_buffer(ubuf, cnt, ppos,
2541                                       file_buf, len);
2542
2543         kfree(file_buf);
2544         kfree(buf_comm);
2545
2546         return len;
2547 }
2548
2549 static const struct file_operations tracing_saved_cmdlines_fops = {
2550     .open       = tracing_open_generic,
2551     .read       = tracing_saved_cmdlines_read,
2552 };
2553
2554 static ssize_t
2555 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2556                   size_t cnt, loff_t *ppos)
2557 {
2558         char buf[64];
2559         int r;
2560
2561         r = sprintf(buf, "%u\n", tracer_enabled);
2562         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2563 }
2564
2565 static ssize_t
2566 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2567                    size_t cnt, loff_t *ppos)
2568 {
2569         struct trace_array *tr = filp->private_data;
2570         char buf[64];
2571         unsigned long val;
2572         int ret;
2573
2574         if (cnt >= sizeof(buf))
2575                 return -EINVAL;
2576
2577         if (copy_from_user(&buf, ubuf, cnt))
2578                 return -EFAULT;
2579
2580         buf[cnt] = 0;
2581
2582         ret = strict_strtoul(buf, 10, &val);
2583         if (ret < 0)
2584                 return ret;
2585
2586         val = !!val;
2587
2588         mutex_lock(&trace_types_lock);
2589         if (tracer_enabled ^ val) {
2590                 if (val) {
2591                         tracer_enabled = 1;
2592                         if (current_trace->start)
2593                                 current_trace->start(tr);
2594                         tracing_start();
2595                 } else {
2596                         tracer_enabled = 0;
2597                         tracing_stop();
2598                         if (current_trace->stop)
2599                                 current_trace->stop(tr);
2600                 }
2601         }
2602         mutex_unlock(&trace_types_lock);
2603
2604         filp->f_pos += cnt;
2605
2606         return cnt;
2607 }
2608
2609 static ssize_t
2610 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2611                        size_t cnt, loff_t *ppos)
2612 {
2613         char buf[max_tracer_type_len+2];
2614         int r;
2615
2616         mutex_lock(&trace_types_lock);
2617         if (current_trace)
2618                 r = sprintf(buf, "%s\n", current_trace->name);
2619         else
2620                 r = sprintf(buf, "\n");
2621         mutex_unlock(&trace_types_lock);
2622
2623         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2624 }
2625
2626 int tracer_init(struct tracer *t, struct trace_array *tr)
2627 {
2628         tracing_reset_online_cpus(tr);
2629         return t->init(tr);
2630 }
2631
2632 static int tracing_resize_ring_buffer(unsigned long size)
2633 {
2634         int ret;
2635
2636         /*
2637          * If the kernel or the user changes the size of the ring
2638          * buffer, we use the size that was given, and we can forget
2639          * about expanding it later.
2640          */
2641         ring_buffer_expanded = 1;
2642
2643         ret = ring_buffer_resize(global_trace.buffer, size);
2644         if (ret < 0)
2645                 return ret;
2646
2647         ret = ring_buffer_resize(max_tr.buffer, size);
2648         if (ret < 0) {
2649                 int r;
2650
2651                 r = ring_buffer_resize(global_trace.buffer,
2652                                        global_trace.entries);
2653                 if (r < 0) {
2654                         /*
2655                          * AARGH! We are left with a max buffer of a
2656                          * different size!
2657                          * The max buffer is our "snapshot" buffer.
2658                          * When a tracer needs a snapshot (one of the
2659                          * latency tracers), it swaps the max buffer
2660                          * with the saved snapshot. We succeeded in
2661                          * updating the size of the main buffer, but failed
2662                          * to update the size of the max buffer. And when we
2663                          * tried to reset the main buffer to the original
2664                          * size, we failed there too. This is very unlikely
2665                          * to happen, but if it does, warn and kill all
2666                          * tracing.
2667                          */
2668                         WARN_ON(1);
2669                         tracing_disabled = 1;
2670                 }
2671                 return ret;
2672         }
2673
2674         global_trace.entries = size;
2675
2676         return ret;
2677 }
2678
2679 /**
2680  * tracing_update_buffers - used by tracing facility to expand ring buffers
2681  *
2682  * To save memory when tracing is configured in but never used,
2683  * the ring buffers are set to a minimum size. Once a user starts
2684  * to use the tracing facility, they need to grow to their default
2685  * size.
2686  *
2687  * This function is to be called when a tracer is about to be used.
2688  */
2689 int tracing_update_buffers(void)
2690 {
2691         int ret = 0;
2692
2693         mutex_lock(&trace_types_lock);
2694         if (!ring_buffer_expanded)
2695                 ret = tracing_resize_ring_buffer(trace_buf_size);
2696         mutex_unlock(&trace_types_lock);
2697
2698         return ret;
2699 }
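/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to use tracing_update_buffers() before generating trace
 * data. example_prepare_tracing() is a hypothetical helper shown only
 * for documentation purposes.
 */
static int __maybe_unused example_prepare_tracing(void)
{
        int ret;

        /* Grow the ring buffers to their default size on first use. */
        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        /* ... the caller can now go on to enable its tracer ... */
        return 0;
}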
2700
2701 struct trace_option_dentry;
2702
2703 static struct trace_option_dentry *
2704 create_trace_option_files(struct tracer *tracer);
2705
2706 static void
2707 destroy_trace_option_files(struct trace_option_dentry *topts);
2708
2709 static int tracing_set_tracer(const char *buf)
2710 {
2711         static struct trace_option_dentry *topts;
2712         struct trace_array *tr = &global_trace;
2713         struct tracer *t;
2714         int ret = 0;
2715
2716         mutex_lock(&trace_types_lock);
2717
2718         if (!ring_buffer_expanded) {
2719                 ret = tracing_resize_ring_buffer(trace_buf_size);
2720                 if (ret < 0)
2721                         goto out;
2722                 ret = 0;
2723         }
2724
2725         for (t = trace_types; t; t = t->next) {
2726                 if (strcmp(t->name, buf) == 0)
2727                         break;
2728         }
2729         if (!t) {
2730                 ret = -EINVAL;
2731                 goto out;
2732         }
2733         if (t == current_trace)
2734                 goto out;
2735
2736         trace_branch_disable();
2737         if (current_trace && current_trace->reset)
2738                 current_trace->reset(tr);
2739
2740         destroy_trace_option_files(topts);
2741
2742         current_trace = t;
2743
2744         topts = create_trace_option_files(current_trace);
2745
2746         if (t->init) {
2747                 ret = tracer_init(t, tr);
2748                 if (ret)
2749                         goto out;
2750         }
2751
2752         trace_branch_enable(tr);
2753  out:
2754         mutex_unlock(&trace_types_lock);
2755
2756         return ret;
2757 }
2758
2759 static ssize_t
2760 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2761                         size_t cnt, loff_t *ppos)
2762 {
2763         char buf[max_tracer_type_len+1];
2764         int i;
2765         size_t ret;
2766         int err;
2767
2768         ret = cnt;
2769
2770         if (cnt > max_tracer_type_len)
2771                 cnt = max_tracer_type_len;
2772
2773         if (copy_from_user(&buf, ubuf, cnt))
2774                 return -EFAULT;
2775
2776         buf[cnt] = 0;
2777
2778         /* strip ending whitespace. */
2779         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2780                 buf[i] = 0;
2781
2782         err = tracing_set_tracer(buf);
2783         if (err)
2784                 return err;
2785
2786         filp->f_pos += ret;
2787
2788         return ret;
2789 }
2790
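/*
 * The max-latency file exchanges values in microseconds with
 * user-space; internally the value is kept in nanoseconds, hence the
 * nsecs_to_usecs() on the read side and the "* 1000" on the write
 * side below.
 */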
2791 static ssize_t
2792 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2793                      size_t cnt, loff_t *ppos)
2794 {
2795         unsigned long *ptr = filp->private_data;
2796         char buf[64];
2797         int r;
2798
2799         r = snprintf(buf, sizeof(buf), "%ld\n",
2800                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2801         if (r > sizeof(buf))
2802                 r = sizeof(buf);
2803         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2804 }
2805
2806 static ssize_t
2807 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2808                       size_t cnt, loff_t *ppos)
2809 {
2810         unsigned long *ptr = filp->private_data;
2811         char buf[64];
2812         unsigned long val;
2813         int ret;
2814
2815         if (cnt >= sizeof(buf))
2816                 return -EINVAL;
2817
2818         if (copy_from_user(&buf, ubuf, cnt))
2819                 return -EFAULT;
2820
2821         buf[cnt] = 0;
2822
2823         ret = strict_strtoul(buf, 10, &val);
2824         if (ret < 0)
2825                 return ret;
2826
2827         *ptr = val * 1000;
2828
2829         return cnt;
2830 }
2831
2832 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2833 {
2834         long cpu_file = (long) inode->i_private;
2835         struct trace_iterator *iter;
2836         int ret = 0;
2837
2838         if (tracing_disabled)
2839                 return -ENODEV;
2840
2841         mutex_lock(&trace_types_lock);
2842
2843         /* We only allow one reader per cpu */
2844         if (cpu_file == TRACE_PIPE_ALL_CPU) {
2845                 if (!cpumask_empty(tracing_reader_cpumask)) {
2846                         ret = -EBUSY;
2847                         goto out;
2848                 }
2849                 cpumask_setall(tracing_reader_cpumask);
2850         } else {
2851                 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2852                         cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2853                 else {
2854                         ret = -EBUSY;
2855                         goto out;
2856                 }
2857         }
2858
2859         /* create a buffer to store the information to pass to userspace */
2860         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2861         if (!iter) {
2862                 ret = -ENOMEM;
2863                 goto out;
2864         }
2865
2866         /*
2867          * We make a copy of the current tracer to avoid concurrent
2868          * changes to it while we are reading.
2869          */
2870         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2871         if (!iter->trace) {
2872                 ret = -ENOMEM;
2873                 goto fail;
2874         }
2875         if (current_trace)
2876                 *iter->trace = *current_trace;
2877
2878         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2879                 ret = -ENOMEM;
2880                 goto fail;
2881         }
2882
2883         /* trace pipe does not show start of buffer */
2884         cpumask_setall(iter->started);
2885
2886         if (trace_flags & TRACE_ITER_LATENCY_FMT)
2887                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2888
2889         iter->cpu_file = cpu_file;
2890         iter->tr = &global_trace;
2891         mutex_init(&iter->mutex);
2892         filp->private_data = iter;
2893
2894         if (iter->trace->pipe_open)
2895                 iter->trace->pipe_open(iter);
2896
2897 out:
2898         mutex_unlock(&trace_types_lock);
2899         return ret;
2900
2901 fail:
2902         kfree(iter->trace);
2903         kfree(iter);
2904         mutex_unlock(&trace_types_lock);
2905         return ret;
2906 }
2907
2908 static int tracing_release_pipe(struct inode *inode, struct file *file)
2909 {
2910         struct trace_iterator *iter = file->private_data;
2911
2912         mutex_lock(&trace_types_lock);
2913
2914         if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2915                 cpumask_clear(tracing_reader_cpumask);
2916         else
2917                 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2918
2919         mutex_unlock(&trace_types_lock);
2920
2921         free_cpumask_var(iter->started);
2922         mutex_destroy(&iter->mutex);
2923         kfree(iter->trace);
2924         kfree(iter);
2925
2926         return 0;
2927 }
2928
2929 static unsigned int
2930 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2931 {
2932         struct trace_iterator *iter = filp->private_data;
2933
2934         if (trace_flags & TRACE_ITER_BLOCK) {
2935                 /*
2936                  * Always select as readable when in blocking mode
2937                  */
2938                 return POLLIN | POLLRDNORM;
2939         } else {
2940                 if (!trace_empty(iter))
2941                         return POLLIN | POLLRDNORM;
2942                 poll_wait(filp, &trace_wait, poll_table);
2943                 if (!trace_empty(iter))
2944                         return POLLIN | POLLRDNORM;
2945
2946                 return 0;
2947         }
2948 }
2949
2950
2951 void default_wait_pipe(struct trace_iterator *iter)
2952 {
2953         DEFINE_WAIT(wait);
2954
2955         prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
2956
2957         if (trace_empty(iter))
2958                 schedule();
2959
2960         finish_wait(&trace_wait, &wait);
2961 }
2962
2963 /*
2964  * This is a makeshift waitqueue.
2965  * A tracer might use this callback in some rare cases:
2966  *
2967  *  1) the current tracer might hold the runqueue lock when it wakes up
2968  *     a reader, hence a deadlock (sched, function, and function graph tracers)
2969  *  2) the function tracers trace all functions, and we don't want
2970  *     the overhead of calling wake_up and friends
2971  *     (nor of tracing them)
2972  *
2973  *     Anyway, this really is a very primitive wakeup.
2974  */
2975 void poll_wait_pipe(struct trace_iterator *iter)
2976 {
2977         set_current_state(TASK_INTERRUPTIBLE);
2978         /* sleep for 100 msecs, and try again. */
2979         schedule_timeout(HZ / 10);
2980 }
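/*
 * Note (an assumption for illustration, not stated above): a tracer
 * selects this behaviour by setting its ->wait_pipe callback to
 * poll_wait_pipe; tracers that leave the callback unset end up with
 * default_wait_pipe() instead.
 */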
2981
2982 /* Must be called with iter->mutex held. */
2983 static int tracing_wait_pipe(struct file *filp)
2984 {
2985         struct trace_iterator *iter = filp->private_data;
2986
2987         while (trace_empty(iter)) {
2988
2989                 if ((filp->f_flags & O_NONBLOCK)) {
2990                         return -EAGAIN;
2991                 }
2992
2993                 mutex_unlock(&iter->mutex);
2994
2995                 iter->trace->wait_pipe(iter);
2996
2997                 mutex_lock(&iter->mutex);
2998
2999                 if (signal_pending(current))
3000                         return -EINTR;
3001
3002                 /*
3003                  * We block until we read something. If tracing is disabled
3004                  * but we have not read anything yet, we keep blocking: this
3005                  * allows a user to cat this file and then enable tracing.
3006                  * But once we have read something, we return EOF when
3007                  * tracing is disabled again.
3008                  *
3009                  * iter->pos will be 0 if we haven't read anything.
3010                  */
3011                 if (!tracer_enabled && iter->pos)
3012                         break;
3013         }
3014
3015         return 1;
3016 }
3017
3018 /*
3019  * Consumer reader.
3020  */
3021 static ssize_t
3022 tracing_read_pipe(struct file *filp, char __user *ubuf,
3023                   size_t cnt, loff_t *ppos)
3024 {
3025         struct trace_iterator *iter = filp->private_data;
3026         static struct tracer *old_tracer;
3027         ssize_t sret;
3028
3029         /* return any leftover data */
3030         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3031         if (sret != -EBUSY)
3032                 return sret;
3033
3034         trace_seq_init(&iter->seq);
3035
3036         /* copy the tracer to avoid using a global lock all around */
3037         mutex_lock(&trace_types_lock);
3038         if (unlikely(old_tracer != current_trace && current_trace)) {
3039                 old_tracer = current_trace;
3040                 *iter->trace = *current_trace;
3041         }
3042         mutex_unlock(&trace_types_lock);
3043
3044         /*
3045          * Avoid more than one consumer on a single file descriptor.
3046          * This is just a matter of trace coherency; the ring buffer itself
3047          * is protected.
3048          */
3049         mutex_lock(&iter->mutex);
3050         if (iter->trace->read) {
3051                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
3052                 if (sret)
3053                         goto out;
3054         }
3055
3056 waitagain:
3057         sret = tracing_wait_pipe(filp);
3058         if (sret <= 0)
3059                 goto out;
3060
3061         /* stop when tracing is finished */
3062         if (trace_empty(iter)) {
3063                 sret = 0;
3064                 goto out;
3065         }
3066
3067         if (cnt >= PAGE_SIZE)
3068                 cnt = PAGE_SIZE - 1;
3069
3070         /* reset all but tr, trace, and overruns */
3071         memset(&iter->seq, 0,
3072                sizeof(struct trace_iterator) -
3073                offsetof(struct trace_iterator, seq));
3074         iter->pos = -1;
3075
3076         trace_event_read_lock();
3077         while (find_next_entry_inc(iter) != NULL) {
3078                 enum print_line_t ret;
3079                 int len = iter->seq.len;
3080
3081                 ret = print_trace_line(iter);
3082                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3083                         /* don't print partial lines */
3084                         iter->seq.len = len;
3085                         break;
3086                 }
3087                 if (ret != TRACE_TYPE_NO_CONSUME)
3088                         trace_consume(iter);
3089
3090                 if (iter->seq.len >= cnt)
3091                         break;
3092         }
3093         trace_event_read_unlock();
3094
3095         /* Now copy what we have to the user */
3096         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3097         if (iter->seq.readpos >= iter->seq.len)
3098                 trace_seq_init(&iter->seq);
3099
3100         /*
3101          * If there was nothing to send to the user, despite consuming trace
3102          * entries, go back and wait for more entries.
3103          */
3104         if (sret == -EBUSY)
3105                 goto waitagain;
3106
3107 out:
3108         mutex_unlock(&iter->mutex);
3109
3110         return sret;
3111 }
3112
3113 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
3114                                      struct pipe_buffer *buf)
3115 {
3116         __free_page(buf->page);
3117 }
3118
3119 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3120                                      unsigned int idx)
3121 {
3122         __free_page(spd->pages[idx]);
3123 }
3124
3125 static struct pipe_buf_operations tracing_pipe_buf_ops = {
3126         .can_merge              = 0,
3127         .map                    = generic_pipe_buf_map,
3128         .unmap                  = generic_pipe_buf_unmap,
3129         .confirm                = generic_pipe_buf_confirm,
3130         .release                = tracing_pipe_buf_release,
3131         .steal                  = generic_pipe_buf_steal,
3132         .get                    = generic_pipe_buf_get,
3133 };
3134
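/*
 * Fill iter->seq with as many formatted trace lines as fit into the
 * remaining splice budget @rem (at most one page, since the seq
 * buffer is page-sized) and return the budget that is left over.
 */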
3135 static size_t
3136 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3137 {
3138         size_t count;
3139         int ret;
3140
3141         /* Seq buffer is page-sized, exactly what we need. */
3142         for (;;) {
3143                 count = iter->seq.len;
3144                 ret = print_trace_line(iter);
3145                 count = iter->seq.len - count;
3146                 if (rem < count) {
3147                         rem = 0;
3148                         iter->seq.len -= count;
3149                         break;
3150                 }
3151                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3152                         iter->seq.len -= count;
3153                         break;
3154                 }
3155
3156                 if (ret != TRACE_TYPE_NO_CONSUME)
3157                         trace_consume(iter);
3158                 rem -= count;
3159                 if (!find_next_entry_inc(iter)) {
3160                         rem = 0;
3161                         iter->ent = NULL;
3162                         break;
3163                 }
3164         }
3165
3166         return rem;
3167 }
3168
3169 static ssize_t tracing_splice_read_pipe(struct file *filp,
3170                                         loff_t *ppos,
3171                                         struct pipe_inode_info *pipe,
3172                                         size_t len,
3173                                         unsigned int flags)
3174 {
3175         struct page *pages[PIPE_BUFFERS];
3176         struct partial_page partial[PIPE_BUFFERS];
3177         struct trace_iterator *iter = filp->private_data;
3178         struct splice_pipe_desc spd = {
3179                 .pages          = pages,
3180                 .partial        = partial,
3181                 .nr_pages       = 0, /* This gets updated below. */
3182                 .flags          = flags,
3183                 .ops            = &tracing_pipe_buf_ops,
3184                 .spd_release    = tracing_spd_release_pipe,
3185         };
3186         static struct tracer *old_tracer;
3187         ssize_t ret;
3188         size_t rem;
3189         unsigned int i;
3190
3191         /* copy the tracer to avoid using a global lock all around */
3192         mutex_lock(&trace_types_lock);
3193         if (unlikely(old_tracer != current_trace && current_trace)) {
3194                 old_tracer = current_trace;
3195                 *iter->trace = *current_trace;
3196         }
3197         mutex_unlock(&trace_types_lock);
3198
3199         mutex_lock(&iter->mutex);
3200
3201         if (iter->trace->splice_read) {
3202                 ret = iter->trace->splice_read(iter, filp,
3203                                                ppos, pipe, len, flags);
3204                 if (ret)
3205                         goto out_err;
3206         }
3207
3208         ret = tracing_wait_pipe(filp);
3209         if (ret <= 0)
3210                 goto out_err;
3211
3212         if (!iter->ent && !find_next_entry_inc(iter)) {
3213                 ret = -EFAULT;
3214                 goto out_err;
3215         }
3216
3217         trace_event_read_lock();
3218
3219         /* Fill as many pages as possible. */
3220         for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
3221                 pages[i] = alloc_page(GFP_KERNEL);
3222                 if (!pages[i])
3223                         break;
3224
3225                 rem = tracing_fill_pipe_page(rem, iter);
3226
3227                 /* Copy the data into the page, so we can start over. */
3228                 ret = trace_seq_to_buffer(&iter->seq,
3229                                           page_address(pages[i]),
3230                                           iter->seq.len);
3231                 if (ret < 0) {
3232                         __free_page(pages[i]);
3233                         break;
3234                 }
3235                 partial[i].offset = 0;
3236                 partial[i].len = iter->seq.len;
3237
3238                 trace_seq_init(&iter->seq);
3239         }
3240
3241         trace_event_read_unlock();
3242         mutex_unlock(&iter->mutex);
3243
3244         spd.nr_pages = i;
3245
3246         return splice_to_pipe(pipe, &spd);
3247
3248 out_err:
3249         mutex_unlock(&iter->mutex);
3250
3251         return ret;
3252 }
3253
3254 static ssize_t
3255 tracing_entries_read(struct file *filp, char __user *ubuf,
3256                      size_t cnt, loff_t *ppos)
3257 {
3258         struct trace_array *tr = filp->private_data;
3259         char buf[96];
3260         int r;
3261
3262         mutex_lock(&trace_types_lock);
3263         if (!ring_buffer_expanded)
3264                 r = sprintf(buf, "%lu (expanded: %lu)\n",
3265                             tr->entries >> 10,
3266                             trace_buf_size >> 10);
3267         else
3268                 r = sprintf(buf, "%lu\n", tr->entries >> 10);
3269         mutex_unlock(&trace_types_lock);
3270
3271         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3272 }
3273
3274 static ssize_t
3275 tracing_entries_write(struct file *filp, const char __user *ubuf,
3276                       size_t cnt, loff_t *ppos)
3277 {
3278         unsigned long val;
3279         char buf[64];
3280         int ret, cpu;
3281
3282         if (cnt >= sizeof(buf))
3283                 return -EINVAL;
3284
3285         if (copy_from_user(&buf, ubuf, cnt))
3286                 return -EFAULT;
3287
3288         buf[cnt] = 0;
3289
3290         ret = strict_strtoul(buf, 10, &val);
3291         if (ret < 0)
3292                 return ret;
3293
3294         /* must have at least 1 entry */
3295         if (!val)
3296                 return -EINVAL;
3297
3298         mutex_lock(&trace_types_lock);
3299
3300         tracing_stop();
3301
3302         /* disable all cpu buffers */
3303         for_each_tracing_cpu(cpu) {
3304                 if (global_trace.data[cpu])
3305                         atomic_inc(&global_trace.data[cpu]->disabled);
3306                 if (max_tr.data[cpu])
3307                         atomic_inc(&max_tr.data[cpu]->disabled);
3308         }
3309
3310         /* value is in KB */
3311         val <<= 10;
3312
3313         if (val != global_trace.entries) {
3314                 ret = tracing_resize_ring_buffer(val);
3315                 if (ret < 0) {
3316                         cnt = ret;
3317                         goto out;
3318                 }
3319         }
3320
3321         filp->f_pos += cnt;
3322
3323         /* If check pages failed, return ENOMEM */
3324         if (tracing_disabled)
3325                 cnt = -ENOMEM;
3326  out:
3327         for_each_tracing_cpu(cpu) {
3328                 if (global_trace.data[cpu])
3329                         atomic_dec(&global_trace.data[cpu]->disabled);
3330                 if (max_tr.data[cpu])
3331                         atomic_dec(&max_tr.data[cpu]->disabled);
3332         }
3333
3334         tracing_start();
3335         max_tr.entries = global_trace.entries;
3336         mutex_unlock(&trace_types_lock);
3337
3338         return cnt;
3339 }
3340
3341 static int mark_printk(const char *fmt, ...)
3342 {
3343         int ret;
3344         va_list args;
3345         va_start(args, fmt);
3346         ret = trace_vprintk(0, fmt, args);
3347         va_end(args);
3348         return ret;
3349 }
3350
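/*
 * Write handler for trace_marker: the string written by user space is
 * injected into the trace via trace_vprintk().  Input is capped at
 * TRACE_BUF_SIZE and cut at the first newline.  Typical use (assuming
 * debugfs is mounted at /sys/kernel/debug):
 *
 *   echo "hit the slow path" > /sys/kernel/debug/tracing/trace_marker
 */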
3351 static ssize_t
3352 tracing_mark_write(struct file *filp, const char __user *ubuf,
3353                                         size_t cnt, loff_t *fpos)
3354 {
3355         char *buf;
3356         char *end;
3357
3358         if (tracing_disabled)
3359                 return -EINVAL;
3360
3361         if (cnt > TRACE_BUF_SIZE)
3362                 cnt = TRACE_BUF_SIZE;
3363
3364         buf = kmalloc(cnt + 1, GFP_KERNEL);
3365         if (buf == NULL)
3366                 return -ENOMEM;
3367
3368         if (copy_from_user(buf, ubuf, cnt)) {
3369                 kfree(buf);
3370                 return -EFAULT;
3371         }
3372
3373                 /* Cut at the first NUL or newline. */
3374         buf[cnt] = '\0';
3375         end = strchr(buf, '\n');
3376         if (end)
3377                 *end = '\0';
3378
3379         cnt = mark_printk("%s\n", buf);
3380         kfree(buf);
3381         *fpos += cnt;
3382
3383         return cnt;
3384 }
3385
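/*
 * Read handler for trace_clock: list the available trace clocks on a
 * single line, with the currently selected clock wrapped in brackets.
 */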
3386 static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
3387                                   size_t cnt, loff_t *ppos)
3388 {
3389         char buf[64];
3390         int bufiter = 0;
3391         int i;
3392
3393         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3394                 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
3395                         "%s%s%s%s", i ? " " : "",
3396                         i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3397                         i == trace_clock_id ? "]" : "");
3398         bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
3399
3400         return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
3401 }
3402
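/*
 * Write handler for trace_clock: look up the requested clock by name
 * in trace_clocks[] (any of the names shown by reading the file) and
 * switch both the main and the max-latency ring buffers over to it.
 */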
3403 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3404                                    size_t cnt, loff_t *fpos)
3405 {
3406         char buf[64];
3407         const char *clockstr;
3408         int i;
3409
3410         if (cnt >= sizeof(buf))
3411                 return -EINVAL;
3412
3413         if (copy_from_user(&buf, ubuf, cnt))
3414                 return -EFAULT;
3415
3416         buf[cnt] = 0;
3417
3418         clockstr = strstrip(buf);
3419
3420         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3421                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
3422                         break;
3423         }
3424         if (i == ARRAY_SIZE(trace_clocks))
3425                 return -EINVAL;
3426
3427         trace_clock_id = i;
3428
3429         mutex_lock(&trace_types_lock);
3430
3431         ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3432         if (max_tr.buffer)
3433                 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3434
3435         mutex_unlock(&trace_types_lock);
3436
3437         *fpos += cnt;
3438
3439         return cnt;
3440 }
3441
3442 static const struct file_operations tracing_max_lat_fops = {
3443         .open           = tracing_open_generic,
3444         .read           = tracing_max_lat_read,
3445         .write          = tracing_max_lat_write,
3446 };
3447
3448 static const struct file_operations tracing_ctrl_fops = {
3449         .open           = tracing_open_generic,
3450         .read           = tracing_ctrl_read,
3451         .write          = tracing_ctrl_write,
3452 };
3453
3454 static const struct file_operations set_tracer_fops = {
3455         .open           = tracing_open_generic,
3456         .read           = tracing_set_trace_read,
3457         .write          = tracing_set_trace_write,
3458 };
3459
3460 static const struct file_operations tracing_pipe_fops = {
3461         .open           = tracing_open_pipe,
3462         .poll           = tracing_poll_pipe,
3463         .read           = tracing_read_pipe,
3464         .splice_read    = tracing_splice_read_pipe,
3465         .release        = tracing_release_pipe,
3466 };
3467
3468 static const struct file_operations tracing_entries_fops = {
3469         .open           = tracing_open_generic,
3470         .read           = tracing_entries_read,
3471         .write          = tracing_entries_write,
3472 };
3473
3474 static const struct file_operations tracing_mark_fops = {
3475         .open           = tracing_open_generic,
3476         .write          = tracing_mark_write,
3477 };
3478
3479 static const struct file_operations trace_clock_fops = {
3480         .open           = tracing_open_generic,
3481         .read           = tracing_clock_read,
3482         .write          = tracing_clock_write,
3483 };
3484
3485 struct ftrace_buffer_info {
3486         struct trace_array      *tr;
3487         void                    *spare;
3488         int                     cpu;
3489         unsigned int            read;
3490 };
3491
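/*
 * Open handler for the per-cpu trace_pipe_raw file: allocate an
 * ftrace_buffer_info for the cpu encoded in i_private.  read is
 * initialized to -1 so that the first read pulls a fresh page out of
 * the ring buffer.
 */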
3492 static int tracing_buffers_open(struct inode *inode, struct file *filp)
3493 {
3494         int cpu = (int)(long)inode->i_private;
3495         struct ftrace_buffer_info *info;
3496
3497         if (tracing_disabled)
3498                 return -ENODEV;
3499
3500         info = kzalloc(sizeof(*info), GFP_KERNEL);
3501         if (!info)
3502                 return -ENOMEM;
3503
3504         info->tr        = &global_trace;
3505         info->cpu       = cpu;
3506         info->spare     = NULL;
3507         /* Force reading ring buffer for first read */
3508         info->read      = (unsigned int)-1;
3509
3510         filp->private_data = info;
3511
3512         return nonseekable_open(inode, filp);
3513 }
3514
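/*
 * Read raw ring-buffer pages.  A spare page is allocated on first use,
 * filled from this cpu's buffer with ring_buffer_read_page(), and then
 * copied to user space in chunks; the remainder of a partially
 * consumed page is served by subsequent reads.
 */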
3515 static ssize_t
3516 tracing_buffers_read(struct file *filp, char __user *ubuf,
3517                      size_t count, loff_t *ppos)
3518 {
3519         struct ftrace_buffer_info *info = filp->private_data;
3520         unsigned int pos;
3521         ssize_t ret;
3522         size_t size;
3523
3524         if (!count)
3525                 return 0;
3526
3527         if (!info->spare)
3528                 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3529         if (!info->spare)
3530                 return -ENOMEM;
3531
3532         /* Do we have previous read data to read? */
3533         if (info->read < PAGE_SIZE)
3534                 goto read;
3535
3536         info->read = 0;
3537
3538         ret = ring_buffer_read_page(info->tr->buffer,
3539                                     &info->spare,
3540                                     count,
3541                                     info->cpu, 0);
3542         if (ret < 0)
3543                 return 0;
3544
3545         pos = ring_buffer_page_len(info->spare);
3546
3547         if (pos < PAGE_SIZE)
3548                 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3549
3550 read:
3551         size = PAGE_SIZE - info->read;
3552         if (size > count)
3553                 size = count;
3554
3555         ret = copy_to_user(ubuf, info->spare + info->read, size);
3556         if (ret == size)
3557                 return -EFAULT;
3558         size -= ret;
3559
3560         *ppos += size;
3561         info->read += size;
3562
3563         return size;
3564 }
3565
3566 static int tracing_buffers_release(struct inode *inode, struct file *file)
3567 {
3568         struct ftrace_buffer_info *info = file->private_data;
3569
3570         if (info->spare)
3571                 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3572         kfree(info);
3573
3574         return 0;
3575 }
3576
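/*
 * A buffer_ref ties a ring-buffer page to the pipe buffers created by
 * tracing_buffers_splice_read().  The page is handed back to the ring
 * buffer only when the last reference held by the pipe is dropped.
 */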
3577 struct buffer_ref {
3578         struct ring_buffer      *buffer;
3579         void                    *page;
3580         int                     ref;
3581 };
3582
3583 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3584                                     struct pipe_buffer *buf)
3585 {
3586         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3587
3588         if (--ref->ref)
3589                 return;
3590
3591         ring_buffer_free_read_page(ref->buffer, ref->page);
3592         kfree(ref);
3593         buf->private = 0;
3594 }
3595
3596 static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3597                                  struct pipe_buffer *buf)
3598 {
3599         return 1;
3600 }
3601
3602 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3603                                 struct pipe_buffer *buf)
3604 {
3605         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3606
3607         ref->ref++;
3608 }
3609
3610 /* Pipe buffer operations for ring-buffer pages spliced into a pipe. */
3611 static struct pipe_buf_operations buffer_pipe_buf_ops = {
3612         .can_merge              = 0,
3613         .map                    = generic_pipe_buf_map,
3614         .unmap                  = generic_pipe_buf_unmap,
3615         .confirm                = generic_pipe_buf_confirm,
3616         .release                = buffer_pipe_buf_release,
3617         .steal                  = buffer_pipe_buf_steal,
3618         .get                    = buffer_pipe_buf_get,
3619 };
3620
3621 /*
3622  * Callback from splice_to_pipe(): release the pages left in the spd
3623  * if we errored out while filling the pipe.
3624  */
3625 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3626 {
3627         struct buffer_ref *ref =
3628                 (struct buffer_ref *)spd->partial[i].private;
3629
3630         if (--ref->ref)
3631                 return;
3632
3633         ring_buffer_free_read_page(ref->buffer, ref->page);
3634         kfree(ref);
3635         spd->partial[i].private = 0;
3636 }
3637
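/*
 * Splice ring-buffer pages into a pipe without copying them.  Both
 * *ppos and len must be page aligned; for each PAGE_SIZE of the
 * request (up to PIPE_BUFFERS pages, and only while the cpu buffer
 * still has entries) one buffer_ref-wrapped page is queued on the
 * pipe.
 */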
3638 static ssize_t
3639 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3640                             struct pipe_inode_info *pipe, size_t len,
3641                             unsigned int flags)
3642 {
3643         struct ftrace_buffer_info *info = file->private_data;
3644         struct partial_page partial[PIPE_BUFFERS];
3645         struct page *pages[PIPE_BUFFERS];
3646         struct splice_pipe_desc spd = {
3647                 .pages          = pages,
3648                 .partial        = partial,
3649                 .flags          = flags,
3650                 .ops            = &buffer_pipe_buf_ops,
3651                 .spd_release    = buffer_spd_release,
3652         };
3653         struct buffer_ref *ref;
3654         int entries, size, i;
3655         ssize_t ret;
3656
3657         if (*ppos & (PAGE_SIZE - 1)) {
3658                 WARN_ONCE(1, "Ftrace: previous read must be page aligned\n");
3659                 return -EINVAL;
3660         }
3661
3662         if (len & (PAGE_SIZE - 1)) {
3663                 WARN_ONCE(1, "Ftrace: splice_read should be page aligned\n");
3664                 if (len < PAGE_SIZE)
3665                         return -EINVAL;
3666                 len &= PAGE_MASK;
3667         }
3668
3669         entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3670
3671         for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
3672                 struct page *page;
3673                 int r;
3674
3675                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3676                 if (!ref)
3677                         break;
3678
3679                 ref->ref = 1;
3680                 ref->buffer = info->tr->buffer;
3681                 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3682                 if (!ref->page) {
3683                         kfree(ref);
3684                         break;
3685                 }
3686
3687                 r = ring_buffer_read_page(ref->buffer, &ref->page,
3688                                           len, info->cpu, 1);
3689                 if (r < 0) {
3690                         ring_buffer_free_read_page(ref->buffer,
3691                                                    ref->page);
3692                         kfree(ref);
3693                         break;
3694                 }
3695
3696                 /*
3697                  * Zero out any leftover data; this page is going to
3698                  * user land.
3699                  */
3700                 size = ring_buffer_page_len(ref->page);
3701                 if (size < PAGE_SIZE)
3702                         memset(ref->page + size, 0, PAGE_SIZE - size);
3703
3704                 page = virt_to_page(ref->page);
3705
3706                 spd.pages[i] = page;
3707                 spd.partial[i].len = PAGE_SIZE;
3708                 spd.partial[i].offset = 0;
3709                 spd.partial[i].private = (unsigned long)ref;
3710                 spd.nr_pages++;
3711                 *ppos += PAGE_SIZE;
3712
3713                 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3714         }
3715
3716         spd.nr_pages = i;
3717
3718         /* did we read anything? */
3719         if (!spd.nr_pages) {
3720                 if (flags & SPLICE_F_NONBLOCK)
3721                         ret = -EAGAIN;
3722                 else
3723                         ret = 0;
3724                 /* TODO: block */
3725                 return ret;
3726         }
3727
3728         ret = splice_to_pipe(pipe, &spd);
3729
3730         return ret;
3731 }
3732
3733 static const struct file_operations tracing_buffers_fops = {
3734         .open           = tracing_buffers_open,
3735         .read           = tracing_buffers_read,
3736         .release        = tracing_buffers_release,
3737         .splice_read    = tracing_buffers_splice_read,
3738         .llseek         = no_llseek,
3739 };
3740
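/*
 * Read handler for the per-cpu stats file: report the number of
 * entries, overruns and commit overruns in this cpu's ring buffer.
 */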
3741 static ssize_t
3742 tracing_stats_read(struct file *filp, char __user *ubuf,
3743                    size_t count, loff_t *ppos)
3744 {
3745         unsigned long cpu = (unsigned long)filp->private_data;
3746         struct trace_array *tr = &global_trace;
3747         struct trace_seq *s;
3748         unsigned long cnt;
3749
3750         s = kmalloc(sizeof(*s), GFP_KERNEL);
3751         if (!s)
3752                 return -ENOMEM;
3753
3754         trace_seq_init(s);
3755
3756         cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
3757         trace_seq_printf(s, "entries: %ld\n", cnt);
3758
3759         cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
3760         trace_seq_printf(s, "overrun: %ld\n", cnt);
3761
3762         cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3763         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3764
3765         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3766
3767         kfree(s);
3768
3769         return count;
3770 }
3771
3772 static const struct file_operations tracing_stats_fops = {
3773         .open           = tracing_open_generic,
3774         .read           = tracing_stats_read,
3775 };
3776
3777 #ifdef CONFIG_DYNAMIC_FTRACE
3778
3779 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3780 {
3781         return 0;
3782 }
3783
3784 static ssize_t
3785 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3786                   size_t cnt, loff_t *ppos)
3787 {
3788         static char ftrace_dyn_info_buffer[1024];
3789         static DEFINE_MUTEX(dyn_info_mutex);
3790         unsigned long *p = filp->private_data;
3791         char *buf = ftrace_dyn_info_buffer;
3792         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
3793         int r;
3794
3795         mutex_lock(&dyn_info_mutex);
3796         r = sprintf(buf, "%ld ", *p);
3797
3798         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3799         buf[r++] = '\n';
3800
3801         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3802
3803         mutex_unlock(&dyn_info_mutex);
3804
3805         return r;
3806 }
3807
3808 static const struct file_operations tracing_dyn_info_fops = {
3809         .open           = tracing_open_generic,
3810         .read           = tracing_read_dyn_info,
3811 };
3812 #endif
3813
3814 static struct dentry *d_tracer;
3815
3816 struct dentry *tracing_init_dentry(void)
3817 {
3818         static int once;
3819
3820         if (d_tracer)
3821                 return d_tracer;
3822
3823         if (!debugfs_initialized())
3824                 return NULL;
3825
3826         d_tracer = debugfs_create_dir("tracing", NULL);
3827
3828         if (!d_tracer && !once) {
3829                 once = 1;
3830                 pr_warning("Could not create debugfs directory 'tracing'\n");
3831                 return NULL;
3832         }
3833
3834         return d_tracer;
3835 }
3836
3837 static struct dentry *d_percpu;
3838
3839 struct dentry *tracing_dentry_percpu(void)
3840 {
3841         static int once;
3842         struct dentry *d_tracer;
3843
3844         if (d_percpu)
3845                 return d_percpu;
3846
3847         d_tracer = tracing_init_dentry();
3848
3849         if (!d_tracer)
3850                 return NULL;
3851
3852         d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3853
3854         if (!d_percpu && !once) {
3855                 once = 1;
3856                 pr_warning("Could not create debugfs directory 'per_cpu'\n");
3857                 return NULL;
3858         }
3859
3860         return d_percpu;
3861 }
3862
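/*
 * Create the per_cpu/cpuN directory for one cpu, holding that cpu's
 * own trace, trace_pipe, trace_pipe_raw and stats files.
 */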
3863 static void tracing_init_debugfs_percpu(long cpu)
3864 {
3865         struct dentry *d_percpu = tracing_dentry_percpu();
3866         struct dentry *d_cpu;
3867         /* strlen("cpu") + MAX(log10(cpu)) + '\0' */
3868         char cpu_dir[7];
3869
3870         if (cpu > 999 || cpu < 0)
3871                 return;
3872
3873         sprintf(cpu_dir, "cpu%ld", cpu);
3874         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3875         if (!d_cpu) {
3876                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3877                 return;
3878         }
3879
3880         /* per cpu trace_pipe */
3881         trace_create_file("trace_pipe", 0444, d_cpu,
3882                         (void *) cpu, &tracing_pipe_fops);
3883
3884         /* per cpu trace */
3885         trace_create_file("trace", 0644, d_cpu,
3886                         (void *) cpu, &tracing_fops);
3887
3888         trace_create_file("trace_pipe_raw", 0444, d_cpu,
3889                         (void *) cpu, &tracing_buffers_fops);
3890
3891         trace_create_file("stats", 0444, d_cpu,
3892                         (void *) cpu, &tracing_stats_fops);
3893 }
3894
3895 #ifdef CONFIG_FTRACE_SELFTEST
3896 /* Let selftest have access to static functions in this file */
3897 #include "trace_selftest.c"
3898 #endif
3899
3900 struct trace_option_dentry {
3901         struct tracer_opt               *opt;
3902         struct tracer_flags             *flags;
3903         struct dentry                   *entry;
3904 };
3905
3906 static ssize_t
3907 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3908                         loff_t *ppos)
3909 {
3910         struct trace_option_dentry *topt = filp->private_data;
3911         char *buf;
3912
3913         if (topt->flags->val & topt->opt->bit)
3914                 buf = "1\n";
3915         else
3916                 buf = "0\n";
3917
3918         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3919 }
3920
3921 static ssize_t
3922 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3923                          loff_t *ppos)
3924 {
3925         struct trace_option_dentry *topt = filp->private_data;
3926         unsigned long val;
3927         char buf[64];
3928         int ret;
3929
3930         if (cnt >= sizeof(buf))
3931                 return -EINVAL;
3932
3933         if (copy_from_user(&buf, ubuf, cnt))
3934                 return -EFAULT;
3935
3936         buf[cnt] = 0;
3937
3938         ret = strict_strtoul(buf, 10, &val);
3939         if (ret < 0)
3940                 return ret;
3941
3942         ret = 0;
3943         switch (val) {
3944         case 0:
3945                 /* do nothing if already cleared */
3946                 if (!(topt->flags->val & topt->opt->bit))
3947                         break;
3948
3949                 mutex_lock(&trace_types_lock);
3950                 if (current_trace->set_flag)
3951                         ret = current_trace->set_flag(topt->flags->val,
3952                                                       topt->opt->bit, 0);
3953                 mutex_unlock(&trace_types_lock);
3954                 if (ret)
3955                         return ret;
3956                 topt->flags->val &= ~topt->opt->bit;
3957                 break;
3958         case 1:
3959                 /* do nothing if already set */
3960                 if (topt->flags->val & topt->opt->bit)
3961                         break;
3962
3963                 mutex_lock(&trace_types_lock);
3964                 if (current_trace->set_flag)
3965                         ret = current_trace->set_flag(topt->flags->val,
3966                                                       topt->opt->bit, 1);
3967                 mutex_unlock(&trace_types_lock);
3968                 if (ret)
3969                         return ret;
3970                 topt->flags->val |= topt->opt->bit;
3971                 break;
3972
3973         default:
3974                 return -EINVAL;
3975         }
3976
3977         *ppos += cnt;
3978
3979         return cnt;
3980 }
3981
3982
3983 static const struct file_operations trace_options_fops = {
3984         .open = tracing_open_generic,
3985         .read = trace_options_read,
3986         .write = trace_options_write,
3987 };
3988
3989 static ssize_t
3990 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3991                         loff_t *ppos)
3992 {
3993         long index = (long)filp->private_data;
3994         char *buf;
3995
3996         if (trace_flags & (1 << index))
3997                 buf = "1\n";
3998         else
3999                 buf = "0\n";
4000
4001         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
4002 }
4003
4004 static ssize_t
4005 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4006                          loff_t *ppos)
4007 {
4008         long index = (long)filp->private_data;
4009         char buf[64];
4010         unsigned long val;
4011         int ret;
4012
4013         if (cnt >= sizeof(buf))
4014                 return -EINVAL;
4015
4016         if (copy_from_user(&buf, ubuf, cnt))
4017                 return -EFAULT;
4018
4019         buf[cnt] = 0;
4020
4021         ret = strict_strtoul(buf, 10, &val);
4022         if (ret < 0)
4023                 return ret;
4024
4025         if (val != 0 && val != 1)
4026                 return -EINVAL;
4027         set_tracer_flags(1 << index, val);
4028
4029         *ppos += cnt;
4030
4031         return cnt;
4032 }
4033
4034 static const struct file_operations trace_options_core_fops = {
4035         .open = tracing_open_generic,
4036         .read = trace_options_core_read,
4037         .write = trace_options_core_write,
4038 };
4039
4040 struct dentry *trace_create_file(const char *name,
4041                                  mode_t mode,
4042                                  struct dentry *parent,
4043                                  void *data,
4044                                  const struct file_operations *fops)
4045 {
4046         struct dentry *ret;
4047
4048         ret = debugfs_create_file(name, mode, parent, data, fops);
4049         if (!ret)
4050                 pr_warning("Could not create debugfs '%s' entry\n", name);
4051
4052         return ret;
4053 }
4054
4055
4056 static struct dentry *trace_options_init_dentry(void)
4057 {
4058         struct dentry *d_tracer;
4059         static struct dentry *t_options;
4060
4061         if (t_options)
4062                 return t_options;
4063
4064         d_tracer = tracing_init_dentry();
4065         if (!d_tracer)
4066                 return NULL;
4067
4068         t_options = debugfs_create_dir("options", d_tracer);
4069         if (!t_options) {
4070                 pr_warning("Could not create debugfs directory 'options'\n");
4071                 return NULL;
4072         }
4073
4074         return t_options;
4075 }
4076
4077 static void
4078 create_trace_option_file(struct trace_option_dentry *topt,
4079                          struct tracer_flags *flags,
4080                          struct tracer_opt *opt)
4081 {
4082         struct dentry *t_options;
4083
4084         t_options = trace_options_init_dentry();
4085         if (!t_options)
4086                 return;
4087
4088         topt->flags = flags;
4089         topt->opt = opt;
4090
4091         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
4092                                     &trace_options_fops);
4093
4094 }
4095
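/*
 * Create one file under options/ for every flag the given tracer
 * exports.  The returned array is terminated by an empty entry so
 * destroy_trace_option_files() knows where to stop.
 */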
4096 static struct trace_option_dentry *
4097 create_trace_option_files(struct tracer *tracer)
4098 {
4099         struct trace_option_dentry *topts;
4100         struct tracer_flags *flags;
4101         struct tracer_opt *opts;
4102         int cnt;
4103
4104         if (!tracer)
4105                 return NULL;
4106
4107         flags = tracer->flags;
4108
4109         if (!flags || !flags->opts)
4110                 return NULL;
4111
4112         opts = flags->opts;
4113
4114         for (cnt = 0; opts[cnt].name; cnt++)
4115                 ;
4116
4117         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
4118         if (!topts)
4119                 return NULL;
4120
4121         for (cnt = 0; opts[cnt].name; cnt++)
4122                 create_trace_option_file(&topts[cnt], flags,
4123                                          &opts[cnt]);
4124
4125         return topts;
4126 }
4127
4128 static void
4129 destroy_trace_option_files(struct trace_option_dentry *topts)
4130 {
4131         int cnt;
4132
4133         if (!topts)
4134                 return;
4135
4136         for (cnt = 0; topts[cnt].opt; cnt++) {
4137                 if (topts[cnt].entry)
4138                         debugfs_remove(topts[cnt].entry);
4139         }
4140
4141         kfree(topts);
4142 }
4143
4144 static struct dentry *
4145 create_trace_option_core_file(const char *option, long index)
4146 {
4147         struct dentry *t_options;
4148
4149         t_options = trace_options_init_dentry();
4150         if (!t_options)
4151                 return NULL;
4152
4153         return trace_create_file(option, 0644, t_options, (void *)index,
4154                                     &trace_options_core_fops);
4155 }
4156
4157 static __init void create_trace_options_dir(void)
4158 {
4159         struct dentry *t_options;
4160         int i;
4161
4162         t_options = trace_options_init_dentry();
4163         if (!t_options)
4164                 return;
4165
4166         for (i = 0; trace_options[i]; i++)
4167                 create_trace_option_core_file(trace_options[i], i);
4168 }
4169
4170 static __init int tracer_init_debugfs(void)
4171 {
4172         struct dentry *d_tracer;
4173         int cpu;
4174
4175         d_tracer = tracing_init_dentry();
4176
4177         trace_create_file("tracing_enabled", 0644, d_tracer,
4178                         &global_trace, &tracing_ctrl_fops);
4179
4180         trace_create_file("trace_options", 0644, d_tracer,
4181                         NULL, &tracing_iter_fops);
4182
4183         trace_create_file("tracing_cpumask", 0644, d_tracer,
4184                         NULL, &tracing_cpumask_fops);
4185
4186         trace_create_file("trace", 0644, d_tracer,
4187                         (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
4188
4189         trace_create_file("available_tracers", 0444, d_tracer,
4190                         &global_trace, &show_traces_fops);
4191
4192         trace_create_file("current_tracer", 0644, d_tracer,
4193                         &global_trace, &set_tracer_fops);
4194
4195 #ifdef CONFIG_TRACER_MAX_TRACE
4196         trace_create_file("tracing_max_latency", 0644, d_tracer,
4197                         &tracing_max_latency, &tracing_max_lat_fops);
4198
4199         trace_create_file("tracing_thresh", 0644, d_tracer,
4200                         &tracing_thresh, &tracing_max_lat_fops);
4201 #endif
4202
4203         trace_create_file("README", 0444, d_tracer,
4204                         NULL, &tracing_readme_fops);
4205
4206         trace_create_file("trace_pipe", 0444, d_tracer,
4207                         (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4208
4209         trace_create_file("buffer_size_kb", 0644, d_tracer,
4210                         &global_trace, &tracing_entries_fops);
4211
4212         trace_create_file("trace_marker", 0220, d_tracer,
4213                         NULL, &tracing_mark_fops);
4214
4215         trace_create_file("saved_cmdlines", 0444, d_tracer,
4216                         NULL, &tracing_saved_cmdlines_fops);
4217
4218         trace_create_file("trace_clock", 0644, d_tracer, NULL,
4219                           &trace_clock_fops);
4220
4221 #ifdef CONFIG_DYNAMIC_FTRACE
4222         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4223                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4224 #endif
4225 #ifdef CONFIG_SYSPROF_TRACER
4226         init_tracer_sysprof_debugfs(d_tracer);
4227 #endif
4228
4229         create_trace_options_dir();
4230
4231         for_each_tracing_cpu(cpu)
4232                 tracing_init_debugfs_percpu(cpu);
4233
4234         return 0;
4235 }
4236
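/*
 * Panic and die notifiers: if ftrace_dump_on_oops is set, dump the
 * ftrace ring buffer to the console before the system goes down.
 */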
4237 static int trace_panic_handler(struct notifier_block *this,
4238                                unsigned long event, void *unused)
4239 {
4240         if (ftrace_dump_on_oops)
4241                 ftrace_dump();
4242         return NOTIFY_OK;
4243 }
4244
4245 static struct notifier_block trace_panic_notifier = {
4246         .notifier_call  = trace_panic_handler,
4247         .next           = NULL,
4248         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
4249 };
4250
4251 static int trace_die_handler(struct notifier_block *self,
4252                              unsigned long val,
4253                              void *data)
4254 {
4255         switch (val) {
4256         case DIE_OOPS:
4257                 if (ftrace_dump_on_oops)
4258                         ftrace_dump();
4259                 break;
4260         default:
4261                 break;
4262         }
4263         return NOTIFY_OK;
4264 }
4265
4266 static struct notifier_block trace_die_notifier = {
4267         .notifier_call = trace_die_handler,
4268         .priority = 200
4269 };
4270
4271 /*
4272  * printk allows a maximum of 1024 characters; we really don't need
4273  * it that big. Nothing should be printing 1000 characters anyway.
4274  */
4275 #define TRACE_MAX_PRINT         1000
4276
4277 /*
4278  * Define here KERN_TRACE so that we have one place to modify
4279  * it if we decide to change what log level the ftrace dump
4280  * should be at.
4281  */
4282 #define KERN_TRACE              KERN_EMERG
4283
4284 static void
4285 trace_printk_seq(struct trace_seq *s)
4286 {
4287         /* Probably should print a warning here. */
4288         if (s->len >= TRACE_MAX_PRINT)
4289                 s->len = TRACE_MAX_PRINT;
4290
4291         /* Should already be NUL-terminated, but we are paranoid. */
4292         s->buffer[s->len] = 0;
4293
4294         printk(KERN_TRACE "%s", s->buffer);
4295
4296         trace_seq_init(s);
4297 }
4298
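/*
 * Dump the entire ring buffer to the console using a simulated
 * iterator.  Only one dump is ever performed; tracing is switched off
 * (and, when disable_tracing is set, ftrace is killed for good)
 * before anything is printed.
 */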
4299 static void __ftrace_dump(bool disable_tracing)
4300 {
4301         static raw_spinlock_t ftrace_dump_lock =
4302                 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
4303         /* use static because iter can be a bit big for the stack */
4304         static struct trace_iterator iter;
4305         unsigned int old_userobj;
4306         static int dump_ran;
4307         unsigned long flags;
4308         int cnt = 0, cpu;
4309
4310         /* only one dump */
4311         local_irq_save(flags);
4312         __raw_spin_lock(&ftrace_dump_lock);
4313         if (dump_ran)
4314                 goto out;
4315
4316         dump_ran = 1;
4317
4318         tracing_off();
4319
4320         if (disable_tracing)
4321                 ftrace_kill();
4322
4323         for_each_tracing_cpu(cpu) {
4324                 atomic_inc(&global_trace.data[cpu]->disabled);
4325         }
4326
4327         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
4328
4329         /* don't look at user memory in panic mode */
4330         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4331
4332         printk(KERN_TRACE "Dumping ftrace buffer:\n");
4333
4334         /* Simulate the iterator */
4335         iter.tr = &global_trace;
4336         iter.trace = current_trace;
4337         iter.cpu_file = TRACE_PIPE_ALL_CPU;
4338
4339         /*
4340          * We need to stop all tracing on all CPUs to read
4341          * the next buffer. This is a bit expensive, but is
4342          * not done often. We fill in all that we can read,
4343          * and then release the locks again.
4344          */
4345
4346         while (!trace_empty(&iter)) {
4347
4348                 if (!cnt)
4349                         printk(KERN_TRACE "---------------------------------\n");
4350
4351                 cnt++;
4352
4353                 /* reset all but tr, trace, and overruns */
4354                 memset(&iter.seq, 0,
4355                        sizeof(struct trace_iterator) -
4356                        offsetof(struct trace_iterator, seq));
4357                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
4358                 iter.pos = -1;
4359
4360                 if (find_next_entry_inc(&iter) != NULL) {
4361                         int ret;
4362
4363                         ret = print_trace_line(&iter);
4364                         if (ret != TRACE_TYPE_NO_CONSUME)
4365                                 trace_consume(&iter);
4366                 }
4367
4368                 trace_printk_seq(&iter.seq);
4369         }
4370
4371         if (!cnt)
4372                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
4373         else
4374                 printk(KERN_TRACE "---------------------------------\n");
4375
4376         /* Re-enable tracing if requested */
4377         if (!disable_tracing) {
4378                 trace_flags |= old_userobj;
4379
4380                 for_each_tracing_cpu(cpu) {
4381                         atomic_dec(&global_trace.data[cpu]->disabled);
4382                 }
4383                 tracing_on();
4384         }
4385
4386  out:
4387         __raw_spin_unlock(&ftrace_dump_lock);
4388         local_irq_restore(flags);
4389 }
4390
4391 /* By default: disable tracing after the dump */
4392 void ftrace_dump(void)
4393 {
4394         __ftrace_dump(true);
4395 }
4396
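/*
 * Early-init setup: allocate the tracing cpumasks and the ring
 * buffer(s), register the nop tracer as the current tracer and hook
 * up the panic/die notifiers.  Any allocation failure unwinds what
 * has already been set up.
 */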
4397 __init static int tracer_alloc_buffers(void)
4398 {
4399         int ring_buf_size;
4400         int i;
4401         int ret = -ENOMEM;
4402
4403         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4404                 goto out;
4405
4406         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4407                 goto out_free_buffer_mask;
4408
4409         if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4410                 goto out_free_tracing_cpumask;
4411
4412         /* To save memory, keep the ring buffer size at its minimum */
4413         if (ring_buffer_expanded)
4414                 ring_buf_size = trace_buf_size;
4415         else
4416                 ring_buf_size = 1;
4417
4418         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4419         cpumask_copy(tracing_cpumask, cpu_all_mask);
4420         cpumask_clear(tracing_reader_cpumask);
4421
4422         /* TODO: make the number of buffers hot pluggable with CPUS */
4423         global_trace.buffer = ring_buffer_alloc(ring_buf_size,
4424                                                    TRACE_BUFFER_FLAGS);
4425         if (!global_trace.buffer) {
4426                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
4427                 WARN_ON(1);
4428                 goto out_free_cpumask;
4429         }
4430         global_trace.entries = ring_buffer_size(global_trace.buffer);
4431
4432
4433 #ifdef CONFIG_TRACER_MAX_TRACE
4434         max_tr.buffer = ring_buffer_alloc(ring_buf_size,
4435                                              TRACE_BUFFER_FLAGS);
4436         if (!max_tr.buffer) {
4437                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4438                 WARN_ON(1);
4439                 ring_buffer_free(global_trace.buffer);
4440                 goto out_free_cpumask;
4441         }
4442         max_tr.entries = ring_buffer_size(max_tr.buffer);
4443         WARN_ON(max_tr.entries != global_trace.entries);
4444 #endif
4445
4446         /* Point each trace array's per-cpu data at its per-cpu area */
4447         for_each_tracing_cpu(i) {
4448                 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4449                 max_tr.data[i] = &per_cpu(max_data, i);
4450         }
4451
4452         trace_init_cmdlines();
4453
4454         register_tracer(&nop_trace);
4455         current_trace = &nop_trace;
4456 #ifdef CONFIG_BOOT_TRACER
4457         register_tracer(&boot_tracer);
4458 #endif
4459         /* All seems OK, enable tracing */
4460         tracing_disabled = 0;
4461
4462         atomic_notifier_chain_register(&panic_notifier_list,
4463                                        &trace_panic_notifier);
4464
4465         register_die_notifier(&trace_die_notifier);
4466
4467         return 0;
4468
4469 out_free_cpumask:
4470         free_cpumask_var(tracing_reader_cpumask);
4471 out_free_tracing_cpumask:
4472         free_cpumask_var(tracing_cpumask);
4473 out_free_buffer_mask:
4474         free_cpumask_var(tracing_buffer_mask);
4475 out:
4476         return ret;
4477 }
4478
4479 __init static int clear_boot_tracer(void)
4480 {
4481         /*
4482          * The default bootup tracer name lives in an init section
4483          * that is about to be freed. This function runs as a late
4484          * initcall; if the boot tracer was never registered, clear
4485          * the pointer so that a later registration cannot access
4486          * memory that has already been freed.
4487          */
4488         if (!default_bootup_tracer)
4489                 return 0;
4490
4491         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
4492                default_bootup_tracer);
4493         default_bootup_tracer = NULL;
4494
4495         return 0;
4496 }
4497
4498 early_initcall(tracer_alloc_buffers);
4499 fs_initcall(tracer_init_debugfs);
4500 late_initcall(clear_boot_tracer);