tracing/core: Make the stack entry helpers global
[safe/jmp/linux-2.6] kernel/trace/trace.c
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
#include <linux/ring_buffer.h>
#include <linux/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>

#include "trace.h"
#include "trace_output.h"

#define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)

unsigned long __read_mostly     tracing_max_latency;
unsigned long __read_mostly     tracing_thresh;

/*
 * On boot up, the ring buffer is set to the minimum size, so that
 * we do not waste memory on systems that are not using tracing.
 */
int ring_buffer_expanded;

/*
 * We need to change this state when a selftest is running.
 * A selftest will peek into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer, such as trace_printk, could
 * occur at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If a tracer is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
        { }
};

static struct tracer_flags dummy_tracer_flags = {
        .val = 0,
        .opts = dummy_tracer_opt
};

static int dummy_set_flag(u32 old_flags, u32 bit, int set)
{
        return 0;
}

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1, but is set back to zero if the
 * initialization of the tracer succeeds; that is the only
 * place that clears it.
 */
static int tracing_disabled = 1;

DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);

static inline void ftrace_disable_cpu(void)
{
        preempt_disable();
        local_inc(&__get_cpu_var(ftrace_cpu_disabled));
}

static inline void ftrace_enable_cpu(void)
{
        local_dec(&__get_cpu_var(ftrace_cpu_disabled));
        preempt_enable();
}

static cpumask_var_t __read_mostly      tracing_buffer_mask;

/* Define which cpu buffers are currently read in trace_pipe */
static cpumask_var_t                    tracing_reader_cpumask;

#define for_each_tracing_cpu(cpu)       \
        for_each_cpu(cpu, tracing_buffer_mask)

/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops to true.
 */
int ftrace_dump_on_oops;

static int tracing_set_tracer(const char *buf);

#define BOOTUP_TRACER_SIZE              100
static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static int __init set_ftrace(char *str)
{
        strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
        /* We are using ftrace early, expand it */
        ring_buffer_expanded = 1;
        return 1;
}
__setup("ftrace=", set_ftrace);

static int __init set_ftrace_dump_on_oops(char *str)
{
        ftrace_dump_on_oops = 1;
        return 1;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

unsigned long long ns2usecs(cycle_t nsec)
{
        nsec += 500;
        do_div(nsec, 1000);
        return nsec;
}

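/*
 * Illustrative note (not from the original source): ns2usecs converts
 * nanoseconds to microseconds, rounding to the nearest microsecond by
 * adding 500 before the divide.  For example:
 *
 *      ns2usecs(1499)  returns 1  (1999 / 1000)
 *      ns2usecs(1500)  returns 2  (2000 / 1000)
 *
 * do_div() is used rather than a plain '/' because native 64-bit
 * division is not available on all 32-bit architectures.
 */
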
/*
 * The global_trace is the descriptor that holds the tracing
 * buffers for the live tracing. For each CPU, it contains
 * a linked list of pages that will store trace entries. The
 * page descriptor of the pages in the memory is used to hold
 * the linked list by linking the lru item in the page descriptor
 * to each of the pages in the buffer per CPU.
 *
 * For each active CPU there is a data field that holds the
 * pages for the buffer for that CPU. Each CPU has the same number
 * of pages allocated for its buffer.
 */
static struct trace_array       global_trace;

static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);

int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
                                 struct ring_buffer_event *event)
{
        return filter_check_discard(call, rec, global_trace.buffer, event);
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);

cycle_t ftrace_now(int cpu)
{
        u64 ts;

        /* Early boot up does not have a buffer yet */
        if (!global_trace.buffer)
                return trace_clock_local();

        ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
        ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);

        return ts;
}

/*
 * The max_tr is used to snapshot the global_trace when a maximum
 * latency is reached. Some tracers will use this to store a maximum
 * trace while it continues examining live traces.
 *
 * The buffers for the max_tr are set up the same as the global_trace.
 * When a snapshot is taken, the linked list of the max_tr is swapped
 * with the linked list of the global_trace and the buffers are reset for
 * the global_trace so the tracing can continue.
 */
static struct trace_array       max_tr;

static DEFINE_PER_CPU(struct trace_array_cpu, max_data);

/* tracer_enabled is used to toggle activation of a tracer */
static int                      tracer_enabled = 1;

/**
 * tracing_is_enabled - return tracer_enabled status
 *
 * This function is used by other tracers to know the status
 * of the tracer_enabled flag.  Tracers may use this function
 * to know if they should enable their features when starting
 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
 */
int tracing_is_enabled(void)
{
        return tracer_enabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to the low value of 16384.
 * If a dump on oops happens, it is much appreciated not to
 * have to wait for all that output. In any case, this is
 * configurable at both boot time and run time.
 */
#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a linked list of available tracers. */
static struct tracer            *trace_types __read_mostly;

/* current_trace points to the tracer that is currently active */
static struct tracer            *current_trace __read_mostly;

/*
 * max_tracer_type_len is used to simplify allocating buffers
 * to read userspace tracer names. We keep track of
 * the longest tracer name registered.
 */
static int                      max_tracer_type_len;

/*
 * trace_types_lock is used to protect the trace_types list.
 * This lock is also used to keep user access serialized.
 * Accesses from userspace will grab this lock while userspace
 * activities happen inside the kernel.
 */
static DEFINE_MUTEX(trace_types_lock);

/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);

/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
        TRACE_ITER_GRAPH_TIME;

/**
 * trace_wake_up - wake up tasks waiting for trace input
 *
 * Simply wakes up any task that is blocked on the trace_wait
 * queue. This is used with trace_poll for tasks polling the trace.
 */
void trace_wake_up(void)
{
        /*
         * The runqueue_is_locked() can fail, but this is the best we
         * have for now:
         */
        if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
                wake_up(&trace_wait);
}

static int __init set_buf_size(char *str)
{
        unsigned long buf_size;

        if (!str)
                return 0;
        buf_size = memparse(str, &str);
        /* nr_entries cannot be zero */
        if (buf_size == 0)
                return 0;
        trace_buf_size = buf_size;
        return 1;
}
__setup("trace_buf_size=", set_buf_size);

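/*
 * Illustrative example (not part of the original source): memparse()
 * accepts an optional K/M/G suffix, so the buffer size can be given
 * on the kernel command line as, e.g.:
 *
 *      trace_buf_size=1441792
 *      trace_buf_size=1M
 *
 * Both set trace_buf_size in bytes; the ring buffer rounds it to
 * whole pages when it is allocated.
 */
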
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
        return nsecs / 1000;
}

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
        "print-parent",
        "sym-offset",
        "sym-addr",
        "verbose",
        "raw",
        "hex",
        "bin",
        "block",
        "stacktrace",
        "sched-tree",
        "trace_printk",
        "ftrace_preempt",
        "branch",
        "annotate",
        "userstacktrace",
        "sym-userobj",
        "printk-msg-only",
        "context-info",
        "latency-format",
        "global-clock",
        "sleep-time",
        "graph-time",
        NULL
};

/*
 * ftrace_max_lock is used to protect the swapping of buffers
 * when taking a max snapshot. The buffers themselves are
 * protected by per_cpu spinlocks. But the action of the swap
 * needs its own lock.
 *
 * This is defined as a raw_spinlock_t in order to help
 * with performance when lockdep debugging is enabled.
 */
static raw_spinlock_t ftrace_max_lock =
        (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (This way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/debug/tracing/latency_trace.)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        struct trace_array_cpu *data = tr->data[cpu];

        max_tr.cpu = cpu;
        max_tr.time_start = data->preempt_timestamp;

        data = max_tr.data[cpu];
        data->saved_latency = tracing_max_latency;

        memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
        data->pid = tsk->pid;
        data->uid = task_uid(tsk);
        data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
        data->policy = tsk->policy;
        data->rt_priority = tsk->rt_priority;

        /* record this task's comm */
        tracing_record_cmdline(tsk);
}

ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
{
        int len;
        int ret;

        if (!cnt)
                return 0;

        if (s->len <= s->readpos)
                return -EBUSY;

        len = s->len - s->readpos;
        if (cnt > len)
                cnt = len;
        ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
        if (ret == cnt)
                return -EFAULT;

        cnt -= ret;

        s->readpos += cnt;
        return cnt;
}

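/*
 * Illustrative note (not from the original source): copy_to_user()
 * returns the number of bytes that could NOT be copied, so the code
 * above advances readpos only past the bytes that actually reached
 * userspace.  For example, with 10 unread bytes and cnt == 10:
 *
 *      ret == 10  ->  nothing was copied, return -EFAULT;
 *      ret == 4   ->  6 bytes were copied, readpos += 6, return 6.
 */
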
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
        int len;
        void *ret;

        if (s->len <= s->readpos)
                return -EBUSY;

        len = s->len - s->readpos;
        if (cnt > len)
                cnt = len;
        ret = memcpy(buf, s->buffer + s->readpos, cnt);
        if (!ret)
                return -EFAULT;

        s->readpos += cnt;
        return cnt;
}

/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        struct ring_buffer *buf = tr->buffer;

        WARN_ON_ONCE(!irqs_disabled());
        __raw_spin_lock(&ftrace_max_lock);

        tr->buffer = max_tr.buffer;
        max_tr.buffer = buf;

        ftrace_disable_cpu();
        ring_buffer_reset(tr->buffer);
        ftrace_enable_cpu();

        __update_max_tr(tr, tsk, cpu);
        __raw_spin_unlock(&ftrace_max_lock);
}

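/*
 * Illustrative sketch (not from the original source): a latency
 * tracer typically calls update_max_tr() when it observes a new
 * worst case, roughly along these lines (names are hypothetical;
 * the wakeup tracer is a real caller):
 *
 *      if (delta > tracing_max_latency) {
 *              tracing_max_latency = delta;
 *              update_max_tr(tr, current, cpu);
 *      }
 *
 * The buffer swap is cheap: only the buffer pointers are exchanged,
 * and the now-live buffer is reset so tracing can continue.
 */
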
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        int ret;

        WARN_ON_ONCE(!irqs_disabled());
        __raw_spin_lock(&ftrace_max_lock);

        ftrace_disable_cpu();

        ring_buffer_reset(max_tr.buffer);
        ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);

        ftrace_enable_cpu();

        WARN_ON_ONCE(ret && ret != -EAGAIN);

        __update_max_tr(tr, tsk, cpu);
        __raw_spin_unlock(&ftrace_max_lock);
}

/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
int register_tracer(struct tracer *type)
__releases(kernel_lock)
__acquires(kernel_lock)
{
        struct tracer *t;
        int len;
        int ret = 0;

        if (!type->name) {
                pr_info("Tracer must have a name\n");
                return -1;
        }

        /*
         * When this gets called we hold the BKL which means that
         * preemption is disabled. Various trace selftests however
         * need to disable and enable preemption for successful tests.
         * So we drop the BKL here and grab it after the tests again.
         */
        unlock_kernel();
        mutex_lock(&trace_types_lock);

        tracing_selftest_running = true;

        for (t = trace_types; t; t = t->next) {
                if (strcmp(type->name, t->name) == 0) {
                        /* already found */
                        pr_info("Trace %s already registered\n",
                                type->name);
                        ret = -1;
                        goto out;
                }
        }

        if (!type->set_flag)
                type->set_flag = &dummy_set_flag;
        if (!type->flags)
                type->flags = &dummy_tracer_flags;
        else if (!type->flags->opts)
                type->flags->opts = dummy_tracer_opt;
        if (!type->wait_pipe)
                type->wait_pipe = default_wait_pipe;

#ifdef CONFIG_FTRACE_STARTUP_TEST
        if (type->selftest && !tracing_selftest_disabled) {
                struct tracer *saved_tracer = current_trace;
                struct trace_array *tr = &global_trace;
                int i;

                /*
                 * Run a selftest on this tracer.
                 * Here we reset the trace buffer, and set the current
                 * tracer to be this tracer. The tracer can then run some
                 * internal tracing to verify that everything is in order.
                 * If we fail, we do not register this tracer.
                 */
                for_each_tracing_cpu(i)
                        tracing_reset(tr, i);

                current_trace = type;
                /* the test is responsible for initializing and enabling */
                pr_info("Testing tracer %s: ", type->name);
                ret = type->selftest(type, tr);
                /* the test is responsible for resetting too */
                current_trace = saved_tracer;
                if (ret) {
                        printk(KERN_CONT "FAILED!\n");
                        goto out;
                }
                /* Only reset on passing, to avoid touching corrupted buffers */
                for_each_tracing_cpu(i)
                        tracing_reset(tr, i);

                printk(KERN_CONT "PASSED\n");
        }
#endif

        type->next = trace_types;
        trace_types = type;
        len = strlen(type->name);
        if (len > max_tracer_type_len)
                max_tracer_type_len = len;

 out:
        tracing_selftest_running = false;
        mutex_unlock(&trace_types_lock);

        if (ret || !default_bootup_tracer)
                goto out_unlock;

        if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
                goto out_unlock;

        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
        /* Do we want this tracer to start on bootup? */
        tracing_set_tracer(type->name);
        default_bootup_tracer = NULL;
        /* Disable other selftests, since running this tracer will break them. */
        tracing_selftest_disabled = 1;
#ifdef CONFIG_FTRACE_STARTUP_TEST
        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
               type->name);
#endif

 out_unlock:
        lock_kernel();
        return ret;
}

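/*
 * Illustrative sketch (not part of the original source): a minimal
 * tracer plugin needs little more than a name and an init callback.
 * Hypothetical example:
 *
 *      static int example_init(struct trace_array *tr)
 *      {
 *              return 0;
 *      }
 *
 *      static struct tracer example_tracer __read_mostly = {
 *              .name   = "example",
 *              .init   = example_init,
 *      };
 *
 *      ret = register_tracer(&example_tracer);
 *
 * Unset callbacks (set_flag, flags, wait_pipe) are filled in with
 * the dummy defaults above.
 */
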
void unregister_tracer(struct tracer *type)
{
        struct tracer **t;
        int len;

        mutex_lock(&trace_types_lock);
        for (t = &trace_types; *t; t = &(*t)->next) {
                if (*t == type)
                        goto found;
        }
        pr_info("Trace %s not registered\n", type->name);
        goto out;

 found:
        *t = (*t)->next;

        if (type == current_trace && tracer_enabled) {
                tracer_enabled = 0;
                tracing_stop();
                if (current_trace->stop)
                        current_trace->stop(&global_trace);
                current_trace = &nop_trace;
        }

        if (strlen(type->name) != max_tracer_type_len)
                goto out;

        max_tracer_type_len = 0;
        for (t = &trace_types; *t; t = &(*t)->next) {
                len = strlen((*t)->name);
                if (len > max_tracer_type_len)
                        max_tracer_type_len = len;
        }
 out:
        mutex_unlock(&trace_types_lock);
}

void tracing_reset(struct trace_array *tr, int cpu)
{
        ftrace_disable_cpu();
        ring_buffer_reset_cpu(tr->buffer, cpu);
        ftrace_enable_cpu();
}

void tracing_reset_online_cpus(struct trace_array *tr)
{
        int cpu;

        tr->time_start = ftrace_now(tr->cpu);

        for_each_online_cpu(cpu)
                tracing_reset(tr, cpu);
}

void tracing_reset_current(int cpu)
{
        tracing_reset(&global_trace, cpu);
}

void tracing_reset_current_online_cpus(void)
{
        tracing_reset_online_cpus(&global_trace);
}

#define SAVED_CMDLINES 128
#define NO_CMDLINE_MAP UINT_MAX
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;

/* temporarily disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;

static void trace_init_cmdlines(void)
{
        memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
        memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
        cmdline_idx = 0;
}

static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);

/**
 * ftrace_off_permanent - disable all ftrace code permanently
 *
 * This should only be called when a serious anomaly has
 * been detected.  This will turn off the function tracing,
 * ring buffers, and other tracing utilities. It takes no
 * locks and can be called from any context.
 */
void ftrace_off_permanent(void)
{
        tracing_disabled = 1;
        ftrace_stop();
        tracing_off_permanent();
}

/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.
 */
void tracing_start(void)
{
        struct ring_buffer *buffer;
        unsigned long flags;

        if (tracing_disabled)
                return;

        spin_lock_irqsave(&tracing_start_lock, flags);
        if (--trace_stop_count) {
                if (trace_stop_count < 0) {
                        /* Someone screwed up their debugging */
                        WARN_ON_ONCE(1);
                        trace_stop_count = 0;
                }
                goto out;
        }

        buffer = global_trace.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);

        buffer = max_tr.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);

        ftrace_start();
 out:
        spin_unlock_irqrestore(&tracing_start_lock, flags);
}

/**
 * tracing_stop - quick stop of the tracer
 *
 * Lightweight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
        struct ring_buffer *buffer;
        unsigned long flags;

        ftrace_stop();
        spin_lock_irqsave(&tracing_start_lock, flags);
        if (trace_stop_count++)
                goto out;

        buffer = global_trace.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);

        buffer = max_tr.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);

 out:
        spin_unlock_irqrestore(&tracing_start_lock, flags);
}

void trace_stop_cmdline_recording(void);

static void trace_save_cmdline(struct task_struct *tsk)
{
        unsigned pid, idx;

        if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
                return;

        /*
         * It's not the end of the world if we don't get
         * the lock, but we also don't want to spin
         * nor do we want to disable interrupts,
         * so if we miss here, then better luck next time.
         */
        if (!__raw_spin_trylock(&trace_cmdline_lock))
                return;

        idx = map_pid_to_cmdline[tsk->pid];
        if (idx == NO_CMDLINE_MAP) {
                idx = (cmdline_idx + 1) % SAVED_CMDLINES;

                /*
                 * Check whether the cmdline buffer at idx has a pid
                 * mapped. We are going to overwrite that entry so we
                 * need to clear the map_pid_to_cmdline. Otherwise we
                 * would read the new comm for the old pid.
                 */
                pid = map_cmdline_to_pid[idx];
                if (pid != NO_CMDLINE_MAP)
                        map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;

                map_cmdline_to_pid[idx] = tsk->pid;
                map_pid_to_cmdline[tsk->pid] = idx;

                cmdline_idx = idx;
        }

        memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);

        __raw_spin_unlock(&trace_cmdline_lock);
}

void trace_find_cmdline(int pid, char comm[])
{
        unsigned map;

        if (!pid) {
                strcpy(comm, "<idle>");
                return;
        }

        if (pid > PID_MAX_DEFAULT) {
                strcpy(comm, "<...>");
                return;
        }

        preempt_disable();
        __raw_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
        if (map != NO_CMDLINE_MAP)
                strcpy(comm, saved_cmdlines[map]);
        else
                strcpy(comm, "<...>");

        __raw_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
}

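/*
 * Illustrative note (not from the original source): the cmdline
 * cache is a small ring of SAVED_CMDLINES (128) comm strings plus
 * two index arrays, so lookups in both directions are O(1):
 *
 *      map_pid_to_cmdline[pid]  -> slot in saved_cmdlines[]
 *      map_cmdline_to_pid[slot] -> pid that owns the slot
 *
 * When a slot is recycled for a new pid, the old pid's forward
 * mapping is cleared first, which is why a stale pid prints as
 * "<...>" rather than another task's comm.
 */
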
void tracing_record_cmdline(struct task_struct *tsk)
{
        if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
            !tracing_is_on())
                return;

        trace_save_cmdline(tsk);
}

void
tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
                             int pc)
{
        struct task_struct *tsk = current;

        entry->preempt_count            = pc & 0xff;
        entry->pid                      = (tsk) ? tsk->pid : 0;
        entry->tgid                     = (tsk) ? tsk->tgid : 0;
        entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
#else
                TRACE_FLAG_IRQS_NOSUPPORT |
#endif
                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
                ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
                (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}

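/*
 * Illustrative note (not from the original source): 'pc' is the raw
 * preempt_count() value, so its HARDIRQ_MASK and SOFTIRQ_MASK bits
 * record the context the event was logged from.  For example, an
 * event logged from a hardirq handler with interrupts disabled ends
 * up with
 *
 *      entry->flags == TRACE_FLAG_IRQS_OFF | TRACE_FLAG_HARDIRQ
 *
 * which the output code later renders in the latency-format context
 * columns (irqs-off / hardirq-softirq / need-resched / preempt-depth).
 */
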
struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
                                                    int type,
                                                    unsigned long len,
                                                    unsigned long flags, int pc)
{
        struct ring_buffer_event *event;

        event = ring_buffer_lock_reserve(tr->buffer, len);
        if (event != NULL) {
                struct trace_entry *ent = ring_buffer_event_data(event);

                tracing_generic_entry_update(ent, flags, pc);
                ent->type = type;
        }

        return event;
}

static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
                                        struct ring_buffer_event *event,
                                        unsigned long flags, int pc,
                                        int wake)
{
        ring_buffer_unlock_commit(tr->buffer, event);

        ftrace_trace_stack(tr, flags, 6, pc);
        ftrace_trace_userstack(tr, flags, pc);

        if (wake)
                trace_wake_up();
}

void trace_buffer_unlock_commit(struct trace_array *tr,
                                        struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
{
        __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
}

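/*
 * Illustrative sketch (not from the original source): every event
 * writer in this file follows the same reserve/fill/commit pattern.
 * Hypothetical example for an event type TRACE_FOO with a payload
 * struct foo_entry:
 *
 *      struct ring_buffer_event *event;
 *      struct foo_entry *entry;
 *
 *      event = trace_buffer_lock_reserve(tr, TRACE_FOO,
 *                                        sizeof(*entry), flags, pc);
 *      if (!event)
 *              return;
 *      entry = ring_buffer_event_data(event);
 *      entry->value = value;
 *      trace_buffer_unlock_commit(tr, event, flags, pc);
 *
 * A failed reserve simply drops the event; nothing ever blocks.
 */
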
struct ring_buffer_event *
trace_current_buffer_lock_reserve(int type, unsigned long len,
                                  unsigned long flags, int pc)
{
        return trace_buffer_lock_reserve(&global_trace,
                                         type, len, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);

void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
{
        __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);

void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
{
        __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
}
EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);

void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
{
        ring_buffer_discard_commit(global_trace.buffer, event);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);

void
trace_function(struct trace_array *tr,
               unsigned long ip, unsigned long parent_ip, unsigned long flags,
               int pc)
{
        struct ftrace_event_call *call = &event_function;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;

        /* If we are reading the ring buffer, don't trace */
        if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
                return;

        event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
                                          flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->ip                       = ip;
        entry->parent_ip                = parent_ip;

        if (!filter_check_discard(call, entry, tr->buffer, event))
                ring_buffer_unlock_commit(tr->buffer, event);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int __trace_graph_entry(struct trace_array *tr,
                                struct ftrace_graph_ent *trace,
                                unsigned long flags,
                                int pc)
{
        struct ftrace_event_call *call = &event_funcgraph_entry;
        struct ring_buffer_event *event;
        struct ftrace_graph_ent_entry *entry;

        if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
                return 0;

        event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return 0;
        entry   = ring_buffer_event_data(event);
        entry->graph_ent                        = *trace;
        if (!filter_current_check_discard(call, entry, event))
                ring_buffer_unlock_commit(global_trace.buffer, event);

        return 1;
}

static void __trace_graph_return(struct trace_array *tr,
                                struct ftrace_graph_ret *trace,
                                unsigned long flags,
                                int pc)
{
        struct ftrace_event_call *call = &event_funcgraph_exit;
        struct ring_buffer_event *event;
        struct ftrace_graph_ret_entry *entry;

        if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
                return;

        event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->ret                              = *trace;
        if (!filter_current_check_discard(call, entry, event))
                ring_buffer_unlock_commit(global_trace.buffer, event);
}
#endif

void
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
       unsigned long ip, unsigned long parent_ip, unsigned long flags,
       int pc)
{
        if (likely(!atomic_read(&data->disabled)))
                trace_function(tr, ip, parent_ip, flags, pc);
}

#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct trace_array *tr,
                                 unsigned long flags,
                                 int skip, int pc)
{
        struct ftrace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;

        event = trace_buffer_lock_reserve(tr, TRACE_STACK,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        memset(&entry->caller, 0, sizeof(entry->caller));

        trace.nr_entries        = 0;
        trace.max_entries       = FTRACE_STACK_ENTRIES;
        trace.skip              = skip;
        trace.entries           = entry->caller;

        save_stack_trace(&trace);
        if (!filter_check_discard(call, entry, tr->buffer, event))
                ring_buffer_unlock_commit(tr->buffer, event);
}

void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                        int pc)
{
        if (!(trace_flags & TRACE_ITER_STACKTRACE))
                return;

        __ftrace_trace_stack(tr, flags, skip, pc);
}

void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                   int pc)
{
        __ftrace_trace_stack(tr, flags, skip, pc);
}

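/*
 * Illustrative note (not from the original source): ftrace_trace_stack()
 * honors the "stacktrace" trace option, while __trace_stack() records
 * unconditionally; a caller that has already decided it wants a stack
 * dump uses the latter.  The 'skip' argument drops that many innermost
 * frames so the tracing helpers themselves do not clutter the recorded
 * stack, e.g.:
 *
 *      __trace_stack(tr, flags, 2, pc);
 *
 * roughly omits __trace_stack() and its immediate caller (the exact
 * frames skipped depend on the architecture's stack walker).
 */
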
void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc)
{
        struct ftrace_event_call *call = &event_user_stack;
        struct ring_buffer_event *event;
        struct userstack_entry *entry;
        struct stack_trace trace;

        if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
                return;

        event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);

        memset(&entry->caller, 0, sizeof(entry->caller));

        trace.nr_entries        = 0;
        trace.max_entries       = FTRACE_STACK_ENTRIES;
        trace.skip              = 0;
        trace.entries           = entry->caller;

        save_stack_trace_user(&trace);
        if (!filter_check_discard(call, entry, tr->buffer, event))
                ring_buffer_unlock_commit(tr->buffer, event);
}

#ifdef UNUSED
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
        ftrace_trace_userstack(tr, flags, preempt_count());
}
#endif /* UNUSED */

#endif /* CONFIG_STACKTRACE */

static void
ftrace_trace_special(void *__tr,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
                     int pc)
{
        struct ring_buffer_event *event;
        struct trace_array *tr = __tr;
        struct special_entry *entry;

        event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
                                          sizeof(*entry), 0, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->arg1                     = arg1;
        entry->arg2                     = arg2;
        entry->arg3                     = arg3;
        trace_buffer_unlock_commit(tr, event, 0, pc);
}

void
__trace_special(void *__tr, void *__data,
                unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
        ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
}

void
tracing_sched_switch_trace(struct trace_array *tr,
                           struct task_struct *prev,
                           struct task_struct *next,
                           unsigned long flags, int pc)
{
        struct ftrace_event_call *call = &event_context_switch;
        struct ring_buffer_event *event;
        struct ctx_switch_entry *entry;

        event = trace_buffer_lock_reserve(tr, TRACE_CTX,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->prev_pid                 = prev->pid;
        entry->prev_prio                = prev->prio;
        entry->prev_state               = prev->state;
        entry->next_pid                 = next->pid;
        entry->next_prio                = next->prio;
        entry->next_state               = next->state;
        entry->next_cpu = task_cpu(next);

        if (!filter_check_discard(call, entry, tr->buffer, event))
                trace_buffer_unlock_commit(tr, event, flags, pc);
}

void
tracing_sched_wakeup_trace(struct trace_array *tr,
                           struct task_struct *wakee,
                           struct task_struct *curr,
                           unsigned long flags, int pc)
{
        struct ftrace_event_call *call = &event_wakeup;
        struct ring_buffer_event *event;
        struct ctx_switch_entry *entry;

        event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
                                          sizeof(*entry), flags, pc);
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->prev_pid                 = curr->pid;
        entry->prev_prio                = curr->prio;
        entry->prev_state               = curr->state;
        entry->next_pid                 = wakee->pid;
        entry->next_prio                = wakee->prio;
        entry->next_state               = wakee->state;
        entry->next_cpu                 = task_cpu(wakee);

        if (!filter_check_discard(call, entry, tr->buffer, event))
                ring_buffer_unlock_commit(tr->buffer, event);
        ftrace_trace_stack(tr, flags, 6, pc);
        ftrace_trace_userstack(tr, flags, pc);
}

void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
        int cpu;
        int pc;

        if (tracing_disabled)
                return;

        pc = preempt_count();
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];

        if (likely(atomic_inc_return(&data->disabled) == 1))
                ftrace_trace_special(tr, arg1, arg2, arg3, pc);

        atomic_dec(&data->disabled);
        local_irq_restore(flags);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
        int ret;
        int cpu;
        int pc;

        if (!ftrace_trace_task(current))
                return 0;

        if (!ftrace_graph_addr(trace->func))
                return 0;

        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
        disabled = atomic_inc_return(&data->disabled);
        if (likely(disabled == 1)) {
                pc = preempt_count();
                ret = __trace_graph_entry(tr, trace, flags, pc);
        } else {
                ret = 0;
        }
        /* Only do the atomic if it is not already set */
        if (!test_tsk_trace_graph(current))
                set_tsk_trace_graph(current);

        atomic_dec(&data->disabled);
        local_irq_restore(flags);

        return ret;
}

void trace_graph_return(struct ftrace_graph_ret *trace)
{
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
        int cpu;
        int pc;

        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
        disabled = atomic_inc_return(&data->disabled);
        if (likely(disabled == 1)) {
                pc = preempt_count();
                __trace_graph_return(tr, trace, flags, pc);
        }
        if (!trace->depth)
                clear_tsk_trace_graph(current);
        atomic_dec(&data->disabled);
        local_irq_restore(flags);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

/**
 * trace_vbprintk - write a binary message to the tracing buffer
 */
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
        static raw_spinlock_t trace_buf_lock =
                (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];

        struct ftrace_event_call *call = &event_bprint;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        struct bprint_entry *entry;
        unsigned long flags;
        int disable;
        int resched;
        int cpu, len = 0, size, pc;

        if (unlikely(tracing_selftest_running || tracing_disabled))
                return 0;

        /* Don't pollute graph traces with trace_vprintk internals */
        pause_graph_tracing();

        pc = preempt_count();
        resched = ftrace_preempt_disable();
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];

        disable = atomic_inc_return(&data->disabled);
        if (unlikely(disable != 1))
                goto out;

        /* Lockdep uses trace_printk for lock tracing */
        local_irq_save(flags);
        __raw_spin_lock(&trace_buf_lock);
        len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);

        if (len > TRACE_BUF_SIZE || len < 0)
                goto out_unlock;

        size = sizeof(*entry) + sizeof(u32) * len;
        event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
        entry->ip                       = ip;
        entry->fmt                      = fmt;

        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
        if (!filter_check_discard(call, entry, tr->buffer, event))
                ring_buffer_unlock_commit(tr->buffer, event);

out_unlock:
        __raw_spin_unlock(&trace_buf_lock);
        local_irq_restore(flags);

out:
        atomic_dec_return(&data->disabled);
        ftrace_preempt_enable(resched);
        unpause_graph_tracing();

        return len;
}
EXPORT_SYMBOL_GPL(trace_vbprintk);

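/*
 * Illustrative note (not from the original source): trace_vbprintk()
 * backs the binary trace_printk() fast path.  vbin_printf() stores
 * only the raw arguments (as an array of u32 words) together with a
 * pointer to the format string; the expensive string formatting is
 * deferred until the trace is read.  A typical user just writes:
 *
 *      trace_printk("processing %p, count %d\n", ptr, count);
 *
 * and the formatting cost is paid at read time, not in the traced
 * path.
 */
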
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
        static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];

        struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        int cpu, len = 0, size, pc;
        struct print_entry *entry;
        unsigned long irq_flags;
        int disable;

        if (tracing_disabled || tracing_selftest_running)
                return 0;

        pc = preempt_count();
        preempt_disable_notrace();
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];

        disable = atomic_inc_return(&data->disabled);
        if (unlikely(disable != 1))
                goto out;

        pause_graph_tracing();
        raw_local_irq_save(irq_flags);
        __raw_spin_lock(&trace_buf_lock);
        len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);

        len = min(len, TRACE_BUF_SIZE-1);
        trace_buf[len] = 0;

        size = sizeof(*entry) + len + 1;
        event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
        entry->ip                       = ip;

        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = 0;
        if (!filter_check_discard(call, entry, tr->buffer, event))
                ring_buffer_unlock_commit(tr->buffer, event);

 out_unlock:
        __raw_spin_unlock(&trace_buf_lock);
        raw_local_irq_restore(irq_flags);
        unpause_graph_tracing();
 out:
        atomic_dec_return(&data->disabled);
        preempt_enable_notrace();

        return len;
}
EXPORT_SYMBOL_GPL(trace_vprintk);

enum trace_file_type {
        TRACE_FILE_LAT_FMT      = 1,
        TRACE_FILE_ANNOTATE     = 2,
};

static void trace_iterator_increment(struct trace_iterator *iter)
{
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();

        iter->idx++;
        if (iter->buffer_iter[iter->cpu])
                ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);

        ftrace_enable_cpu();
}

static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
{
        struct ring_buffer_event *event;
        struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];

        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();

        if (buf_iter)
                event = ring_buffer_iter_peek(buf_iter, ts);
        else
                event = ring_buffer_peek(iter->tr->buffer, cpu, ts);

        ftrace_enable_cpu();

        return event ? ring_buffer_event_data(event) : NULL;
}

static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
{
        struct ring_buffer *buffer = iter->tr->buffer;
        struct trace_entry *ent, *next = NULL;
        int cpu_file = iter->cpu_file;
        u64 next_ts = 0, ts;
        int next_cpu = -1;
        int cpu;

        /*
         * If we are in a per_cpu trace file, don't bother iterating
         * over all the CPUs; peek at that CPU directly.
         */
        if (cpu_file > TRACE_PIPE_ALL_CPU) {
                if (ring_buffer_empty_cpu(buffer, cpu_file))
                        return NULL;
                ent = peek_next_entry(iter, cpu_file, ent_ts);
                if (ent_cpu)
                        *ent_cpu = cpu_file;

                return ent;
        }

        for_each_tracing_cpu(cpu) {

                if (ring_buffer_empty_cpu(buffer, cpu))
                        continue;

                ent = peek_next_entry(iter, cpu, &ts);

                /*
                 * Pick the entry with the smallest timestamp:
                 */
                if (ent && (!next || ts < next_ts)) {
                        next = ent;
                        next_cpu = cpu;
                        next_ts = ts;
                }
        }

        if (ent_cpu)
                *ent_cpu = next_cpu;

        if (ent_ts)
                *ent_ts = next_ts;

        return next;
}

/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
                                          int *ent_cpu, u64 *ent_ts)
{
        return __find_next_entry(iter, ent_cpu, ent_ts);
}

/* Find the next real entry, and increment the iterator to the next entry */
static void *find_next_entry_inc(struct trace_iterator *iter)
{
        iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);

        if (iter->ent)
                trace_iterator_increment(iter);

        return iter->ent ? iter : NULL;
}

static void trace_consume(struct trace_iterator *iter)
{
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();
        ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
        ftrace_enable_cpu();
}

static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_iterator *iter = m->private;
        int i = (int)*pos;
        void *ent;

        (*pos)++;

        /* can't go backwards */
        if (iter->idx > i)
                return NULL;

        if (iter->idx < 0)
                ent = find_next_entry_inc(iter);
        else
                ent = iter;

        while (ent && iter->idx < i)
                ent = find_next_entry_inc(iter);

        iter->pos = *pos;

        return ent;
}

/*
 * No locking is necessary here. The worst that can happen
 * is losing events consumed at the same time by a trace_pipe
 * reader.
 * Other than that, we don't risk crashing the ring buffer,
 * because it serializes the readers.
 *
 * The current tracer is copied to avoid global locking
 * all around.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
        struct trace_iterator *iter = m->private;
        static struct tracer *old_tracer;
        int cpu_file = iter->cpu_file;
        void *p = NULL;
        loff_t l = 0;
        int cpu;

        /* copy the tracer to avoid using a global lock all around */
        mutex_lock(&trace_types_lock);
        if (unlikely(old_tracer != current_trace && current_trace)) {
                old_tracer = current_trace;
                *iter->trace = *current_trace;
        }
        mutex_unlock(&trace_types_lock);

        atomic_inc(&trace_record_cmdline_disabled);

        if (*pos != iter->pos) {
                iter->ent = NULL;
                iter->cpu = 0;
                iter->idx = -1;

                ftrace_disable_cpu();

                if (cpu_file == TRACE_PIPE_ALL_CPU) {
                        for_each_tracing_cpu(cpu)
                                ring_buffer_iter_reset(iter->buffer_iter[cpu]);
                } else
                        ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);

                ftrace_enable_cpu();

                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
                        ;

        } else {
                l = *pos - 1;
                p = s_next(m, p, &l);
        }

        trace_event_read_lock();
        return p;
}

static void s_stop(struct seq_file *m, void *p)
{
        atomic_dec(&trace_record_cmdline_disabled);
        trace_event_read_unlock();
}

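/*
 * Illustrative sketch (not from the original source): s_start(),
 * s_next() and s_stop() implement the standard seq_file iterator
 * protocol and are wired together with a show callback, roughly:
 *
 *      static const struct seq_operations tracer_seq_ops = {
 *              .start  = s_start,
 *              .next   = s_next,
 *              .stop   = s_stop,
 *              .show   = s_show,
 *      };
 *
 * The seq_file core then drives the start/next/stop cycle for each
 * read() of the trace file.
 */
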
1576 static void print_lat_help_header(struct seq_file *m)
1577 {
1578         seq_puts(m, "#                  _------=> CPU#            \n");
1579         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1580         seq_puts(m, "#                | / _----=> need-resched    \n");
1581         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1582         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1583         seq_puts(m, "#                |||| /                      \n");
1584         seq_puts(m, "#                |||||     delay             \n");
1585         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1586         seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
1587 }
1588
1589 static void print_func_help_header(struct seq_file *m)
1590 {
1591         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1592         seq_puts(m, "#              | |       |          |         |\n");
1593 }
1594
1595
1596 static void
1597 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1598 {
1599         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1600         struct trace_array *tr = iter->tr;
1601         struct trace_array_cpu *data = tr->data[tr->cpu];
1602         struct tracer *type = current_trace;
1603         unsigned long total;
1604         unsigned long entries;
1605         const char *name = "preemption";
1606
1607         if (type)
1608                 name = type->name;
1609
1610         entries = ring_buffer_entries(iter->tr->buffer);
1611         total = entries +
1612                 ring_buffer_overruns(iter->tr->buffer);
1613
1614         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1615                    name, UTS_RELEASE);
1616         seq_puts(m, "# -----------------------------------"
1617                  "---------------------------------\n");
1618         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
1619                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1620                    nsecs_to_usecs(data->saved_latency),
1621                    entries,
1622                    total,
1623                    tr->cpu,
1624 #if defined(CONFIG_PREEMPT_NONE)
1625                    "server",
1626 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1627                    "desktop",
1628 #elif defined(CONFIG_PREEMPT)
1629                    "preempt",
1630 #else
1631                    "unknown",
1632 #endif
1633                    /* These are reserved for later use */
1634                    0, 0, 0, 0);
1635 #ifdef CONFIG_SMP
1636         seq_printf(m, " #P:%d)\n", num_online_cpus());
1637 #else
1638         seq_puts(m, ")\n");
1639 #endif
1640         seq_puts(m, "#    -----------------\n");
1641         seq_printf(m, "#    | task: %.16s-%d "
1642                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1643                    data->comm, data->pid, data->uid, data->nice,
1644                    data->policy, data->rt_priority);
1645         seq_puts(m, "#    -----------------\n");
1646
1647         if (data->critical_start) {
1648                 seq_puts(m, "#  => started at: ");
1649                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1650                 trace_print_seq(m, &iter->seq);
1651                 seq_puts(m, "\n#  => ended at:   ");
1652                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1653                 trace_print_seq(m, &iter->seq);
1654                 seq_puts(m, "#\n");
1655         }
1656
1657         seq_puts(m, "#\n");
1658 }
1659
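     /*
      * Emit a "CPU buffer started" annotation the first time entries from
      * a given CPU appear in this trace, so a reader can tell where each
      * per-cpu buffer begins. Only done when annotations are enabled and
      * the file had overruns (TRACE_FILE_ANNOTATE).
      */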
1660 static void test_cpu_buff_start(struct trace_iterator *iter)
1661 {
1662         struct trace_seq *s = &iter->seq;
1663
1664         if (!(trace_flags & TRACE_ITER_ANNOTATE))
1665                 return;
1666
1667         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1668                 return;
1669
1670         if (cpumask_test_cpu(iter->cpu, iter->started))
1671                 return;
1672
1673         cpumask_set_cpu(iter->cpu, iter->started);
1674
1675         /* Don't print the "CPU buffer started" annotation for the first entry of the trace */
1676         if (iter->idx > 1)
1677                 trace_seq_printf(s, "##### CPU %u buffer started #####\n",
1678                                 iter->cpu);
1679 }
1680
1681 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1682 {
1683         struct trace_seq *s = &iter->seq;
1684         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1685         struct trace_entry *entry;
1686         struct trace_event *event;
1687
1688         entry = iter->ent;
1689
1690         test_cpu_buff_start(iter);
1691
1692         event = ftrace_find_event(entry->type);
1693
1694         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1695                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1696                         if (!trace_print_lat_context(iter))
1697                                 goto partial;
1698                 } else {
1699                         if (!trace_print_context(iter))
1700                                 goto partial;
1701                 }
1702         }
1703
1704         if (event)
1705                 return event->trace(iter, sym_flags);
1706
1707         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1708                 goto partial;
1709
1710         return TRACE_TYPE_HANDLED;
1711 partial:
1712         return TRACE_TYPE_PARTIAL_LINE;
1713 }
1714
1715 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1716 {
1717         struct trace_seq *s = &iter->seq;
1718         struct trace_entry *entry;
1719         struct trace_event *event;
1720
1721         entry = iter->ent;
1722
1723         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1724                 if (!trace_seq_printf(s, "%d %d %llu ",
1725                                       entry->pid, iter->cpu, iter->ts))
1726                         goto partial;
1727         }
1728
1729         event = ftrace_find_event(entry->type);
1730         if (event)
1731                 return event->raw(iter, 0);
1732
1733         if (!trace_seq_printf(s, "%d ?\n", entry->type))
1734                 goto partial;
1735
1736         return TRACE_TYPE_HANDLED;
1737 partial:
1738         return TRACE_TYPE_PARTIAL_LINE;
1739 }
1740
1741 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1742 {
1743         struct trace_seq *s = &iter->seq;
1744         unsigned char newline = '\n';
1745         struct trace_entry *entry;
1746         struct trace_event *event;
1747
1748         entry = iter->ent;
1749
1750         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1751                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1752                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1753                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1754         }
1755
1756         event = ftrace_find_event(entry->type);
1757         if (event) {
1758                 enum print_line_t ret = event->hex(iter, 0);
1759                 if (ret != TRACE_TYPE_HANDLED)
1760                         return ret;
1761         }
1762
1763         SEQ_PUT_FIELD_RET(s, newline);
1764
1765         return TRACE_TYPE_HANDLED;
1766 }
1767
1768 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1769 {
1770         struct trace_seq *s = &iter->seq;
1771         struct trace_entry *entry;
1772         struct trace_event *event;
1773
1774         entry = iter->ent;
1775
1776         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1777                 SEQ_PUT_FIELD_RET(s, entry->pid);
1778                 SEQ_PUT_FIELD_RET(s, iter->cpu);
1779                 SEQ_PUT_FIELD_RET(s, iter->ts);
1780         }
1781
1782         event = ftrace_find_event(entry->type);
1783         return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
1784 }
1785
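     /*
      * Return 1 if there is nothing left to read: when an iterator is
      * attached (static "trace" reads) check the iterator, otherwise
      * check the live ring buffer itself.
      */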
1786 static int trace_empty(struct trace_iterator *iter)
1787 {
1788         int cpu;
1789
1790         /* If we are looking at one CPU buffer, only check that one */
1791         if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
1792                 cpu = iter->cpu_file;
1793                 if (iter->buffer_iter[cpu]) {
1794                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1795                                 return 0;
1796                 } else {
1797                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1798                                 return 0;
1799                 }
1800                 return 1;
1801         }
1802
1803         for_each_tracing_cpu(cpu) {
1804                 if (iter->buffer_iter[cpu]) {
1805                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1806                                 return 0;
1807                 } else {
1808                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1809                                 return 0;
1810                 }
1811         }
1812
1813         return 1;
1814 }
1815
1816 /* Called with trace_event_read_lock() held. */
1817 static enum print_line_t print_trace_line(struct trace_iterator *iter)
1818 {
1819         enum print_line_t ret;
1820
1821         if (iter->trace && iter->trace->print_line) {
1822                 ret = iter->trace->print_line(iter);
1823                 if (ret != TRACE_TYPE_UNHANDLED)
1824                         return ret;
1825         }
1826
1827         if (iter->ent->type == TRACE_BPRINT &&
1828                         trace_flags & TRACE_ITER_PRINTK &&
1829                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1830                 return trace_print_bprintk_msg_only(iter);
1831
1832         if (iter->ent->type == TRACE_PRINT &&
1833                         trace_flags & TRACE_ITER_PRINTK &&
1834                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1835                 return trace_print_printk_msg_only(iter);
1836
1837         if (trace_flags & TRACE_ITER_BIN)
1838                 return print_bin_fmt(iter);
1839
1840         if (trace_flags & TRACE_ITER_HEX)
1841                 return print_hex_fmt(iter);
1842
1843         if (trace_flags & TRACE_ITER_RAW)
1844                 return print_raw_fmt(iter);
1845
1846         return print_trace_fmt(iter);
1847 }
1848
1849 static int s_show(struct seq_file *m, void *v)
1850 {
1851         struct trace_iterator *iter = v;
1852
1853         if (iter->ent == NULL) {
1854                 if (iter->tr) {
1855                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1856                         seq_puts(m, "#\n");
1857                 }
1858                 if (iter->trace && iter->trace->print_header)
1859                         iter->trace->print_header(m);
1860                 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1861                         /* print nothing if the buffers are empty */
1862                         if (trace_empty(iter))
1863                                 return 0;
1864                         print_trace_header(m, iter);
1865                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1866                                 print_lat_help_header(m);
1867                 } else {
1868                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1869                                 print_func_help_header(m);
1870                 }
1871         } else {
1872                 print_trace_line(iter);
1873                 trace_print_seq(m, &iter->seq);
1874         }
1875
1876         return 0;
1877 }
1878
1879 static struct seq_operations tracer_seq_ops = {
1880         .start          = s_start,
1881         .next           = s_next,
1882         .stop           = s_stop,
1883         .show           = s_show,
1884 };
1885
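     /*
      * Set up the iterator behind the "trace" file: take a private copy
      * of the current tracer, grab a ring-buffer iterator for the chosen
      * CPU (or for all of them), and stop tracing while the snapshot is
      * being dumped.
      */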
1886 static struct trace_iterator *
1887 __tracing_open(struct inode *inode, struct file *file)
1888 {
1889         long cpu_file = (long) inode->i_private;
1890         void *fail_ret = ERR_PTR(-ENOMEM);
1891         struct trace_iterator *iter;
1892         struct seq_file *m;
1893         int cpu, ret;
1894
1895         if (tracing_disabled)
1896                 return ERR_PTR(-ENODEV);
1897
1898         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1899         if (!iter)
1900                 return ERR_PTR(-ENOMEM);
1901
1902         /*
1903          * We make a copy of the current tracer to avoid concurrent
1904          * changes to it while we are reading.
1905          */
1906         mutex_lock(&trace_types_lock);
1907         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1908         if (!iter->trace)
1909                 goto fail;
1910
1911         if (current_trace)
1912                 *iter->trace = *current_trace;
1913
1914         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
1915                 goto fail;
1916
1917         cpumask_clear(iter->started);
1918
1919         if (current_trace && current_trace->print_max)
1920                 iter->tr = &max_tr;
1921         else
1922                 iter->tr = &global_trace;
1923         iter->pos = -1;
1924         mutex_init(&iter->mutex);
1925         iter->cpu_file = cpu_file;
1926
1927         /* Notify the tracer early, before we stop tracing. */
1928         if (iter->trace && iter->trace->open)
1929                 iter->trace->open(iter);
1930
1931         /* Annotate start of buffers if we had overruns */
1932         if (ring_buffer_overruns(iter->tr->buffer))
1933                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1934
1935         if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1936                 for_each_tracing_cpu(cpu) {
1937
1938                         iter->buffer_iter[cpu] =
1939                                 ring_buffer_read_start(iter->tr->buffer, cpu);
1940                 }
1941         } else {
1942                 cpu = iter->cpu_file;
1943                 iter->buffer_iter[cpu] =
1944                                 ring_buffer_read_start(iter->tr->buffer, cpu);
1945         }
1946
1947         /* TODO stop tracer */
1948         ret = seq_open(file, &tracer_seq_ops);
1949         if (ret < 0) {
1950                 fail_ret = ERR_PTR(ret);
1951                 goto fail_buffer;
1952         }
1953
1954         m = file->private_data;
1955         m->private = iter;
1956
1957         /* stop the trace while dumping */
1958         tracing_stop();
1959
1960         mutex_unlock(&trace_types_lock);
1961
1962         return iter;
1963
1964  fail_buffer:
1965         for_each_tracing_cpu(cpu) {
1966                 if (iter->buffer_iter[cpu])
1967                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1968         }
1969         free_cpumask_var(iter->started);
1970  fail:
1971         mutex_unlock(&trace_types_lock);
1972         kfree(iter->trace);
1973         kfree(iter);
1974
1975         return fail_ret;
1976 }
1977
1978 int tracing_open_generic(struct inode *inode, struct file *filp)
1979 {
1980         if (tracing_disabled)
1981                 return -ENODEV;
1982
1983         filp->private_data = inode->i_private;
1984         return 0;
1985 }
1986
1987 static int tracing_release(struct inode *inode, struct file *file)
1988 {
1989         struct seq_file *m = (struct seq_file *)file->private_data;
1990         struct trace_iterator *iter;
1991         int cpu;
1992
1993         if (!(file->f_mode & FMODE_READ))
1994                 return 0;
1995
1996         iter = m->private;
1997
1998         mutex_lock(&trace_types_lock);
1999         for_each_tracing_cpu(cpu) {
2000                 if (iter->buffer_iter[cpu])
2001                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2002         }
2003
2004         if (iter->trace && iter->trace->close)
2005                 iter->trace->close(iter);
2006
2007         /* re-enable tracing if it was previously enabled */
2008         tracing_start();
2009         mutex_unlock(&trace_types_lock);
2010
2011         seq_release(inode, file);
2012         mutex_destroy(&iter->mutex);
2013         free_cpumask_var(iter->started);
2014         kfree(iter->trace);
2015         kfree(iter);
2016         return 0;
2017 }
2018
2019 static int tracing_open(struct inode *inode, struct file *file)
2020 {
2021         struct trace_iterator *iter;
2022         int ret = 0;
2023
2024         /* If this file was opened for write, then erase the contents */
2025         if ((file->f_mode & FMODE_WRITE) &&
2026             !(file->f_flags & O_APPEND)) {
2027                 long cpu = (long) inode->i_private;
2028
2029                 if (cpu == TRACE_PIPE_ALL_CPU)
2030                         tracing_reset_online_cpus(&global_trace);
2031                 else
2032                         tracing_reset(&global_trace, cpu);
2033         }
2034
2035         if (file->f_mode & FMODE_READ) {
2036                 iter = __tracing_open(inode, file);
2037                 if (IS_ERR(iter))
2038                         ret = PTR_ERR(iter);
2039                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2040                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
2041         }
2042         return ret;
2043 }
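     /*
      * For illustration (a sketch using the paths from the mini-HOWTO
      * further down, not part of the original file): reading takes a
      * snapshot, while opening for write without O_APPEND clears it:
      *
      *   # cat /sys/kernel/debug/tracing/trace    (read, non-consuming)
      *   # echo > /sys/kernel/debug/tracing/trace (reset the buffer)
      */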
2044
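     /*
      * The t_* callbacks below back the "available_tracers" file: a
      * seq_file walk over the registered trace_types list, done under
      * trace_types_lock.
      */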
2045 static void *
2046 t_next(struct seq_file *m, void *v, loff_t *pos)
2047 {
2048         struct tracer *t = v;
2049
2050         (*pos)++;
2051
2052         if (t)
2053                 t = t->next;
2054
2055         return t;
2056 }
2057
2058 static void *t_start(struct seq_file *m, loff_t *pos)
2059 {
2060         struct tracer *t;
2061         loff_t l = 0;
2062
2063         mutex_lock(&trace_types_lock);
2064         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2065                 ;
2066
2067         return t;
2068 }
2069
2070 static void t_stop(struct seq_file *m, void *p)
2071 {
2072         mutex_unlock(&trace_types_lock);
2073 }
2074
2075 static int t_show(struct seq_file *m, void *v)
2076 {
2077         struct tracer *t = v;
2078
2079         if (!t)
2080                 return 0;
2081
2082         seq_printf(m, "%s", t->name);
2083         if (t->next)
2084                 seq_putc(m, ' ');
2085         else
2086                 seq_putc(m, '\n');
2087
2088         return 0;
2089 }
2090
2091 static struct seq_operations show_traces_seq_ops = {
2092         .start          = t_start,
2093         .next           = t_next,
2094         .stop           = t_stop,
2095         .show           = t_show,
2096 };
2097
2098 static int show_traces_open(struct inode *inode, struct file *file)
2099 {
2100         if (tracing_disabled)
2101                 return -ENODEV;
2102
2103         return seq_open(file, &show_traces_seq_ops);
2104 }
2105
2106 static ssize_t
2107 tracing_write_stub(struct file *filp, const char __user *ubuf,
2108                    size_t count, loff_t *ppos)
2109 {
2110         return count;
2111 }
2112
2113 static const struct file_operations tracing_fops = {
2114         .open           = tracing_open,
2115         .read           = seq_read,
2116         .write          = tracing_write_stub,
2117         .llseek         = seq_lseek,
2118         .release        = tracing_release,
2119 };
2120
2121 static const struct file_operations show_traces_fops = {
2122         .open           = show_traces_open,
2123         .read           = seq_read,
2124         .release        = seq_release,
2125 };
2126
2127 /*
2128  * Only trace on a CPU if the bitmask is set:
2129  */
2130 static cpumask_var_t tracing_cpumask;
2131
2132 /*
2133  * The tracer itself will not take this lock, but we still want
2134  * to provide a consistent cpumask to user-space:
2135  */
2136 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2137
2138 /*
2139  * Temporary storage for the character representation of the
2140  * CPU bitmask (and one more byte for the newline):
2141  */
2142 static char mask_str[NR_CPUS + 1];
2143
2144 static ssize_t
2145 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2146                      size_t count, loff_t *ppos)
2147 {
2148         int len;
2149
2150         mutex_lock(&tracing_cpumask_update_lock);
2151
2152         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2153         if (count - len < 2) {
2154                 count = -EINVAL;
2155                 goto out_err;
2156         }
2157         len += sprintf(mask_str + len, "\n");
2158         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2159
2160 out_err:
2161         mutex_unlock(&tracing_cpumask_update_lock);
2162
2163         return count;
2164 }
2165
2166 static ssize_t
2167 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2168                       size_t count, loff_t *ppos)
2169 {
2170         int err, cpu;
2171         cpumask_var_t tracing_cpumask_new;
2172
2173         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2174                 return -ENOMEM;
2175
2176         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2177         if (err)
2178                 goto err_unlock;
2179
2180         mutex_lock(&tracing_cpumask_update_lock);
2181
2182         local_irq_disable();
2183         __raw_spin_lock(&ftrace_max_lock);
2184         for_each_tracing_cpu(cpu) {
2185                 /*
2186                  * Increase/decrease the disabled counter if we are
2187                  * about to flip a bit in the cpumask:
2188                  */
2189                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2190                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2191                         atomic_inc(&global_trace.data[cpu]->disabled);
2192                 }
2193                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2194                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2195                         atomic_dec(&global_trace.data[cpu]->disabled);
2196                 }
2197         }
2198         __raw_spin_unlock(&ftrace_max_lock);
2199         local_irq_enable();
2200
2201         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2202
2203         mutex_unlock(&tracing_cpumask_update_lock);
2204         free_cpumask_var(tracing_cpumask_new);
2205
2206         return count;
2207
2208 err_unlock:
2209         free_cpumask_var(tracing_cpumask_new);
2210
2211         return err;
2212 }
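     /*
      * Hedged usage example (the debugfs file name is assumed from the
      * fops below): restrict tracing to CPUs 0 and 1 with a hex mask:
      *
      *   # echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
      */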
2213
2214 static const struct file_operations tracing_cpumask_fops = {
2215         .open           = tracing_open_generic,
2216         .read           = tracing_cpumask_read,
2217         .write          = tracing_cpumask_write,
2218 };
2219
2220 static ssize_t
2221 tracing_trace_options_read(struct file *filp, char __user *ubuf,
2222                        size_t cnt, loff_t *ppos)
2223 {
2224         struct tracer_opt *trace_opts;
2225         u32 tracer_flags;
2226         int len = 0;
2227         char *buf;
2228         int r = 0;
2229         int i;
2230
2231
2232         /* calculate max size */
2233         for (i = 0; trace_options[i]; i++) {
2234                 len += strlen(trace_options[i]);
2235                 len += 3; /* "no" and newline */
2236         }
2237
2238         mutex_lock(&trace_types_lock);
2239         tracer_flags = current_trace->flags->val;
2240         trace_opts = current_trace->flags->opts;
2241
2242         /*
2243          * Increase the size by the names of the options
2244          * specific to the current tracer.
2245          */
2246         for (i = 0; trace_opts[i].name; i++) {
2247                 len += strlen(trace_opts[i].name);
2248                 len += 3; /* "no" and newline */
2249         }
2250
2251         /* +1 for \0 */
2252         buf = kmalloc(len + 1, GFP_KERNEL);
2253         if (!buf) {
2254                 mutex_unlock(&trace_types_lock);
2255                 return -ENOMEM;
2256         }
2257
2258         for (i = 0; trace_options[i]; i++) {
2259                 if (trace_flags & (1 << i))
2260                         r += sprintf(buf + r, "%s\n", trace_options[i]);
2261                 else
2262                         r += sprintf(buf + r, "no%s\n", trace_options[i]);
2263         }
2264
2265         for (i = 0; trace_opts[i].name; i++) {
2266                 if (tracer_flags & trace_opts[i].bit)
2267                         r += sprintf(buf + r, "%s\n",
2268                                 trace_opts[i].name);
2269                 else
2270                         r += sprintf(buf + r, "no%s\n",
2271                                 trace_opts[i].name);
2272         }
2273         mutex_unlock(&trace_types_lock);
2274
2275         WARN_ON(r >= len + 1);
2276
2277         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2278
2279         kfree(buf);
2280         return r;
2281 }
2282
2283 /* Try to assign a tracer specific option */
2284 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2285 {
2286         struct tracer_flags *trace_flags = trace->flags;
2287         struct tracer_opt *opts = NULL;
2288         int ret = 0, i = 0;
2289         int len;
2290
2291         for (i = 0; trace_flags->opts[i].name; i++) {
2292                 opts = &trace_flags->opts[i];
2293                 len = strlen(opts->name);
2294
2295                 if (strncmp(cmp, opts->name, len) == 0) {
2296                         ret = trace->set_flag(trace_flags->val,
2297                                 opts->bit, !neg);
2298                         break;
2299                 }
2300         }
2301         /* Not found */
2302         if (!trace_flags->opts[i].name)
2303                 return -EINVAL;
2304
2305         /* The tracer refused to handle the option */
2306         if (ret)
2307                 return ret;
2308
2309         if (neg)
2310                 trace_flags->val &= ~opts->bit;
2311         else
2312                 trace_flags->val |= opts->bit;
2313
2314         return 0;
2315 }
2316
2317 static void set_tracer_flags(unsigned int mask, int enabled)
2318 {
2319         /* do nothing if flag is already set */
2320         if (!!(trace_flags & mask) == !!enabled)
2321                 return;
2322
2323         if (enabled)
2324                 trace_flags |= mask;
2325         else
2326                 trace_flags &= ~mask;
2327
2328         if (mask == TRACE_ITER_GLOBAL_CLK) {
2329                 u64 (*func)(void);
2330
2331                 if (enabled)
2332                         func = trace_clock_global;
2333                 else
2334                         func = trace_clock_local;
2335
2336                 mutex_lock(&trace_types_lock);
2337                 ring_buffer_set_clock(global_trace.buffer, func);
2338
2339                 if (max_tr.buffer)
2340                         ring_buffer_set_clock(max_tr.buffer, func);
2341                 mutex_unlock(&trace_types_lock);
2342         }
2343 }
2344
2345 static ssize_t
2346 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2347                         size_t cnt, loff_t *ppos)
2348 {
2349         char buf[64];
2350         char *cmp = buf;
2351         int neg = 0;
2352         int ret;
2353         int i;
2354
2355         if (cnt >= sizeof(buf))
2356                 return -EINVAL;
2357
2358         if (copy_from_user(&buf, ubuf, cnt))
2359                 return -EFAULT;
2360
2361         buf[cnt] = 0;
2362
2363         if (strncmp(buf, "no", 2) == 0) {
2364                 neg = 1;
2365                 cmp += 2;
2366         }
2367
2368         for (i = 0; trace_options[i]; i++) {
2369                 int len = strlen(trace_options[i]);
2370
2371                 if (strncmp(cmp, trace_options[i], len) == 0) {
2372                         set_tracer_flags(1 << i, !neg);
2373                         break;
2374                 }
2375         }
2376
2377         /* If no generic option matched, try the tracer-specific options */
2378         if (!trace_options[i]) {
2379                 mutex_lock(&trace_types_lock);
2380                 ret = set_tracer_option(current_trace, cmp, neg);
2381                 mutex_unlock(&trace_types_lock);
2382                 if (ret)
2383                         return ret;
2384         }
2385
2386         filp->f_pos += cnt;
2387
2388         return cnt;
2389 }
2390
2391 static const struct file_operations tracing_iter_fops = {
2392         .open           = tracing_open_generic,
2393         .read           = tracing_trace_options_read,
2394         .write          = tracing_trace_options_write,
2395 };
2396
2397 static const char readme_msg[] =
2398         "tracing mini-HOWTO:\n\n"
2399         "# mount -t debugfs nodev /sys/kernel/debug\n\n"
2400         "# cat /sys/kernel/debug/tracing/available_tracers\n"
2401         "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2402         "# cat /sys/kernel/debug/tracing/current_tracer\n"
2403         "nop\n"
2404         "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
2405         "# cat /sys/kernel/debug/tracing/current_tracer\n"
2406         "sched_switch\n"
2407         "# cat /sys/kernel/debug/tracing/trace_options\n"
2408         "noprint-parent nosym-offset nosym-addr noverbose\n"
2409         "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
2410         "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
2411         "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
2412         "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
2413 ;
2414
2415 static ssize_t
2416 tracing_readme_read(struct file *filp, char __user *ubuf,
2417                        size_t cnt, loff_t *ppos)
2418 {
2419         return simple_read_from_buffer(ubuf, cnt, ppos,
2420                                         readme_msg, strlen(readme_msg));
2421 }
2422
2423 static const struct file_operations tracing_readme_fops = {
2424         .open           = tracing_open_generic,
2425         .read           = tracing_readme_read,
2426 };
2427
2428 static ssize_t
2429 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2430                                 size_t cnt, loff_t *ppos)
2431 {
2432         char *buf_comm;
2433         char *file_buf;
2434         char *buf;
2435         int len = 0;
2436         int pid;
2437         int i;
2438
2439         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2440         if (!file_buf)
2441                 return -ENOMEM;
2442
2443         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2444         if (!buf_comm) {
2445                 kfree(file_buf);
2446                 return -ENOMEM;
2447         }
2448
2449         buf = file_buf;
2450
2451         for (i = 0; i < SAVED_CMDLINES; i++) {
2452                 int r;
2453
2454                 pid = map_cmdline_to_pid[i];
2455                 if (pid == -1 || pid == NO_CMDLINE_MAP)
2456                         continue;
2457
2458                 trace_find_cmdline(pid, buf_comm);
2459                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
2460                 buf += r;
2461                 len += r;
2462         }
2463
2464         len = simple_read_from_buffer(ubuf, cnt, ppos,
2465                                       file_buf, len);
2466
2467         kfree(file_buf);
2468         kfree(buf_comm);
2469
2470         return len;
2471 }
2472
2473 static const struct file_operations tracing_saved_cmdlines_fops = {
2474         .open           = tracing_open_generic,
2475         .read           = tracing_saved_cmdlines_read,
2476 };
2477
2478 static ssize_t
2479 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2480                   size_t cnt, loff_t *ppos)
2481 {
2482         char buf[64];
2483         int r;
2484
2485         r = sprintf(buf, "%u\n", tracer_enabled);
2486         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2487 }
2488
2489 static ssize_t
2490 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2491                    size_t cnt, loff_t *ppos)
2492 {
2493         struct trace_array *tr = filp->private_data;
2494         char buf[64];
2495         unsigned long val;
2496         int ret;
2497
2498         if (cnt >= sizeof(buf))
2499                 return -EINVAL;
2500
2501         if (copy_from_user(&buf, ubuf, cnt))
2502                 return -EFAULT;
2503
2504         buf[cnt] = 0;
2505
2506         ret = strict_strtoul(buf, 10, &val);
2507         if (ret < 0)
2508                 return ret;
2509
2510         val = !!val;
2511
2512         mutex_lock(&trace_types_lock);
2513         if (tracer_enabled ^ val) {
2514                 if (val) {
2515                         tracer_enabled = 1;
2516                         if (current_trace->start)
2517                                 current_trace->start(tr);
2518                         tracing_start();
2519                 } else {
2520                         tracer_enabled = 0;
2521                         tracing_stop();
2522                         if (current_trace->stop)
2523                                 current_trace->stop(tr);
2524                 }
2525         }
2526         mutex_unlock(&trace_types_lock);
2527
2528         filp->f_pos += cnt;
2529
2530         return cnt;
2531 }
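     /*
      * For illustration, as in the mini-HOWTO above: writing 0 or 1 here
      * stops or (re)starts both the current tracer and the ring buffer:
      *
      *   # echo 1 > /sys/kernel/debug/tracing/tracing_enabled
      */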
2532
2533 static ssize_t
2534 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2535                        size_t cnt, loff_t *ppos)
2536 {
2537         char buf[max_tracer_type_len+2];
2538         int r;
2539
2540         mutex_lock(&trace_types_lock);
2541         if (current_trace)
2542                 r = sprintf(buf, "%s\n", current_trace->name);
2543         else
2544                 r = sprintf(buf, "\n");
2545         mutex_unlock(&trace_types_lock);
2546
2547         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2548 }
2549
2550 int tracer_init(struct tracer *t, struct trace_array *tr)
2551 {
2552         tracing_reset_online_cpus(tr);
2553         return t->init(tr);
2554 }
2555
2556 static int tracing_resize_ring_buffer(unsigned long size)
2557 {
2558         int ret;
2559
2560         /*
2561          * If the kernel or the user changes the size of the ring buffer,
2562          * we use the size that was given, and we can forget about
2563          * expanding it later.
2564          */
2565         ring_buffer_expanded = 1;
2566
2567         ret = ring_buffer_resize(global_trace.buffer, size);
2568         if (ret < 0)
2569                 return ret;
2570
2571         ret = ring_buffer_resize(max_tr.buffer, size);
2572         if (ret < 0) {
2573                 int r;
2574
2575                 r = ring_buffer_resize(global_trace.buffer,
2576                                        global_trace.entries);
2577                 if (r < 0) {
2578                         /*
2579                          * AARGH! We are left with a max buffer of a
2580                          * different size!
2581                          * The max buffer is our "snapshot" buffer.
2582                          * When a tracer needs a snapshot (one of the
2583                          * latency tracers), it swaps the max buffer
2584                          * with the saved snapshot. We succeeded in
2585                          * updating the size of the main buffer, but failed
2586                          * to update the size of the max buffer. Then, when
2587                          * we tried to reset the main buffer to the original
2588                          * size, we failed there too. This is very unlikely
2589                          * to happen, but if it does, warn and kill all
2590                          * tracing.
2591                          */
2592                         WARN_ON(1);
2593                         tracing_disabled = 1;
2594                 }
2595                 return ret;
2596         }
2597
2598         global_trace.entries = size;
2599
2600         return ret;
2601 }
2602
2603 /**
2604  * tracing_update_buffers - used by tracing facility to expand ring buffers
2605  *
2606  * To save memory when tracing is never used on a system that has it
2607  * configured in, the ring buffers are set to a minimum size. But once
2608  * a user starts to use the tracing facility, they need to grow
2609  * to their default size.
2610  *
2611  * This function is to be called when a tracer is about to be used.
2612  */
2613 int tracing_update_buffers(void)
2614 {
2615         int ret = 0;
2616
2617         mutex_lock(&trace_types_lock);
2618         if (!ring_buffer_expanded)
2619                 ret = tracing_resize_ring_buffer(trace_buf_size);
2620         mutex_unlock(&trace_types_lock);
2621
2622         return ret;
2623 }
2624
2625 struct trace_option_dentry;
2626
2627 static struct trace_option_dentry *
2628 create_trace_option_files(struct tracer *tracer);
2629
2630 static void
2631 destroy_trace_option_files(struct trace_option_dentry *topts);
2632
2633 static int tracing_set_tracer(const char *buf)
2634 {
2635         static struct trace_option_dentry *topts;
2636         struct trace_array *tr = &global_trace;
2637         struct tracer *t;
2638         int ret = 0;
2639
2640         mutex_lock(&trace_types_lock);
2641
2642         if (!ring_buffer_expanded) {
2643                 ret = tracing_resize_ring_buffer(trace_buf_size);
2644                 if (ret < 0)
2645                         goto out;
2646                 ret = 0;
2647         }
2648
2649         for (t = trace_types; t; t = t->next) {
2650                 if (strcmp(t->name, buf) == 0)
2651                         break;
2652         }
2653         if (!t) {
2654                 ret = -EINVAL;
2655                 goto out;
2656         }
2657         if (t == current_trace)
2658                 goto out;
2659
2660         trace_branch_disable();
2661         if (current_trace && current_trace->reset)
2662                 current_trace->reset(tr);
2663
2664         destroy_trace_option_files(topts);
2665
2666         current_trace = t;
2667
2668         topts = create_trace_option_files(current_trace);
2669
2670         if (t->init) {
2671                 ret = tracer_init(t, tr);
2672                 if (ret)
2673                         goto out;
2674         }
2675
2676         trace_branch_enable(tr);
2677  out:
2678         mutex_unlock(&trace_types_lock);
2679
2680         return ret;
2681 }
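     /*
      * For illustration, as in the mini-HOWTO above, switching tracers
      * goes through tracing_set_tracer():
      *
      *   # echo sched_switch > /sys/kernel/debug/tracing/current_tracer
      */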
2682
2683 static ssize_t
2684 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2685                         size_t cnt, loff_t *ppos)
2686 {
2687         char buf[max_tracer_type_len+1];
2688         int i;
2689         size_t ret;
2690         int err;
2691
2692         ret = cnt;
2693
2694         if (cnt > max_tracer_type_len)
2695                 cnt = max_tracer_type_len;
2696
2697         if (copy_from_user(&buf, ubuf, cnt))
2698                 return -EFAULT;
2699
2700         buf[cnt] = 0;
2701
2702         /* strip ending whitespace. */
2703         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2704                 buf[i] = 0;
2705
2706         err = tracing_set_tracer(buf);
2707         if (err)
2708                 return err;
2709
2710         filp->f_pos += ret;
2711
2712         return ret;
2713 }
2714
2715 static ssize_t
2716 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2717                      size_t cnt, loff_t *ppos)
2718 {
2719         unsigned long *ptr = filp->private_data;
2720         char buf[64];
2721         int r;
2722
2723         r = snprintf(buf, sizeof(buf), "%ld\n",
2724                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2725         if (r > sizeof(buf))
2726                 r = sizeof(buf);
2727         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2728 }
2729
2730 static ssize_t
2731 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2732                       size_t cnt, loff_t *ppos)
2733 {
2734         unsigned long *ptr = filp->private_data;
2735         char buf[64];
2736         unsigned long val;
2737         int ret;
2738
2739         if (cnt >= sizeof(buf))
2740                 return -EINVAL;
2741
2742         if (copy_from_user(&buf, ubuf, cnt))
2743                 return -EFAULT;
2744
2745         buf[cnt] = 0;
2746
2747         ret = strict_strtoul(buf, 10, &val);
2748         if (ret < 0)
2749                 return ret;
2750
2751         *ptr = val * 1000;
2752
2753         return cnt;
2754 }
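     /*
      * Note: values written here are taken in microseconds and stored as
      * nanoseconds (val * 1000); reads convert back via nsecs_to_usecs().
      */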
2755
2756 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2757 {
2758         long cpu_file = (long) inode->i_private;
2759         struct trace_iterator *iter;
2760         int ret = 0;
2761
2762         if (tracing_disabled)
2763                 return -ENODEV;
2764
2765         mutex_lock(&trace_types_lock);
2766
2767         /* We only allow one reader per cpu */
2768         if (cpu_file == TRACE_PIPE_ALL_CPU) {
2769                 if (!cpumask_empty(tracing_reader_cpumask)) {
2770                         ret = -EBUSY;
2771                         goto out;
2772                 }
2773                 cpumask_setall(tracing_reader_cpumask);
2774         } else {
2775                 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2776                         cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2777                 else {
2778                         ret = -EBUSY;
2779                         goto out;
2780                 }
2781         }
2782
2783         /* create a buffer to store the information to pass to userspace */
2784         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2785         if (!iter) {
2786                 ret = -ENOMEM;
2787                 goto out;
2788         }
2789
2790         /*
2791          * We make a copy of the current tracer to avoid concurrent
2792          * changes to it while we are reading.
2793          */
2794         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2795         if (!iter->trace) {
2796                 ret = -ENOMEM;
2797                 goto fail;
2798         }
2799         if (current_trace)
2800                 *iter->trace = *current_trace;
2801
2802         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2803                 ret = -ENOMEM;
2804                 goto fail;
2805         }
2806
2807         /* the trace pipe does not show the start of the buffer */
2808         cpumask_setall(iter->started);
2809
2810         if (trace_flags & TRACE_ITER_LATENCY_FMT)
2811                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2812
2813         iter->cpu_file = cpu_file;
2814         iter->tr = &global_trace;
2815         mutex_init(&iter->mutex);
2816         filp->private_data = iter;
2817
2818         if (iter->trace->pipe_open)
2819                 iter->trace->pipe_open(iter);
2820
2821 out:
2822         mutex_unlock(&trace_types_lock);
2823         return ret;
2824
2825 fail:
2826         kfree(iter->trace);
2827         kfree(iter);
2828         mutex_unlock(&trace_types_lock);
2829         return ret;
2830 }
2831
2832 static int tracing_release_pipe(struct inode *inode, struct file *file)
2833 {
2834         struct trace_iterator *iter = file->private_data;
2835
2836         mutex_lock(&trace_types_lock);
2837
2838         if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2839                 cpumask_clear(tracing_reader_cpumask);
2840         else
2841                 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2842
2843         mutex_unlock(&trace_types_lock);
2844
2845         free_cpumask_var(iter->started);
2846         mutex_destroy(&iter->mutex);
2847         kfree(iter->trace);
2848         kfree(iter);
2849
2850         return 0;
2851 }
2852
2853 static unsigned int
2854 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2855 {
2856         struct trace_iterator *iter = filp->private_data;
2857
2858         if (trace_flags & TRACE_ITER_BLOCK) {
2859                 /*
2860                  * Always select as readable when in blocking mode
2861                  */
2862                 return POLLIN | POLLRDNORM;
2863         } else {
2864                 if (!trace_empty(iter))
2865                         return POLLIN | POLLRDNORM;
2866                 poll_wait(filp, &trace_wait, poll_table);
2867                 if (!trace_empty(iter))
2868                         return POLLIN | POLLRDNORM;
2869
2870                 return 0;
2871         }
2872 }
2873
2874
2875 void default_wait_pipe(struct trace_iterator *iter)
2876 {
2877         DEFINE_WAIT(wait);
2878
2879         prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
2880
2881         if (trace_empty(iter))
2882                 schedule();
2883
2884         finish_wait(&trace_wait, &wait);
2885 }
2886
2887 /*
2888  * This is a makeshift waitqueue.
2889  * A tracer might use this callback in some rare cases:
2890  *
2891  *  1) the current tracer might hold the runqueue lock when it wakes up
2892  *     a reader, hence a deadlock (sched, function, and function graph tracers)
2893  *  2) the function tracers trace all functions, and we don't want
2894  *     the overhead of calling wake_up and friends
2895  *     (and of tracing them too)
2896  *
2897  *     Anyway, this is a really primitive wakeup.
2898  */
2899 void poll_wait_pipe(struct trace_iterator *iter)
2900 {
2901         set_current_state(TASK_INTERRUPTIBLE);
2902         /* sleep for 100 msecs, and try again. */
2903         schedule_timeout(HZ / 10);
2904 }
2905
2906 /* Must be called with iter->mutex held. */
2907 static int tracing_wait_pipe(struct file *filp)
2908 {
2909         struct trace_iterator *iter = filp->private_data;
2910
2911         while (trace_empty(iter)) {
2912
2913                 if ((filp->f_flags & O_NONBLOCK)) {
2914                         return -EAGAIN;
2915                 }
2916
2917                 mutex_unlock(&iter->mutex);
2918
2919                 iter->trace->wait_pipe(iter);
2920
2921                 mutex_lock(&iter->mutex);
2922
2923                 if (signal_pending(current))
2924                         return -EINTR;
2925
2926                 /*
2927                  * We return once we have read something and tracing has
2928                  * been disabled. We still block while tracing is disabled
2929                  * if we have never read anything: this allows a user to
2930                  * cat this file and then enable tracing. But after we have
2931                  * read something, we give an EOF once tracing is disabled again.
2932                  *
2933                  * iter->pos will be 0 if we haven't read anything.
2934                  */
2935                 if (!tracer_enabled && iter->pos)
2936                         break;
2937         }
2938
2939         return 1;
2940 }
2941
2942 /*
2943  * Consumer reader: entries are consumed from the ring buffer as they are read.
2944  */
2945 static ssize_t
2946 tracing_read_pipe(struct file *filp, char __user *ubuf,
2947                   size_t cnt, loff_t *ppos)
2948 {
2949         struct trace_iterator *iter = filp->private_data;
2950         static struct tracer *old_tracer;
2951         ssize_t sret;
2952
2953         /* return any leftover data */
2954         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2955         if (sret != -EBUSY)
2956                 return sret;
2957
2958         trace_seq_init(&iter->seq);
2959
2960         /* copy the tracer to avoid using a global lock all around */
2961         mutex_lock(&trace_types_lock);
2962         if (unlikely(old_tracer != current_trace && current_trace)) {
2963                 old_tracer = current_trace;
2964                 *iter->trace = *current_trace;
2965         }
2966         mutex_unlock(&trace_types_lock);
2967
2968         /*
2969          * Avoid more than one consumer on a single file descriptor.
2970          * This is just a matter of trace coherency; the ring buffer itself
2971          * is protected.
2972          */
2973         mutex_lock(&iter->mutex);
2974         if (iter->trace->read) {
2975                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2976                 if (sret)
2977                         goto out;
2978         }
2979
2980 waitagain:
2981         sret = tracing_wait_pipe(filp);
2982         if (sret <= 0)
2983                 goto out;
2984
2985         /* stop when tracing is finished */
2986         if (trace_empty(iter)) {
2987                 sret = 0;
2988                 goto out;
2989         }
2990
2991         if (cnt >= PAGE_SIZE)
2992                 cnt = PAGE_SIZE - 1;
2993
2994         /* reset all but tr, trace, and overruns */
2995         memset(&iter->seq, 0,
2996                sizeof(struct trace_iterator) -
2997                offsetof(struct trace_iterator, seq));
2998         iter->pos = -1;
2999
3000         trace_event_read_lock();
3001         while (find_next_entry_inc(iter) != NULL) {
3002                 enum print_line_t ret;
3003                 int len = iter->seq.len;
3004
3005                 ret = print_trace_line(iter);
3006                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3007                         /* don't print partial lines */
3008                         iter->seq.len = len;
3009                         break;
3010                 }
3011                 if (ret != TRACE_TYPE_NO_CONSUME)
3012                         trace_consume(iter);
3013
3014                 if (iter->seq.len >= cnt)
3015                         break;
3016         }
3017         trace_event_read_unlock();
3018
3019         /* Now copy what we have to the user */
3020         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3021         if (iter->seq.readpos >= iter->seq.len)
3022                 trace_seq_init(&iter->seq);
3023
3024         /*
3025          * If there was nothing to send to the user, in spite of consuming
3026          * trace entries, go back to wait for more entries.
3027          */
3028         if (sret == -EBUSY)
3029                 goto waitagain;
3030
3031 out:
3032         mutex_unlock(&iter->mutex);
3033
3034         return sret;
3035 }
3036
3037 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
3038                                      struct pipe_buffer *buf)
3039 {
3040         __free_page(buf->page);
3041 }
3042
3043 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3044                                      unsigned int idx)
3045 {
3046         __free_page(spd->pages[idx]);
3047 }
3048
3049 static struct pipe_buf_operations tracing_pipe_buf_ops = {
3050         .can_merge              = 0,
3051         .map                    = generic_pipe_buf_map,
3052         .unmap                  = generic_pipe_buf_unmap,
3053         .confirm                = generic_pipe_buf_confirm,
3054         .release                = tracing_pipe_buf_release,
3055         .steal                  = generic_pipe_buf_steal,
3056         .get                    = generic_pipe_buf_get,
3057 };
3058
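     /*
      * Format entries into iter->seq until either "rem" bytes have been
      * produced, a line no longer fits (partial line), or the buffer runs
      * out of entries. Returns how much of "rem" is left over.
      */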
3059 static size_t
3060 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3061 {
3062         size_t count;
3063         int ret;
3064
3065         /* Seq buffer is page-sized, exactly what we need. */
3066         for (;;) {
3067                 count = iter->seq.len;
3068                 ret = print_trace_line(iter);
3069                 count = iter->seq.len - count;
3070                 if (rem < count) {
3071                         rem = 0;
3072                         iter->seq.len -= count;
3073                         break;
3074                 }
3075                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3076                         iter->seq.len -= count;
3077                         break;
3078                 }
3079
3080                 trace_consume(iter);
3081                 rem -= count;
3082                 if (!find_next_entry_inc(iter)) {
3083                         rem = 0;
3084                         iter->ent = NULL;
3085                         break;
3086                 }
3087         }
3088
3089         return rem;
3090 }
3091
3092 static ssize_t tracing_splice_read_pipe(struct file *filp,
3093                                         loff_t *ppos,
3094                                         struct pipe_inode_info *pipe,
3095                                         size_t len,
3096                                         unsigned int flags)
3097 {
3098         struct page *pages[PIPE_BUFFERS];
3099         struct partial_page partial[PIPE_BUFFERS];
3100         struct trace_iterator *iter = filp->private_data;
3101         struct splice_pipe_desc spd = {
3102                 .pages          = pages,
3103                 .partial        = partial,
3104                 .nr_pages       = 0, /* This gets updated below. */
3105                 .flags          = flags,
3106                 .ops            = &tracing_pipe_buf_ops,
3107                 .spd_release    = tracing_spd_release_pipe,
3108         };
3109         static struct tracer *old_tracer;
3110         ssize_t ret;
3111         size_t rem;
3112         unsigned int i;
3113
3114         /* copy the tracer to avoid using a global lock all around */
3115         mutex_lock(&trace_types_lock);
3116         if (unlikely(old_tracer != current_trace && current_trace)) {
3117                 old_tracer = current_trace;
3118                 *iter->trace = *current_trace;
3119         }
3120         mutex_unlock(&trace_types_lock);
3121
3122         mutex_lock(&iter->mutex);
3123
3124         if (iter->trace->splice_read) {
3125                 ret = iter->trace->splice_read(iter, filp,
3126                                                ppos, pipe, len, flags);
3127                 if (ret)
3128                         goto out_err;
3129         }
3130
3131         ret = tracing_wait_pipe(filp);
3132         if (ret <= 0)
3133                 goto out_err;
3134
3135         if (!iter->ent && !find_next_entry_inc(iter)) {
3136                 ret = -EFAULT;
3137                 goto out_err;
3138         }
3139
3140         trace_event_read_lock();
3141
3142         /* Fill as many pages as possible. */
3143         for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
3144                 pages[i] = alloc_page(GFP_KERNEL);
3145                 if (!pages[i])
3146                         break;
3147
3148                 rem = tracing_fill_pipe_page(rem, iter);
3149
3150                 /* Copy the data into the page, so we can start over. */
3151                 ret = trace_seq_to_buffer(&iter->seq,
3152                                           page_address(pages[i]),
3153                                           iter->seq.len);
3154                 if (ret < 0) {
3155                         __free_page(pages[i]);
3156                         break;
3157                 }
3158                 partial[i].offset = 0;
3159                 partial[i].len = iter->seq.len;
3160
3161                 trace_seq_init(&iter->seq);
3162         }
3163
3164         trace_event_read_unlock();
3165         mutex_unlock(&iter->mutex);
3166
3167         spd.nr_pages = i;
3168
3169         return splice_to_pipe(pipe, &spd);
3170
3171 out_err:
3172         mutex_unlock(&iter->mutex);
3173
3174         return ret;
3175 }
3176
3177 static ssize_t
3178 tracing_entries_read(struct file *filp, char __user *ubuf,
3179                      size_t cnt, loff_t *ppos)
3180 {
3181         struct trace_array *tr = filp->private_data;
3182         char buf[96];
3183         int r;
3184
3185         mutex_lock(&trace_types_lock);
3186         if (!ring_buffer_expanded)
3187                 r = sprintf(buf, "%lu (expanded: %lu)\n",
3188                             tr->entries >> 10,
3189                             trace_buf_size >> 10);
3190         else
3191                 r = sprintf(buf, "%lu\n", tr->entries >> 10);
3192         mutex_unlock(&trace_types_lock);
3193
3194         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3195 }
3196
3197 static ssize_t
3198 tracing_entries_write(struct file *filp, const char __user *ubuf,
3199                       size_t cnt, loff_t *ppos)
3200 {
3201         unsigned long val;
3202         char buf[64];
3203         int ret, cpu;
3204
3205         if (cnt >= sizeof(buf))
3206                 return -EINVAL;
3207
3208         if (copy_from_user(&buf, ubuf, cnt))
3209                 return -EFAULT;
3210
3211         buf[cnt] = 0;
3212
3213         ret = strict_strtoul(buf, 10, &val);
3214         if (ret < 0)
3215                 return ret;
3216
3217         /* must have at least 1 entry */
3218         if (!val)
3219                 return -EINVAL;
3220
3221         mutex_lock(&trace_types_lock);
3222
3223         tracing_stop();
3224
3225         /* disable all cpu buffers */
3226         for_each_tracing_cpu(cpu) {
3227                 if (global_trace.data[cpu])
3228                         atomic_inc(&global_trace.data[cpu]->disabled);
3229                 if (max_tr.data[cpu])
3230                         atomic_inc(&max_tr.data[cpu]->disabled);
3231         }
3232
3233         /* value is in KB */
3234         val <<= 10;
3235
3236         if (val != global_trace.entries) {
3237                 ret = tracing_resize_ring_buffer(val);
3238                 if (ret < 0) {
3239                         cnt = ret;
3240                         goto out;
3241                 }
3242         }
3243
3244         filp->f_pos += cnt;
3245
3246         /* If the buffer page checks failed, return ENOMEM */
3247         if (tracing_disabled)
3248                 cnt = -ENOMEM;
3249  out:
3250         for_each_tracing_cpu(cpu) {
3251                 if (global_trace.data[cpu])
3252                         atomic_dec(&global_trace.data[cpu]->disabled);
3253                 if (max_tr.data[cpu])
3254                         atomic_dec(&max_tr.data[cpu]->disabled);
3255         }
3256
3257         tracing_start();
3258         max_tr.entries = global_trace.entries;
3259         mutex_unlock(&trace_types_lock);
3260
3261         return cnt;
3262 }
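     /*
      * A hedged example (the debugfs file name is assumed from the KB
      * units used above): resize the trace buffers to 2 MB:
      *
      *   # echo 2048 > /sys/kernel/debug/tracing/buffer_size_kb
      */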
3263
3264 static int mark_printk(const char *fmt, ...)
3265 {
3266         int ret;
3267         va_list args;
3268         va_start(args, fmt);
3269         ret = trace_vprintk(0, fmt, args);
3270         va_end(args);
3271         return ret;
3272 }
3273
3274 static ssize_t
3275 tracing_mark_write(struct file *filp, const char __user *ubuf,
3276                                         size_t cnt, loff_t *fpos)
3277 {
3278         char *buf;
3279         char *end;
3280
3281         if (tracing_disabled)
3282                 return -EINVAL;
3283
3284         if (cnt > TRACE_BUF_SIZE)
3285                 cnt = TRACE_BUF_SIZE;
3286
3287         buf = kmalloc(cnt + 1, GFP_KERNEL);
3288         if (buf == NULL)
3289                 return -ENOMEM;
3290
3291         if (copy_from_user(buf, ubuf, cnt)) {
3292                 kfree(buf);
3293                 return -EFAULT;
3294         }
3295
3296         /* Cut off at the first NUL or newline. */
3297         buf[cnt] = '\0';
3298         end = strchr(buf, '\n');
3299         if (end)
3300                 *end = '\0';
3301
3302         cnt = mark_printk("%s\n", buf);
3303         kfree(buf);
3304         *fpos += cnt;
3305
3306         return cnt;
3307 }
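     /*
      * A hedged example (the file name for tracing_mark_fops is assumed):
      * user-space can inject its own markers into the trace:
      *
      *   # echo "hello world" > /sys/kernel/debug/tracing/trace_marker
      */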
3308
3309 static const struct file_operations tracing_max_lat_fops = {
3310         .open           = tracing_open_generic,
3311         .read           = tracing_max_lat_read,
3312         .write          = tracing_max_lat_write,
3313 };
3314
3315 static const struct file_operations tracing_ctrl_fops = {
3316         .open           = tracing_open_generic,
3317         .read           = tracing_ctrl_read,
3318         .write          = tracing_ctrl_write,
3319 };
3320
3321 static const struct file_operations set_tracer_fops = {
3322         .open           = tracing_open_generic,
3323         .read           = tracing_set_trace_read,
3324         .write          = tracing_set_trace_write,
3325 };
3326
3327 static const struct file_operations tracing_pipe_fops = {
3328         .open           = tracing_open_pipe,
3329         .poll           = tracing_poll_pipe,
3330         .read           = tracing_read_pipe,
3331         .splice_read    = tracing_splice_read_pipe,
3332         .release        = tracing_release_pipe,
3333 };
3334
3335 static const struct file_operations tracing_entries_fops = {
3336         .open           = tracing_open_generic,
3337         .read           = tracing_entries_read,
3338         .write          = tracing_entries_write,
3339 };
3340
3341 static const struct file_operations tracing_mark_fops = {
3342         .open           = tracing_open_generic,
3343         .write          = tracing_mark_write,
3344 };
3345
3346 struct ftrace_buffer_info {
3347         struct trace_array      *tr;
3348         void                    *spare;
3349         int                     cpu;
3350         unsigned int            read;
3351 };
3352
3353 static int tracing_buffers_open(struct inode *inode, struct file *filp)
3354 {
3355         int cpu = (int)(long)inode->i_private;
3356         struct ftrace_buffer_info *info;
3357
3358         if (tracing_disabled)
3359                 return -ENODEV;
3360
3361         info = kzalloc(sizeof(*info), GFP_KERNEL);
3362         if (!info)
3363                 return -ENOMEM;
3364
3365         info->tr        = &global_trace;
3366         info->cpu       = cpu;
3367         info->spare     = NULL;
3368         /* Force reading ring buffer for first read */
3369         info->read      = (unsigned int)-1;
3370
3371         filp->private_data = info;
3372
3373         return nonseekable_open(inode, filp);
3374 }
3375
3376 static ssize_t
3377 tracing_buffers_read(struct file *filp, char __user *ubuf,
3378                      size_t count, loff_t *ppos)
3379 {
3380         struct ftrace_buffer_info *info = filp->private_data;
3381         unsigned int pos;
3382         ssize_t ret;
3383         size_t size;
3384
3385         if (!count)
3386                 return 0;
3387
3388         if (!info->spare)
3389                 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3390         if (!info->spare)
3391                 return -ENOMEM;
3392
3393         /* Do we have leftover data from a previous read? */
3394         if (info->read < PAGE_SIZE)
3395                 goto read;
3396
3397         info->read = 0;
3398
3399         ret = ring_buffer_read_page(info->tr->buffer,
3400                                     &info->spare,
3401                                     count,
3402                                     info->cpu, 0);
3403         if (ret < 0)
3404                 return 0;
3405
3406         pos = ring_buffer_page_len(info->spare);
3407
3408         if (pos < PAGE_SIZE)
3409                 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3410
3411 read:
3412         size = PAGE_SIZE - info->read;
3413         if (size > count)
3414                 size = count;
3415
3416         ret = copy_to_user(ubuf, info->spare + info->read, size);
3417         if (ret == size)
3418                 return -EFAULT;
3419         size -= ret;
3420
3421         *ppos += size;
3422         info->read += size;
3423
3424         return size;
3425 }
3426
3427 static int tracing_buffers_release(struct inode *inode, struct file *file)
3428 {
3429         struct ftrace_buffer_info *info = file->private_data;
3430
3431         if (info->spare)
3432                 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3433         kfree(info);
3434
3435         return 0;
3436 }
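/*
 * Illustrative only, not part of the original file: the read handler
 * above backs the per-cpu "trace_pipe_raw" files and always deals in
 * whole ring buffer pages. A minimal reader sketch (cpu 0 assumed,
 * PAGE_SIZE assumed to be 4096):
 *
 *	char page[4096];
 *	ssize_t n;
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *
 *	while (fd >= 0 && (n = read(fd, page, sizeof(page))) > 0)
 *		consume_raw_page(page, n);	// hypothetical decoder
 *
 * Unused space at the end of each page is zeroed by tracing_buffers_read()
 * before being copied out, so partial pages are safe to parse.
 */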
3437
3438 struct buffer_ref {
3439         struct ring_buffer      *buffer;
3440         void                    *page;
3441         int                     ref;
3442 };
3443
3444 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3445                                     struct pipe_buffer *buf)
3446 {
3447         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3448
3449         if (--ref->ref)
3450                 return;
3451
3452         ring_buffer_free_read_page(ref->buffer, ref->page);
3453         kfree(ref);
3454         buf->private = 0;
3455 }
3456
3457 static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3458                                  struct pipe_buffer *buf)
3459 {
3460         return 1;
3461 }
3462
3463 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3464                                 struct pipe_buffer *buf)
3465 {
3466         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3467
3468         ref->ref++;
3469 }
3470
3471 /* Pipe buffer operations for ring buffer pages. */
3472 static struct pipe_buf_operations buffer_pipe_buf_ops = {
3473         .can_merge              = 0,
3474         .map                    = generic_pipe_buf_map,
3475         .unmap                  = generic_pipe_buf_unmap,
3476         .confirm                = generic_pipe_buf_confirm,
3477         .release                = buffer_pipe_buf_release,
3478         .steal                  = buffer_pipe_buf_steal,
3479         .get                    = buffer_pipe_buf_get,
3480 };
3481
3482 /*
3483  * Callback from splice_to_pipe(): release the pages held in the spd
3484  * if we errored out while filling the pipe.
3485  */
3486 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3487 {
3488         struct buffer_ref *ref =
3489                 (struct buffer_ref *)spd->partial[i].private;
3490
3491         if (--ref->ref)
3492                 return;
3493
3494         ring_buffer_free_read_page(ref->buffer, ref->page);
3495         kfree(ref);
3496         spd->partial[i].private = 0;
3497 }
3498
3499 static ssize_t
3500 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3501                             struct pipe_inode_info *pipe, size_t len,
3502                             unsigned int flags)
3503 {
3504         struct ftrace_buffer_info *info = file->private_data;
3505         struct partial_page partial[PIPE_BUFFERS];
3506         struct page *pages[PIPE_BUFFERS];
3507         struct splice_pipe_desc spd = {
3508                 .pages          = pages,
3509                 .partial        = partial,
3510                 .flags          = flags,
3511                 .ops            = &buffer_pipe_buf_ops,
3512                 .spd_release    = buffer_spd_release,
3513         };
3514         struct buffer_ref *ref;
3515         int entries, size, i;
3516         ssize_t ret;
3517
3518         if (*ppos & (PAGE_SIZE - 1)) {
3519                 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3520                 return -EINVAL;
3521         }
3522
3523         if (len & (PAGE_SIZE - 1)) {
3524                 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3525                 if (len < PAGE_SIZE)
3526                         return -EINVAL;
3527                 len &= PAGE_MASK;
3528         }
3529
3530         entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3531
3532         for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
3533                 struct page *page;
3534                 int r;
3535
3536                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3537                 if (!ref)
3538                         break;
3539
3540                 ref->ref = 1;
3541                 ref->buffer = info->tr->buffer;
3542                 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3543                 if (!ref->page) {
3544                         kfree(ref);
3545                         break;
3546                 }
3547
3548                 r = ring_buffer_read_page(ref->buffer, &ref->page,
3549                                           len, info->cpu, 1);
3550                 if (r < 0) {
3551                         ring_buffer_free_read_page(ref->buffer,
3552                                                    ref->page);
3553                         kfree(ref);
3554                         break;
3555                 }
3556
3557                 /*
3558                  * Zero out any leftover data; this page is going
3559                  * to user land.
3560                  */
3561                 size = ring_buffer_page_len(ref->page);
3562                 if (size < PAGE_SIZE)
3563                         memset(ref->page + size, 0, PAGE_SIZE - size);
3564
3565                 page = virt_to_page(ref->page);
3566
3567                 spd.pages[i] = page;
3568                 spd.partial[i].len = PAGE_SIZE;
3569                 spd.partial[i].offset = 0;
3570                 spd.partial[i].private = (unsigned long)ref;
3571                 spd.nr_pages++;
3572                 *ppos += PAGE_SIZE;
3573
3574                 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3575         }
3576
3577         spd.nr_pages = i;
3578
3579         /* did we read anything? */
3580         if (!spd.nr_pages) {
3581                 if (flags & SPLICE_F_NONBLOCK)
3582                         ret = -EAGAIN;
3583                 else
3584                         ret = 0;
3585                 /* TODO: block */
3586                 return ret;
3587         }
3588
3589         ret = splice_to_pipe(pipe, &spd);
3590
3591         return ret;
3592 }
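/*
 * Illustrative only, not part of the original file: the splice handler
 * above is the zero-copy path and is meant to be driven with splice(2)
 * on page-aligned offsets and lengths. A minimal sketch (error handling
 * elided, cpu 0 assumed):
 *
 *	int fds[2];
 *	int raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	loff_t off = 0;
 *
 *	pipe(fds);
 *	splice(raw, &off, fds[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *
 * An unaligned *ppos or len trips the WARN_ONCE() checks above and
 * returns -EINVAL.
 */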
3593
3594 static const struct file_operations tracing_buffers_fops = {
3595         .open           = tracing_buffers_open,
3596         .read           = tracing_buffers_read,
3597         .release        = tracing_buffers_release,
3598         .splice_read    = tracing_buffers_splice_read,
3599         .llseek         = no_llseek,
3600 };
3601
3602 static ssize_t
3603 tracing_stats_read(struct file *filp, char __user *ubuf,
3604                    size_t count, loff_t *ppos)
3605 {
3606         unsigned long cpu = (unsigned long)filp->private_data;
3607         struct trace_array *tr = &global_trace;
3608         struct trace_seq *s;
3609         unsigned long cnt;
3610
3611         s = kmalloc(sizeof(*s), GFP_KERNEL);
3612         if (!s)
3613                 return -ENOMEM;
3614
3615         trace_seq_init(s);
3616
3617         cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
3618         trace_seq_printf(s, "entries: %ld\n", cnt);
3619
3620         cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
3621         trace_seq_printf(s, "overrun: %ld\n", cnt);
3622
3623         cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3624         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3625
3626         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3627
3628         kfree(s);
3629
3630         return count;
3631 }
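/*
 * Illustrative only, not part of the original file: reading a per-cpu
 * "stats" file yields the three counters formatted above, e.g. (sample
 * values made up):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 */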
3632
3633 static const struct file_operations tracing_stats_fops = {
3634         .open           = tracing_open_generic,
3635         .read           = tracing_stats_read,
3636 };
3637
3638 #ifdef CONFIG_DYNAMIC_FTRACE
3639
3640 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3641 {
3642         return 0;
3643 }
3644
3645 static ssize_t
3646 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3647                   size_t cnt, loff_t *ppos)
3648 {
3649         static char ftrace_dyn_info_buffer[1024];
3650         static DEFINE_MUTEX(dyn_info_mutex);
3651         unsigned long *p = filp->private_data;
3652         char *buf = ftrace_dyn_info_buffer;
3653         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
3654         int r;
3655
3656         mutex_lock(&dyn_info_mutex);
3657         r = sprintf(buf, "%ld ", *p);
3658
3659         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3660         buf[r++] = '\n';
3661
3662         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3663
3664         mutex_unlock(&dyn_info_mutex);
3665
3666         return r;
3667 }
3668
3669 static const struct file_operations tracing_dyn_info_fops = {
3670         .open           = tracing_open_generic,
3671         .read           = tracing_read_dyn_info,
3672 };
3673 #endif
3674
3675 static struct dentry *d_tracer;
3676
3677 struct dentry *tracing_init_dentry(void)
3678 {
3679         static int once;
3680
3681         if (d_tracer)
3682                 return d_tracer;
3683
3684         if (!debugfs_initialized())
3685                 return NULL;
3686
3687         d_tracer = debugfs_create_dir("tracing", NULL);
3688
3689         if (!d_tracer && !once) {
3690                 once = 1;
3691                 pr_warning("Could not create debugfs directory 'tracing'\n");
3692                 return NULL;
3693         }
3694
3695         return d_tracer;
3696 }
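/*
 * Illustrative only, not part of the original file: everything below
 * hangs off the "tracing" directory created here, so debugfs must be
 * mounted first, typically with:
 *
 *	mount -t debugfs nodev /sys/kernel/debug
 *
 * after which the files appear under /sys/kernel/debug/tracing.
 */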
3697
3698 static struct dentry *d_percpu;
3699
3700 struct dentry *tracing_dentry_percpu(void)
3701 {
3702         static int once;
3703         struct dentry *d_tracer;
3704
3705         if (d_percpu)
3706                 return d_percpu;
3707
3708         d_tracer = tracing_init_dentry();
3709
3710         if (!d_tracer)
3711                 return NULL;
3712
3713         d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3714
3715         if (!d_percpu && !once) {
3716                 once = 1;
3717                 pr_warning("Could not create debugfs directory 'per_cpu'\n");
3718                 return NULL;
3719         }
3720
3721         return d_percpu;
3722 }
3723
3724 static void tracing_init_debugfs_percpu(long cpu)
3725 {
3726         struct dentry *d_percpu = tracing_dentry_percpu();
3727         struct dentry *d_cpu;
3728         /* strlen("cpu") + up to 3 digits (cpu <= 999) + '\0' = 7 */
3729         char cpu_dir[7];
3730
3731         if (cpu > 999 || cpu < 0)
3732                 return;
3733
3734         sprintf(cpu_dir, "cpu%ld", cpu);
3735         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3736         if (!d_cpu) {
3737                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3738                 return;
3739         }
3740
3741         /* per cpu trace_pipe */
3742         trace_create_file("trace_pipe", 0444, d_cpu,
3743                         (void *) cpu, &tracing_pipe_fops);
3744
3745         /* per cpu trace */
3746         trace_create_file("trace", 0644, d_cpu,
3747                         (void *) cpu, &tracing_fops);
3748
3749         trace_create_file("trace_pipe_raw", 0444, d_cpu,
3750                         (void *) cpu, &tracing_buffers_fops);
3751
3752         trace_create_file("stats", 0444, d_cpu,
3753                         (void *) cpu, &tracing_stats_fops);
3754 }
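/*
 * Illustrative only, not part of the original file: with the calls
 * above, each traced cpu N ends up with (assuming the usual mount
 * point):
 *
 *	/sys/kernel/debug/tracing/per_cpu/cpuN/trace
 *	/sys/kernel/debug/tracing/per_cpu/cpuN/trace_pipe
 *	/sys/kernel/debug/tracing/per_cpu/cpuN/trace_pipe_raw
 *	/sys/kernel/debug/tracing/per_cpu/cpuN/stats
 */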
3755
3756 #ifdef CONFIG_FTRACE_SELFTEST
3757 /* Let selftest have access to static functions in this file */
3758 #include "trace_selftest.c"
3759 #endif
3760
3761 struct trace_option_dentry {
3762         struct tracer_opt               *opt;
3763         struct tracer_flags             *flags;
3764         struct dentry                   *entry;
3765 };
3766
3767 static ssize_t
3768 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3769                         loff_t *ppos)
3770 {
3771         struct trace_option_dentry *topt = filp->private_data;
3772         char *buf;
3773
3774         if (topt->flags->val & topt->opt->bit)
3775                 buf = "1\n";
3776         else
3777                 buf = "0\n";
3778
3779         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3780 }
3781
3782 static ssize_t
3783 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3784                          loff_t *ppos)
3785 {
3786         struct trace_option_dentry *topt = filp->private_data;
3787         unsigned long val;
3788         char buf[64];
3789         int ret;
3790
3791         if (cnt >= sizeof(buf))
3792                 return -EINVAL;
3793
3794         if (copy_from_user(&buf, ubuf, cnt))
3795                 return -EFAULT;
3796
3797         buf[cnt] = 0;
3798
3799         ret = strict_strtoul(buf, 10, &val);
3800         if (ret < 0)
3801                 return ret;
3802
3803         ret = 0;
3804         switch (val) {
3805         case 0:
3806                 /* do nothing if already cleared */
3807                 if (!(topt->flags->val & topt->opt->bit))
3808                         break;
3809
3810                 mutex_lock(&trace_types_lock);
3811                 if (current_trace->set_flag)
3812                         ret = current_trace->set_flag(topt->flags->val,
3813                                                       topt->opt->bit, 0);
3814                 mutex_unlock(&trace_types_lock);
3815                 if (ret)
3816                         return ret;
3817                 topt->flags->val &= ~topt->opt->bit;
3818                 break;
3819         case 1:
3820                 /* do nothing if already set */
3821                 if (topt->flags->val & topt->opt->bit)
3822                         break;
3823
3824                 mutex_lock(&trace_types_lock);
3825                 if (current_trace->set_flag)
3826                         ret = current_trace->set_flag(topt->flags->val,
3827                                                       topt->opt->bit, 1);
3828                 mutex_unlock(&trace_types_lock);
3829                 if (ret)
3830                         return ret;
3831                 topt->flags->val |= topt->opt->bit;
3832                 break;
3833
3834         default:
3835                 return -EINVAL;
3836         }
3837
3838         *ppos += cnt;
3839
3840         return cnt;
3841 }
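/*
 * Illustrative only, not part of the original file: a tracer-specific
 * option file accepts exactly "0" or "1". Toggling one from C looks
 * like this (hypothetical option name):
 *
 *	int fd = open("/sys/kernel/debug/tracing/options/verbose", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// routed to current_trace->set_flag()
 *		close(fd);
 *	}
 *
 * Any other value is rejected with -EINVAL by trace_options_write().
 */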
3842
3843
3844 static const struct file_operations trace_options_fops = {
3845         .open = tracing_open_generic,
3846         .read = trace_options_read,
3847         .write = trace_options_write,
3848 };
3849
3850 static ssize_t
3851 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3852                         loff_t *ppos)
3853 {
3854         long index = (long)filp->private_data;
3855         char *buf;
3856
3857         if (trace_flags & (1 << index))
3858                 buf = "1\n";
3859         else
3860                 buf = "0\n";
3861
3862         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3863 }
3864
3865 static ssize_t
3866 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3867                          loff_t *ppos)
3868 {
3869         long index = (long)filp->private_data;
3870         char buf[64];
3871         unsigned long val;
3872         int ret;
3873
3874         if (cnt >= sizeof(buf))
3875                 return -EINVAL;
3876
3877         if (copy_from_user(&buf, ubuf, cnt))
3878                 return -EFAULT;
3879
3880         buf[cnt] = 0;
3881
3882         ret = strict_strtoul(buf, 10, &val);
3883         if (ret < 0)
3884                 return ret;
3885
3886         switch (val) {
3887         case 0:
3888                 trace_flags &= ~(1 << index);
3889                 break;
3890         case 1:
3891                 trace_flags |= 1 << index;
3892                 break;
3893
3894         default:
3895                 return -EINVAL;
3896         }
3897
3898         *ppos += cnt;
3899
3900         return cnt;
3901 }
3902
3903 static const struct file_operations trace_options_core_fops = {
3904         .open = tracing_open_generic,
3905         .read = trace_options_core_read,
3906         .write = trace_options_core_write,
3907 };
3908
3909 struct dentry *trace_create_file(const char *name,
3910                                  mode_t mode,
3911                                  struct dentry *parent,
3912                                  void *data,
3913                                  const struct file_operations *fops)
3914 {
3915         struct dentry *ret;
3916
3917         ret = debugfs_create_file(name, mode, parent, data, fops);
3918         if (!ret)
3919                 pr_warning("Could not create debugfs '%s' entry\n", name);
3920
3921         return ret;
3922 }
3923
3924
3925 static struct dentry *trace_options_init_dentry(void)
3926 {
3927         struct dentry *d_tracer;
3928         static struct dentry *t_options;
3929
3930         if (t_options)
3931                 return t_options;
3932
3933         d_tracer = tracing_init_dentry();
3934         if (!d_tracer)
3935                 return NULL;
3936
3937         t_options = debugfs_create_dir("options", d_tracer);
3938         if (!t_options) {
3939                 pr_warning("Could not create debugfs directory 'options'\n");
3940                 return NULL;
3941         }
3942
3943         return t_options;
3944 }
3945
3946 static void
3947 create_trace_option_file(struct trace_option_dentry *topt,
3948                          struct tracer_flags *flags,
3949                          struct tracer_opt *opt)
3950 {
3951         struct dentry *t_options;
3952
3953         t_options = trace_options_init_dentry();
3954         if (!t_options)
3955                 return;
3956
3957         topt->flags = flags;
3958         topt->opt = opt;
3959
3960         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
3961                                     &trace_options_fops);
3962
3963 }
3964
3965 static struct trace_option_dentry *
3966 create_trace_option_files(struct tracer *tracer)
3967 {
3968         struct trace_option_dentry *topts;
3969         struct tracer_flags *flags;
3970         struct tracer_opt *opts;
3971         int cnt;
3972
3973         if (!tracer)
3974                 return NULL;
3975
3976         flags = tracer->flags;
3977
3978         if (!flags || !flags->opts)
3979                 return NULL;
3980
3981         opts = flags->opts;
3982
3983         for (cnt = 0; opts[cnt].name; cnt++)
3984                 ;
3985
3986         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
3987         if (!topts)
3988                 return NULL;
3989
3990         for (cnt = 0; opts[cnt].name; cnt++)
3991                 create_trace_option_file(&topts[cnt], flags,
3992                                          &opts[cnt]);
3993
3994         return topts;
3995 }
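/*
 * Illustrative only, not part of the original file: a tracer opts into
 * the files created above by exporting a tracer_flags/tracer_opt pair.
 * A minimal sketch with made-up names:
 *
 *	#define MY_OPT_VERBOSE	0x1
 *
 *	static struct tracer_opt my_opts[] = {
 *		{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
 *		{ }	// NULL .name terminates the scan above
 *	};
 *
 *	static struct tracer_flags my_flags = {
 *		.val  = 0,
 *		.opts = my_opts,
 *	};
 *
 * create_trace_option_files() then creates one "options/<name>" file
 * per entry.
 */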
3996
3997 static void
3998 destroy_trace_option_files(struct trace_option_dentry *topts)
3999 {
4000         int cnt;
4001
4002         if (!topts)
4003                 return;
4004
4005         for (cnt = 0; topts[cnt].opt; cnt++) {
4006                 if (topts[cnt].entry)
4007                         debugfs_remove(topts[cnt].entry);
4008         }
4009
4010         kfree(topts);
4011 }
4012
4013 static struct dentry *
4014 create_trace_option_core_file(const char *option, long index)
4015 {
4016         struct dentry *t_options;
4017
4018         t_options = trace_options_init_dentry();
4019         if (!t_options)
4020                 return NULL;
4021
4022         return trace_create_file(option, 0644, t_options, (void *)index,
4023                                     &trace_options_core_fops);
4024 }
4025
4026 static __init void create_trace_options_dir(void)
4027 {
4028         struct dentry *t_options;
4029         int i;
4030
4031         t_options = trace_options_init_dentry();
4032         if (!t_options)
4033                 return;
4034
4035         for (i = 0; trace_options[i]; i++)
4036                 create_trace_option_core_file(trace_options[i], i);
4037 }
4038
4039 static __init int tracer_init_debugfs(void)
4040 {
4041         struct dentry *d_tracer;
4042         int cpu;
4043
4044         d_tracer = tracing_init_dentry();
4045
4046         trace_create_file("tracing_enabled", 0644, d_tracer,
4047                         &global_trace, &tracing_ctrl_fops);
4048
4049         trace_create_file("trace_options", 0644, d_tracer,
4050                         NULL, &tracing_iter_fops);
4051
4052         trace_create_file("tracing_cpumask", 0644, d_tracer,
4053                         NULL, &tracing_cpumask_fops);
4054
4055         trace_create_file("trace", 0644, d_tracer,
4056                         (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
4057
4058         trace_create_file("available_tracers", 0444, d_tracer,
4059                         &global_trace, &show_traces_fops);
4060
4061         trace_create_file("current_tracer", 0644, d_tracer,
4062                         &global_trace, &set_tracer_fops);
4063
4064         trace_create_file("tracing_max_latency", 0644, d_tracer,
4065                         &tracing_max_latency, &tracing_max_lat_fops);
4066
4067         trace_create_file("tracing_thresh", 0644, d_tracer,
4068                         &tracing_thresh, &tracing_max_lat_fops);
4069
4070         trace_create_file("README", 0444, d_tracer,
4071                         NULL, &tracing_readme_fops);
4072
4073         trace_create_file("trace_pipe", 0444, d_tracer,
4074                         (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4075
4076         trace_create_file("buffer_size_kb", 0644, d_tracer,
4077                         &global_trace, &tracing_entries_fops);
4078
4079         trace_create_file("trace_marker", 0220, d_tracer,
4080                         NULL, &tracing_mark_fops);
4081
4082         trace_create_file("saved_cmdlines", 0444, d_tracer,
4083                         NULL, &tracing_saved_cmdlines_fops);
4084
4085 #ifdef CONFIG_DYNAMIC_FTRACE
4086         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4087                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4088 #endif
4089 #ifdef CONFIG_SYSPROF_TRACER
4090         init_tracer_sysprof_debugfs(d_tracer);
4091 #endif
4092
4093         create_trace_options_dir();
4094
4095         for_each_tracing_cpu(cpu)
4096                 tracing_init_debugfs_percpu(cpu);
4097
4098         return 0;
4099 }
4100
4101 static int trace_panic_handler(struct notifier_block *this,
4102                                unsigned long event, void *unused)
4103 {
4104         if (ftrace_dump_on_oops)
4105                 ftrace_dump();
4106         return NOTIFY_OK;
4107 }
4108
4109 static struct notifier_block trace_panic_notifier = {
4110         .notifier_call  = trace_panic_handler,
4111         .next           = NULL,
4112         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
4113 };
4114
4115 static int trace_die_handler(struct notifier_block *self,
4116                              unsigned long val,
4117                              void *data)
4118 {
4119         switch (val) {
4120         case DIE_OOPS:
4121                 if (ftrace_dump_on_oops)
4122                         ftrace_dump();
4123                 break;
4124         default:
4125                 break;
4126         }
4127         return NOTIFY_OK;
4128 }
4129
4130 static struct notifier_block trace_die_notifier = {
4131         .notifier_call = trace_die_handler,
4132         .priority = 200
4133 };
4134
4135 /*
4136  * printk is capped at 1024 bytes; we really don't need it that big.
4137  * Nothing should be printing 1000 characters per line anyway.
4138  */
4139 #define TRACE_MAX_PRINT         1000
4140
4141 /*
4142  * Define here KERN_TRACE so that we have one place to modify
4143  * it if we decide to change what log level the ftrace dump
4144  * should be at.
4145  */
4146 #define KERN_TRACE              KERN_EMERG
4147
4148 static void
4149 trace_printk_seq(struct trace_seq *s)
4150 {
4151         /* Probably should print a warning here. */
4152         if (s->len >= TRACE_MAX_PRINT)
4153                 s->len = TRACE_MAX_PRINT;
4154
4155         /* should be NUL terminated, but we are paranoid. */
4156         s->buffer[s->len] = 0;
4157
4158         printk(KERN_TRACE "%s", s->buffer);
4159
4160         trace_seq_init(s);
4161 }
4162
4163 static void __ftrace_dump(bool disable_tracing)
4164 {
4165         static raw_spinlock_t ftrace_dump_lock =
4166                 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
4167         /* use static because iter can be a bit big for the stack */
4168         static struct trace_iterator iter;
4169         unsigned int old_userobj;
4170         static int dump_ran;
4171         unsigned long flags;
4172         int cnt = 0, cpu;
4173
4174         /* only one dump */
4175         local_irq_save(flags);
4176         __raw_spin_lock(&ftrace_dump_lock);
4177         if (dump_ran)
4178                 goto out;
4179
4180         dump_ran = 1;
4181
4182         tracing_off();
4183
4184         if (disable_tracing)
4185                 ftrace_kill();
4186
4187         for_each_tracing_cpu(cpu) {
4188                 atomic_inc(&global_trace.data[cpu]->disabled);
4189         }
4190
4191         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
4192
4193         /* don't look at user memory in panic mode */
4194         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4195
4196         printk(KERN_TRACE "Dumping ftrace buffer:\n");
4197
4198         /* Simulate the iterator */
4199         iter.tr = &global_trace;
4200         iter.trace = current_trace;
4201         iter.cpu_file = TRACE_PIPE_ALL_CPU;
4202
4203         /*
4204          * We need to stop all tracing on all CPUS to read
4205          * the next buffer. This is a bit expensive, but is
4206          * not done often. We fill in everything we can read,
4207          * and then release the locks again.
4208          */
4209
4210         while (!trace_empty(&iter)) {
4211
4212                 if (!cnt)
4213                         printk(KERN_TRACE "---------------------------------\n");
4214
4215                 cnt++;
4216
4217                 /* reset all but tr, trace, and overruns */
4218                 memset(&iter.seq, 0,
4219                        sizeof(struct trace_iterator) -
4220                        offsetof(struct trace_iterator, seq));
4221                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
4222                 iter.pos = -1;
4223
4224                 if (find_next_entry_inc(&iter) != NULL) {
4225                         print_trace_line(&iter);
4226                         trace_consume(&iter);
4227                 }
4228
4229                 trace_printk_seq(&iter.seq);
4230         }
4231
4232         if (!cnt)
4233                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
4234         else
4235                 printk(KERN_TRACE "---------------------------------\n");
4236
4237         /* Re-enable tracing if requested */
4238         if (!disable_tracing) {
4239                 trace_flags |= old_userobj;
4240
4241                 for_each_tracing_cpu(cpu) {
4242                         atomic_dec(&global_trace.data[cpu]->disabled);
4243                 }
4244                 tracing_on();
4245         }
4246
4247  out:
4248         __raw_spin_unlock(&ftrace_dump_lock);
4249         local_irq_restore(flags);
4250 }
4251
4252 /* By default: disable tracing after the dump */
4253 void ftrace_dump(void)
4254 {
4255         __ftrace_dump(true);
4256 }
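/*
 * Illustrative only, not part of the original file: the usual way into
 * ftrace_dump() is the panic/die notifiers above, armed by booting with
 * the "ftrace_dump_on_oops" parameter or, at run time, via sysctl:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * It can also be called directly from debugging code when the machine
 * is about to go down.
 */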
4257
4258 __init static int tracer_alloc_buffers(void)
4259 {
4260         int ring_buf_size;
4261         int i;
4262         int ret = -ENOMEM;
4263
4264         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4265                 goto out;
4266
4267         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4268                 goto out_free_buffer_mask;
4269
4270         if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4271                 goto out_free_tracing_cpumask;
4272
4273         /* To save memory, keep the ring buffer size at its minimum */
4274         if (ring_buffer_expanded)
4275                 ring_buf_size = trace_buf_size;
4276         else
4277                 ring_buf_size = 1;
4278
4279         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4280         cpumask_copy(tracing_cpumask, cpu_all_mask);
4281         cpumask_clear(tracing_reader_cpumask);
4282
4283         /* TODO: make the number of buffers hot pluggable with CPUS */
4284         global_trace.buffer = ring_buffer_alloc(ring_buf_size,
4285                                                    TRACE_BUFFER_FLAGS);
4286         if (!global_trace.buffer) {
4287                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
4288                 WARN_ON(1);
4289                 goto out_free_cpumask;
4290         }
4291         global_trace.entries = ring_buffer_size(global_trace.buffer);
4292
4293
4294 #ifdef CONFIG_TRACER_MAX_TRACE
4295         max_tr.buffer = ring_buffer_alloc(ring_buf_size,
4296                                              TRACE_BUFFER_FLAGS);
4297         if (!max_tr.buffer) {
4298                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4299                 WARN_ON(1);
4300                 ring_buffer_free(global_trace.buffer);
4301                 goto out_free_cpumask;
4302         }
4303         max_tr.entries = ring_buffer_size(max_tr.buffer);
4304         WARN_ON(max_tr.entries != global_trace.entries);
4305 #endif
4306
4307         /* Hook up the per-cpu data for both trace buffers */
4308         for_each_tracing_cpu(i) {
4309                 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4310                 max_tr.data[i] = &per_cpu(max_data, i);
4311         }
4312
4313         trace_init_cmdlines();
4314
4315         register_tracer(&nop_trace);
4316         current_trace = &nop_trace;
4317 #ifdef CONFIG_BOOT_TRACER
4318         register_tracer(&boot_tracer);
4319 #endif
4320         /* All seems OK, enable tracing */
4321         tracing_disabled = 0;
4322
4323         atomic_notifier_chain_register(&panic_notifier_list,
4324                                        &trace_panic_notifier);
4325
4326         register_die_notifier(&trace_die_notifier);
4327
4328         return 0;
4329
4330 out_free_cpumask:
4331         free_cpumask_var(tracing_reader_cpumask);
4332 out_free_tracing_cpumask:
4333         free_cpumask_var(tracing_cpumask);
4334 out_free_buffer_mask:
4335         free_cpumask_var(tracing_buffer_mask);
4336 out:
4337         return ret;
4338 }
4339
4340 __init static int clear_boot_tracer(void)
4341 {
4342          * The default boot-up tracer lives in an init section.
4343          * This function is called as a late initcall. If the boot
4344          * tracer was never registered, clear the pointer so that
4345          * a later registration cannot access a buffer that is
4346          * about to be freed.
4347          * about to be freed.
4348          */
4349         if (!default_bootup_tracer)
4350                 return 0;
4351
4352         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
4353                default_bootup_tracer);
4354         default_bootup_tracer = NULL;
4355
4356         return 0;
4357 }
4358
4359 early_initcall(tracer_alloc_buffers);
4360 fs_initcall(tracer_init_debugfs);
4361 late_initcall(clear_boot_tracer);