ftrace: ftrace dump on oops control
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/notifier.h>
18 #include <linux/debugfs.h>
19 #include <linux/pagemap.h>
20 #include <linux/hardirq.h>
21 #include <linux/linkage.h>
22 #include <linux/uaccess.h>
23 #include <linux/ftrace.h>
24 #include <linux/module.h>
25 #include <linux/percpu.h>
26 #include <linux/kdebug.h>
27 #include <linux/ctype.h>
28 #include <linux/init.h>
29 #include <linux/poll.h>
30 #include <linux/gfp.h>
31 #include <linux/fs.h>
32 #include <linux/kprobes.h>
33 #include <linux/writeback.h>
34
35 #include <linux/stacktrace.h>
36 #include <linux/ring_buffer.h>
37
38 #include "trace.h"
39
40 #define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)
41
42 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
43 unsigned long __read_mostly     tracing_thresh;
44
45 static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
46
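/*
 * ftrace_disable_cpu()/ftrace_enable_cpu() below bump a per-CPU counter
 * that the tracing fast path (see trace_function() later in this file)
 * checks before writing, so the function tracer does not record events
 * on this CPU while its ring buffer is being read, reset or swapped.
 */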
47 static inline void ftrace_disable_cpu(void)
48 {
49         preempt_disable();
50         local_inc(&__get_cpu_var(ftrace_cpu_disabled));
51 }
52
53 static inline void ftrace_enable_cpu(void)
54 {
55         local_dec(&__get_cpu_var(ftrace_cpu_disabled));
56         preempt_enable();
57 }
58
59 static cpumask_t __read_mostly          tracing_buffer_mask;
60
61 #define for_each_tracing_cpu(cpu)       \
62         for_each_cpu_mask(cpu, tracing_buffer_mask)
63
64 static int tracing_disabled = 1;
65
66 /*
67  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
68  *
69  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
70  * is set, then ftrace_dump is called. This will output the contents
71  * of the ftrace buffers to the console.  This is very useful for
72  * capturing traces that lead to crashes and outputting them to a
73  * serial console.
74  *
75  * It is off by default, but can be enabled either by specifying
76  * "ftrace_dump_on_oops" on the kernel command line, or by setting
77  * /proc/sys/kernel/ftrace_dump_on_oops to true.
78  */
79 int ftrace_dump_on_oops;
80
81 static int __init set_ftrace_dump_on_oops(char *str)
82 {
83         ftrace_dump_on_oops = 1;
84         return 1;
85 }
86 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
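/*
 * Usage sketch (restating the comment above): the dump can be enabled
 * either at boot or at run time, e.g.:
 *
 *     # kernel command line:
 *     ... ftrace_dump_on_oops ...
 *
 *     # at run time:
 *     echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */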
87
88 long
89 ns2usecs(cycle_t nsec)
90 {
91         nsec += 500;
92         do_div(nsec, 1000);
93         return nsec;
94 }
95
96 cycle_t ftrace_now(int cpu)
97 {
98         u64 ts = ring_buffer_time_stamp(cpu);
99         ring_buffer_normalize_time_stamp(cpu, &ts);
100         return ts;
101 }
102
103 /*
104  * The global_trace is the descriptor that holds the tracing
105  * buffers for the live tracing. For each CPU, it contains
106  * a linked list of pages that will store trace entries. The
107  * page descriptor of the pages in memory is used to hold
108  * the linked list by linking the lru item in the page descriptor
109  * to each of the pages in the buffer per CPU.
110  *
111  * For each active CPU there is a data field that holds the
112  * pages for the buffer for that CPU. Each CPU has the same number
113  * of pages allocated for its buffer.
114  */
115 static struct trace_array       global_trace;
116
117 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
118
119 /*
120  * The max_tr is used to snapshot the global_trace when a maximum
121  * latency is reached. Some tracers will use this to store a maximum
122  * trace while they continue examining live traces.
123  *
124  * The buffers for the max_tr are set up the same as the global_trace.
125  * When a snapshot is taken, the linked list of the max_tr is swapped
126  * with the linked list of the global_trace and the buffers are reset for
127  * the global_trace so the tracing can continue.
128  */
129 static struct trace_array       max_tr;
130
131 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
132
133 /* tracer_enabled is used to toggle activation of a tracer */
134 static int                      tracer_enabled = 1;
135
136 /* function tracing enabled */
137 int                             ftrace_function_enabled;
138
139 /*
140  * trace_buf_size is the size in bytes that is allocated
141  * for a buffer. Note, the number of bytes is always rounded
142  * to page size.
143  *
144  * This number is purposely set to a low default of 16384 entries.
145  * If a dump on oops happens, it is much appreciated not to have
146  * to wait for all that output. In any case, this is configurable
147  * at both boot time and run time.
148  */
149 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
150
151 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
152
153 /* trace_types holds a linked list of available tracers. */
154 static struct tracer            *trace_types __read_mostly;
155
156 /* current_trace points to the tracer that is currently active */
157 static struct tracer            *current_trace __read_mostly;
158
159 /*
160  * max_tracer_type_len is used to simplify allocating the buffers
161  * used to read tracer names from userspace. We keep track of the
162  * longest registered tracer name.
163  */
164 static int                      max_tracer_type_len;
165
166 /*
167  * trace_types_lock is used to protect the trace_types list.
168  * This lock is also used to keep user access serialized.
169  * Accesses from userspace will grab this lock while userspace
170  * activities happen inside the kernel.
171  */
172 static DEFINE_MUTEX(trace_types_lock);
173
174 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
175 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
176
177 /* trace_flags holds iter_ctrl options */
178 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
179
180 /**
181  * trace_wake_up - wake up tasks waiting for trace input
182  *
183  * Simply wakes up any task that is blocked on the trace_wait
184  * queue. This is used with trace_poll for tasks polling the trace.
185  */
186 void trace_wake_up(void)
187 {
188         /*
189          * The runqueue_is_locked() can fail, but this is the best we
190          * have for now:
191          */
192         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
193                 wake_up(&trace_wait);
194 }
195
196 static int __init set_buf_size(char *str)
197 {
198         unsigned long buf_size;
199         int ret;
200
201         if (!str)
202                 return 0;
203         ret = strict_strtoul(str, 0, &buf_size);
204         /* nr_entries can not be zero */
205         if (ret < 0 || buf_size == 0)
206                 return 0;
207         trace_buf_size = buf_size;
208         return 1;
209 }
210 __setup("trace_buf_size=", set_buf_size);
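/*
 * Usage sketch: the default buffer size above can be overridden on the
 * kernel command line with, e.g.:
 *
 *     trace_buf_size=1048576
 *
 * The value is in bytes and is rounded up to page size, as noted above.
 */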
211
212 unsigned long nsecs_to_usecs(unsigned long nsecs)
213 {
214         return nsecs / 1000;
215 }
216
217 /*
218  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
219  * control the output of kernel symbols.
220  */
221 #define TRACE_ITER_SYM_MASK \
222         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
223
224 /* These must match the bit positions in trace_iterator_flags */
225 static const char *trace_options[] = {
226         "print-parent",
227         "sym-offset",
228         "sym-addr",
229         "verbose",
230         "raw",
231         "hex",
232         "bin",
233         "block",
234         "stacktrace",
235         "sched-tree",
236         "ftrace_printk",
237         NULL
238 };
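/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): each string in trace_options[] corresponds, by position, to one
 * bit in trace_flags, so a name can be turned into its mask like this:
 */
static int __maybe_unused trace_option_to_mask(const char *name)
{
	int i;

	for (i = 0; trace_options[i]; i++) {
		if (strcmp(name, trace_options[i]) == 0)
			return 1 << i;
	}
	return 0;
}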
239
240 /*
241  * ftrace_max_lock is used to protect the swapping of buffers
242  * when taking a max snapshot. The buffers themselves are
243  * protected by per_cpu spinlocks. But the action of the swap
244  * needs its own lock.
245  *
246  * This is defined as a raw_spinlock_t in order to help
247  * with performance when lockdep debugging is enabled.
248  */
249 static raw_spinlock_t ftrace_max_lock =
250         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
251
252 /*
253  * Copy the new maximum trace into the separate maximum-trace
254  * structure. (this way the maximum trace is permanently saved,
255  * for later retrieval via /debugfs/tracing/latency_trace)
256  */
257 static void
258 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
259 {
260         struct trace_array_cpu *data = tr->data[cpu];
261
262         max_tr.cpu = cpu;
263         max_tr.time_start = data->preempt_timestamp;
264
265         data = max_tr.data[cpu];
266         data->saved_latency = tracing_max_latency;
267
268         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
269         data->pid = tsk->pid;
270         data->uid = tsk->uid;
271         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
272         data->policy = tsk->policy;
273         data->rt_priority = tsk->rt_priority;
274
275         /* record this task's comm */
276         tracing_record_cmdline(current);
277 }
278
279 /**
280  * trace_seq_printf - sequence printing of trace information
281  * @s: trace sequence descriptor
282  * @fmt: printf format string
283  *
284  * The tracer may use either sequence operations or its own
285  * copy to user routines. To simplify formatting of a trace,
286  * trace_seq_printf is used to store strings into a special
287  * buffer (@s). Then the output may be either used by
288  * the sequencer or pulled into another buffer.
289  */
290 int
291 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
292 {
293         int len = (PAGE_SIZE - 1) - s->len;
294         va_list ap;
295         int ret;
296
297         if (!len)
298                 return 0;
299
300         va_start(ap, fmt);
301         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
302         va_end(ap);
303
304         /* If we can't write it all, don't bother writing anything */
305         if (ret >= len)
306                 return 0;
307
308         s->len += ret;
309
310         return len;
311 }
312
313 /**
314  * trace_seq_puts - trace sequence printing of simple string
315  * @s: trace sequence descriptor
316  * @str: simple string to record
317  *
318  * The tracer may use either the sequence operations or its own
319  * copy to user routines. This function records a simple string
320  * into a special buffer (@s) for later retrieval by a sequencer
321  * or other mechanism.
322  */
323 static int
324 trace_seq_puts(struct trace_seq *s, const char *str)
325 {
326         int len = strlen(str);
327
328         if (len > ((PAGE_SIZE - 1) - s->len))
329                 return 0;
330
331         memcpy(s->buffer + s->len, str, len);
332         s->len += len;
333
334         return len;
335 }
336
337 static int
338 trace_seq_putc(struct trace_seq *s, unsigned char c)
339 {
340         if (s->len >= (PAGE_SIZE - 1))
341                 return 0;
342
343         s->buffer[s->len++] = c;
344
345         return 1;
346 }
347
348 static int
349 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
350 {
351         if (len > ((PAGE_SIZE - 1) - s->len))
352                 return 0;
353
354         memcpy(s->buffer + s->len, mem, len);
355         s->len += len;
356
357         return len;
358 }
359
360 #define MAX_MEMHEX_BYTES        8
361 #define HEX_CHARS               (MAX_MEMHEX_BYTES*2 + 1)
362
363 static int
364 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
365 {
366         unsigned char hex[HEX_CHARS];
367         unsigned char *data = mem;
368         int i, j;
369
370 #ifdef __BIG_ENDIAN
371         for (i = 0, j = 0; i < len; i++) {
372 #else
373         for (i = len-1, j = 0; i >= 0; i--) {
374 #endif
375                 hex[j++] = hex_asc_hi(data[i]);
376                 hex[j++] = hex_asc_lo(data[i]);
377         }
378         hex[j++] = ' ';
379
380         return trace_seq_putmem(s, hex, j);
381 }
382
383 static void
384 trace_seq_reset(struct trace_seq *s)
385 {
386         s->len = 0;
387         s->readpos = 0;
388 }
389
390 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
391 {
392         int len;
393         int ret;
394
395         if (s->len <= s->readpos)
396                 return -EBUSY;
397
398         len = s->len - s->readpos;
399         if (cnt > len)
400                 cnt = len;
401         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
402         if (ret)
403                 return -EFAULT;
404
405         s->readpos += cnt;
406         return cnt;
407 }
408
409 static void
410 trace_print_seq(struct seq_file *m, struct trace_seq *s)
411 {
412         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
413
414         s->buffer[len] = 0;
415         seq_puts(m, s->buffer);
416
417         trace_seq_reset(s);
418 }
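/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): the usual life cycle of a trace_seq.  Text is accumulated with
 * trace_seq_printf() and friends, then either copied to userspace with
 * trace_seq_to_user() or flushed into a seq_file with trace_print_seq().
 */
static void __maybe_unused example_seq_usage(struct seq_file *m,
					     struct trace_seq *s)
{
	trace_seq_reset(s);

	/* a zero return means the page-sized buffer is full */
	if (!trace_seq_printf(s, "pid %d waited %lu usecs\n", 42, 100UL))
		return;

	trace_print_seq(m, s);	/* emits the text into @m and resets @s */
}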
419
420 /**
421  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
422  * @tr: tracer
423  * @tsk: the task with the latency
424  * @cpu: The cpu that initiated the trace.
425  *
426  * Flip the buffers between the @tr and the max_tr and record information
427  * about which task was the cause of this latency.
428  */
429 void
430 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
431 {
432         struct ring_buffer *buf = tr->buffer;
433
434         WARN_ON_ONCE(!irqs_disabled());
435         __raw_spin_lock(&ftrace_max_lock);
436
437         tr->buffer = max_tr.buffer;
438         max_tr.buffer = buf;
439
440         ftrace_disable_cpu();
441         ring_buffer_reset(tr->buffer);
442         ftrace_enable_cpu();
443
444         __update_max_tr(tr, tsk, cpu);
445         __raw_spin_unlock(&ftrace_max_lock);
446 }
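/*
 * Illustrative sketch (not taken from this file): how a latency tracer
 * would typically use the above.  When it measures a new worst-case
 * latency it updates tracing_max_latency and snapshots the live buffers.
 * The "delta" value and calling context are hypothetical; note that
 * update_max_tr() expects to be called with interrupts disabled.
 */
static void __maybe_unused example_report_max_latency(struct trace_array *tr,
						      unsigned long delta,
						      int cpu)
{
	if (delta <= tracing_max_latency)
		return;

	tracing_max_latency = delta;
	update_max_tr(tr, current, cpu);
}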
447
448 /**
449  * update_max_tr_single - only copy one trace over, and reset the rest
450  * @tr: tracer
451  * @tsk: task with the latency
452  * @cpu: the cpu of the buffer to copy.
453  *
454  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
455  */
456 void
457 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
458 {
459         int ret;
460
461         WARN_ON_ONCE(!irqs_disabled());
462         __raw_spin_lock(&ftrace_max_lock);
463
464         ftrace_disable_cpu();
465
466         ring_buffer_reset(max_tr.buffer);
467         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
468
469         ftrace_enable_cpu();
470
471         WARN_ON_ONCE(ret);
472
473         __update_max_tr(tr, tsk, cpu);
474         __raw_spin_unlock(&ftrace_max_lock);
475 }
476
477 /**
478  * register_tracer - register a tracer with the ftrace system.
479  * @type: the plugin for the tracer
480  *
481  * Register a new plugin tracer.
482  */
483 int register_tracer(struct tracer *type)
484 {
485         struct tracer *t;
486         int len;
487         int ret = 0;
488
489         if (!type->name) {
490                 pr_info("Tracer must have a name\n");
491                 return -1;
492         }
493
494         mutex_lock(&trace_types_lock);
495         for (t = trace_types; t; t = t->next) {
496                 if (strcmp(type->name, t->name) == 0) {
497                         /* already found */
498                         pr_info("Trace %s already registered\n",
499                                 type->name);
500                         ret = -1;
501                         goto out;
502                 }
503         }
504
505 #ifdef CONFIG_FTRACE_STARTUP_TEST
506         if (type->selftest) {
507                 struct tracer *saved_tracer = current_trace;
508                 struct trace_array *tr = &global_trace;
509                 int saved_ctrl = tr->ctrl;
510                 int i;
511                 /*
512                  * Run a selftest on this tracer.
513                  * Here we reset the trace buffer, and set the current
514                  * tracer to be this tracer. The tracer can then run some
515                  * internal tracing to verify that everything is in order.
516                  * If we fail, we do not register this tracer.
517                  */
518                 for_each_tracing_cpu(i) {
519                         tracing_reset(tr, i);
520                 }
521                 current_trace = type;
522                 tr->ctrl = 0;
523                 /* the test is responsible for initializing and enabling */
524                 pr_info("Testing tracer %s: ", type->name);
525                 ret = type->selftest(type, tr);
526                 /* the test is responsible for resetting too */
527                 current_trace = saved_tracer;
528                 tr->ctrl = saved_ctrl;
529                 if (ret) {
530                         printk(KERN_CONT "FAILED!\n");
531                         goto out;
532                 }
533                 /* Only reset on passing, to avoid touching corrupted buffers */
534                 for_each_tracing_cpu(i) {
535                         tracing_reset(tr, i);
536                 }
537                 printk(KERN_CONT "PASSED\n");
538         }
539 #endif
540
541         type->next = trace_types;
542         trace_types = type;
543         len = strlen(type->name);
544         if (len > max_tracer_type_len)
545                 max_tracer_type_len = len;
546
547  out:
548         mutex_unlock(&trace_types_lock);
549
550         return ret;
551 }
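/*
 * Illustrative sketch (not part of the original file): a minimal tracer
 * plugin registering itself.  The callback names and signatures are
 * assumed from struct tracer in trace.h; real tracers follow this
 * pattern from an initcall.
 */
#if 0
static void example_trace_init(struct trace_array *tr)
{
	/* reset buffers, hook whatever events this tracer needs */
}

static void example_trace_reset(struct trace_array *tr)
{
	/* unhook events, stop tracing */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_trace_init,
	.reset	= example_trace_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
device_initcall(init_example_tracer);
#endif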
552
553 void unregister_tracer(struct tracer *type)
554 {
555         struct tracer **t;
556         int len;
557
558         mutex_lock(&trace_types_lock);
559         for (t = &trace_types; *t; t = &(*t)->next) {
560                 if (*t == type)
561                         goto found;
562         }
563         pr_info("Trace %s not registered\n", type->name);
564         goto out;
565
566  found:
567         *t = (*t)->next;
568         if (strlen(type->name) != max_tracer_type_len)
569                 goto out;
570
571         max_tracer_type_len = 0;
572         for (t = &trace_types; *t; t = &(*t)->next) {
573                 len = strlen((*t)->name);
574                 if (len > max_tracer_type_len)
575                         max_tracer_type_len = len;
576         }
577  out:
578         mutex_unlock(&trace_types_lock);
579 }
580
581 void tracing_reset(struct trace_array *tr, int cpu)
582 {
583         ftrace_disable_cpu();
584         ring_buffer_reset_cpu(tr->buffer, cpu);
585         ftrace_enable_cpu();
586 }
587
588 #define SAVED_CMDLINES 128
589 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
590 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
591 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
592 static int cmdline_idx;
593 static DEFINE_SPINLOCK(trace_cmdline_lock);
594
595 /* temporarily disable recording */
596 atomic_t trace_record_cmdline_disabled __read_mostly;
597
598 static void trace_init_cmdlines(void)
599 {
600         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
601         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
602         cmdline_idx = 0;
603 }
604
605 void trace_stop_cmdline_recording(void);
606
607 static void trace_save_cmdline(struct task_struct *tsk)
608 {
609         unsigned map;
610         unsigned idx;
611
612         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
613                 return;
614
615         /*
616          * It's not the end of the world if we don't get
617          * the lock, but we also don't want to spin
618          * nor do we want to disable interrupts,
619          * so if we miss here, then better luck next time.
620          */
621         if (!spin_trylock(&trace_cmdline_lock))
622                 return;
623
624         idx = map_pid_to_cmdline[tsk->pid];
625         if (idx >= SAVED_CMDLINES) {
626                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
627
628                 map = map_cmdline_to_pid[idx];
629                 if (map <= PID_MAX_DEFAULT)
630                         map_pid_to_cmdline[map] = (unsigned)-1;
631
632                 map_pid_to_cmdline[tsk->pid] = idx;
633
634                 cmdline_idx = idx;
635         }
636
637         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
638
639         spin_unlock(&trace_cmdline_lock);
640 }
641
642 static char *trace_find_cmdline(int pid)
643 {
644         char *cmdline = "<...>";
645         unsigned map;
646
647         if (!pid)
648                 return "<idle>";
649
650         if (pid > PID_MAX_DEFAULT)
651                 goto out;
652
653         map = map_pid_to_cmdline[pid];
654         if (map >= SAVED_CMDLINES)
655                 goto out;
656
657         cmdline = saved_cmdlines[map];
658
659  out:
660         return cmdline;
661 }
662
663 void tracing_record_cmdline(struct task_struct *tsk)
664 {
665         if (atomic_read(&trace_record_cmdline_disabled))
666                 return;
667
668         trace_save_cmdline(tsk);
669 }
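/*
 * Note on the pair above: writers call tracing_record_cmdline(current)
 * when events are generated (as __update_max_tr() does earlier in this
 * file), and the output side maps a recorded pid back to a name with
 * trace_find_cmdline(), falling back to "<idle>" for pid 0 and "<...>"
 * when the slot has been recycled.
 */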
670
671 void
672 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
673                              int pc)
674 {
675         struct task_struct *tsk = current;
676
677         entry->preempt_count            = pc & 0xff;
678         entry->pid                      = (tsk) ? tsk->pid : 0;
679         entry->flags =
680                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
681                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
682                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
683                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
684 }
685
686 void
687 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
688                unsigned long ip, unsigned long parent_ip, unsigned long flags,
689                int pc)
690 {
691         struct ring_buffer_event *event;
692         struct ftrace_entry *entry;
693         unsigned long irq_flags;
694
695         /* If we are reading the ring buffer, don't trace */
696         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
697                 return;
698
699         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
700                                          &irq_flags);
701         if (!event)
702                 return;
703         entry   = ring_buffer_event_data(event);
704         tracing_generic_entry_update(&entry->ent, flags, pc);
705         entry->ent.type                 = TRACE_FN;
706         entry->ip                       = ip;
707         entry->parent_ip                = parent_ip;
708         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
709 }
710
711 void
712 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
713        unsigned long ip, unsigned long parent_ip, unsigned long flags,
714        int pc)
715 {
716         if (likely(!atomic_read(&data->disabled)))
717                 trace_function(tr, data, ip, parent_ip, flags, pc);
718 }
719
720 static void ftrace_trace_stack(struct trace_array *tr,
721                                struct trace_array_cpu *data,
722                                unsigned long flags,
723                                int skip, int pc)
724 {
725         struct ring_buffer_event *event;
726         struct stack_entry *entry;
727         struct stack_trace trace;
728         unsigned long irq_flags;
729
730         if (!(trace_flags & TRACE_ITER_STACKTRACE))
731                 return;
732
733         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
734                                          &irq_flags);
735         if (!event)
736                 return;
737         entry   = ring_buffer_event_data(event);
738         tracing_generic_entry_update(&entry->ent, flags, pc);
739         entry->ent.type         = TRACE_STACK;
740
741         memset(&entry->caller, 0, sizeof(entry->caller));
742
743         trace.nr_entries        = 0;
744         trace.max_entries       = FTRACE_STACK_ENTRIES;
745         trace.skip              = skip;
746         trace.entries           = entry->caller;
747
748         save_stack_trace(&trace);
749         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
750 }
751
752 void __trace_stack(struct trace_array *tr,
753                    struct trace_array_cpu *data,
754                    unsigned long flags,
755                    int skip)
756 {
757         ftrace_trace_stack(tr, data, flags, skip, preempt_count());
758 }
759
760 static void
761 ftrace_trace_special(void *__tr, void *__data,
762                      unsigned long arg1, unsigned long arg2, unsigned long arg3,
763                      int pc)
764 {
765         struct ring_buffer_event *event;
766         struct trace_array_cpu *data = __data;
767         struct trace_array *tr = __tr;
768         struct special_entry *entry;
769         unsigned long irq_flags;
770
771         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
772                                          &irq_flags);
773         if (!event)
774                 return;
775         entry   = ring_buffer_event_data(event);
776         tracing_generic_entry_update(&entry->ent, 0, pc);
777         entry->ent.type                 = TRACE_SPECIAL;
778         entry->arg1                     = arg1;
779         entry->arg2                     = arg2;
780         entry->arg3                     = arg3;
781         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
782         ftrace_trace_stack(tr, data, irq_flags, 4, pc);
783
784         trace_wake_up();
785 }
786
787 void
788 __trace_special(void *__tr, void *__data,
789                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
790 {
791         ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
792 }
793
794 void
795 tracing_sched_switch_trace(struct trace_array *tr,
796                            struct trace_array_cpu *data,
797                            struct task_struct *prev,
798                            struct task_struct *next,
799                            unsigned long flags, int pc)
800 {
801         struct ring_buffer_event *event;
802         struct ctx_switch_entry *entry;
803         unsigned long irq_flags;
804
805         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
806                                            &irq_flags);
807         if (!event)
808                 return;
809         entry   = ring_buffer_event_data(event);
810         tracing_generic_entry_update(&entry->ent, flags, pc);
811         entry->ent.type                 = TRACE_CTX;
812         entry->prev_pid                 = prev->pid;
813         entry->prev_prio                = prev->prio;
814         entry->prev_state               = prev->state;
815         entry->next_pid                 = next->pid;
816         entry->next_prio                = next->prio;
817         entry->next_state               = next->state;
818         entry->next_cpu = task_cpu(next);
819         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
820         ftrace_trace_stack(tr, data, flags, 5, pc);
821 }
822
823 void
824 tracing_sched_wakeup_trace(struct trace_array *tr,
825                            struct trace_array_cpu *data,
826                            struct task_struct *wakee,
827                            struct task_struct *curr,
828                            unsigned long flags, int pc)
829 {
830         struct ring_buffer_event *event;
831         struct ctx_switch_entry *entry;
832         unsigned long irq_flags;
833
834         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
835                                            &irq_flags);
836         if (!event)
837                 return;
838         entry   = ring_buffer_event_data(event);
839         tracing_generic_entry_update(&entry->ent, flags, pc);
840         entry->ent.type                 = TRACE_WAKE;
841         entry->prev_pid                 = curr->pid;
842         entry->prev_prio                = curr->prio;
843         entry->prev_state               = curr->state;
844         entry->next_pid                 = wakee->pid;
845         entry->next_prio                = wakee->prio;
846         entry->next_state               = wakee->state;
847         entry->next_cpu                 = task_cpu(wakee);
848         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
849         ftrace_trace_stack(tr, data, flags, 6, pc);
850
851         trace_wake_up();
852 }
853
854 void
855 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
856 {
857         struct trace_array *tr = &global_trace;
858         struct trace_array_cpu *data;
859         int cpu;
860         int pc;
861
862         if (tracing_disabled || !tr->ctrl)
863                 return;
864
865         pc = preempt_count();
866         preempt_disable_notrace();
867         cpu = raw_smp_processor_id();
868         data = tr->data[cpu];
869
870         if (likely(!atomic_read(&data->disabled)))
871                 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
872
873         preempt_enable_notrace();
874 }
875
876 #ifdef CONFIG_FTRACE
877 static void
878 function_trace_call(unsigned long ip, unsigned long parent_ip)
879 {
880         struct trace_array *tr = &global_trace;
881         struct trace_array_cpu *data;
882         unsigned long flags;
883         long disabled;
884         int cpu, resched;
885         int pc;
886
887         if (unlikely(!ftrace_function_enabled))
888                 return;
889
890         if (skip_trace(ip))
891                 return;
892
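        /*
         * need_resched() is sampled before disabling preemption so that,
         * on the way out, preemption can be re-enabled without triggering
         * a reschedule from inside this callback when one was already
         * pending on entry.
         */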
893         pc = preempt_count();
894         resched = need_resched();
895         preempt_disable_notrace();
896         local_save_flags(flags);
897         cpu = raw_smp_processor_id();
898         data = tr->data[cpu];
899         disabled = atomic_inc_return(&data->disabled);
900
901         if (likely(disabled == 1))
902                 trace_function(tr, data, ip, parent_ip, flags, pc);
903
904         atomic_dec(&data->disabled);
905         if (resched)
906                 preempt_enable_no_resched_notrace();
907         else
908                 preempt_enable_notrace();
909 }
910
911 static struct ftrace_ops trace_ops __read_mostly =
912 {
913         .func = function_trace_call,
914 };
915
916 void tracing_start_function_trace(void)
917 {
918         ftrace_function_enabled = 0;
919         register_ftrace_function(&trace_ops);
920         if (tracer_enabled)
921                 ftrace_function_enabled = 1;
922 }
923
924 void tracing_stop_function_trace(void)
925 {
926         ftrace_function_enabled = 0;
927         unregister_ftrace_function(&trace_ops);
928 }
929 #endif
930
931 enum trace_file_type {
932         TRACE_FILE_LAT_FMT      = 1,
933 };
934
935 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
936 {
937         /* Don't allow ftrace to trace into the ring buffers */
938         ftrace_disable_cpu();
939
940         iter->idx++;
941         if (iter->buffer_iter[iter->cpu])
942                 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
943
944         ftrace_enable_cpu();
945 }
946
947 static struct trace_entry *
948 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
949 {
950         struct ring_buffer_event *event;
951         struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
952
953         /* Don't allow ftrace to trace into the ring buffers */
954         ftrace_disable_cpu();
955
956         if (buf_iter)
957                 event = ring_buffer_iter_peek(buf_iter, ts);
958         else
959                 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
960
961         ftrace_enable_cpu();
962
963         return event ? ring_buffer_event_data(event) : NULL;
964 }
965
966 static struct trace_entry *
967 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
968 {
969         struct ring_buffer *buffer = iter->tr->buffer;
970         struct trace_entry *ent, *next = NULL;
971         u64 next_ts = 0, ts;
972         int next_cpu = -1;
973         int cpu;
974
975         for_each_tracing_cpu(cpu) {
976
977                 if (ring_buffer_empty_cpu(buffer, cpu))
978                         continue;
979
980                 ent = peek_next_entry(iter, cpu, &ts);
981
982                 /*
983                  * Pick the entry with the smallest timestamp:
984                  */
985                 if (ent && (!next || ts < next_ts)) {
986                         next = ent;
987                         next_cpu = cpu;
988                         next_ts = ts;
989                 }
990         }
991
992         if (ent_cpu)
993                 *ent_cpu = next_cpu;
994
995         if (ent_ts)
996                 *ent_ts = next_ts;
997
998         return next;
999 }
1000
1001 /* Find the next real entry, without updating the iterator itself */
1002 static struct trace_entry *
1003 find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1004 {
1005         return __find_next_entry(iter, ent_cpu, ent_ts);
1006 }
1007
1008 /* Find the next real entry, and increment the iterator to the next entry */
1009 static void *find_next_entry_inc(struct trace_iterator *iter)
1010 {
1011         iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1012
1013         if (iter->ent)
1014                 trace_iterator_increment(iter, iter->cpu);
1015
1016         return iter->ent ? iter : NULL;
1017 }
1018
1019 static void trace_consume(struct trace_iterator *iter)
1020 {
1021         /* Don't allow ftrace to trace into the ring buffers */
1022         ftrace_disable_cpu();
1023         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1024         ftrace_enable_cpu();
1025 }
1026
1027 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1028 {
1029         struct trace_iterator *iter = m->private;
1030         int i = (int)*pos;
1031         void *ent;
1032
1033         (*pos)++;
1034
1035         /* can't go backwards */
1036         if (iter->idx > i)
1037                 return NULL;
1038
1039         if (iter->idx < 0)
1040                 ent = find_next_entry_inc(iter);
1041         else
1042                 ent = iter;
1043
1044         while (ent && iter->idx < i)
1045                 ent = find_next_entry_inc(iter);
1046
1047         iter->pos = *pos;
1048
1049         return ent;
1050 }
1051
1052 static void *s_start(struct seq_file *m, loff_t *pos)
1053 {
1054         struct trace_iterator *iter = m->private;
1055         void *p = NULL;
1056         loff_t l = 0;
1057         int cpu;
1058
1059         mutex_lock(&trace_types_lock);
1060
1061         if (!current_trace || current_trace != iter->trace) {
1062                 mutex_unlock(&trace_types_lock);
1063                 return NULL;
1064         }
1065
1066         atomic_inc(&trace_record_cmdline_disabled);
1067
1068         /* let the tracer grab locks here if needed */
1069         if (current_trace->start)
1070                 current_trace->start(iter);
1071
1072         if (*pos != iter->pos) {
1073                 iter->ent = NULL;
1074                 iter->cpu = 0;
1075                 iter->idx = -1;
1076
1077                 ftrace_disable_cpu();
1078
1079                 for_each_tracing_cpu(cpu) {
1080                         ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1081                 }
1082
1083                 ftrace_enable_cpu();
1084
1085                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1086                         ;
1087
1088         } else {
1089                 l = *pos - 1;
1090                 p = s_next(m, p, &l);
1091         }
1092
1093         return p;
1094 }
1095
1096 static void s_stop(struct seq_file *m, void *p)
1097 {
1098         struct trace_iterator *iter = m->private;
1099
1100         atomic_dec(&trace_record_cmdline_disabled);
1101
1102         /* let the tracer release locks here if needed */
1103         if (current_trace && current_trace == iter->trace && iter->trace->stop)
1104                 iter->trace->stop(iter);
1105
1106         mutex_unlock(&trace_types_lock);
1107 }
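/*
 * Illustrative note: s_start/s_next/s_stop above implement the standard
 * seq_file iterator protocol.  Later in this file they are wired up
 * together with a show callback, roughly like the (assumed) structure
 * below:
 *
 *	static struct seq_operations tracer_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 */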
1108
1109 #define KRETPROBE_MSG "[unknown/kretprobe'd]"
1110
1111 #ifdef CONFIG_KRETPROBES
1112 static inline int kretprobed(unsigned long addr)
1113 {
1114         return addr == (unsigned long)kretprobe_trampoline;
1115 }
1116 #else
1117 static inline int kretprobed(unsigned long addr)
1118 {
1119         return 0;
1120 }
1121 #endif /* CONFIG_KRETPROBES */
1122
1123 static int
1124 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1125 {
1126 #ifdef CONFIG_KALLSYMS
1127         char str[KSYM_SYMBOL_LEN];
1128
1129         kallsyms_lookup(address, NULL, NULL, NULL, str);
1130
1131         return trace_seq_printf(s, fmt, str);
1132 #endif
1133         return 1;
1134 }
1135
1136 static int
1137 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1138                      unsigned long address)
1139 {
1140 #ifdef CONFIG_KALLSYMS
1141         char str[KSYM_SYMBOL_LEN];
1142
1143         sprint_symbol(str, address);
1144         return trace_seq_printf(s, fmt, str);
1145 #endif
1146         return 1;
1147 }
1148
1149 #ifndef CONFIG_64BIT
1150 # define IP_FMT "%08lx"
1151 #else
1152 # define IP_FMT "%016lx"
1153 #endif
1154
1155 static int
1156 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1157 {
1158         int ret;
1159
1160         if (!ip)
1161                 return trace_seq_printf(s, "0");
1162
1163         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1164                 ret = seq_print_sym_offset(s, "%s", ip);
1165         else
1166                 ret = seq_print_sym_short(s, "%s", ip);
1167
1168         if (!ret)
1169                 return 0;
1170
1171         if (sym_flags & TRACE_ITER_SYM_ADDR)
1172                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1173         return ret;
1174 }
1175
1176 static void print_lat_help_header(struct seq_file *m)
1177 {
1178         seq_puts(m, "#                  _------=> CPU#            \n");
1179         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1180         seq_puts(m, "#                | / _----=> need-resched    \n");
1181         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1182         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1183         seq_puts(m, "#                |||| /                      \n");
1184         seq_puts(m, "#                |||||     delay             \n");
1185         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1186         seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
1187 }
1188
1189 static void print_func_help_header(struct seq_file *m)
1190 {
1191         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1192         seq_puts(m, "#              | |       |          |         |\n");
1193 }
1194
1195
1196 static void
1197 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1198 {
1199         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1200         struct trace_array *tr = iter->tr;
1201         struct trace_array_cpu *data = tr->data[tr->cpu];
1202         struct tracer *type = current_trace;
1203         unsigned long total;
1204         unsigned long entries;
1205         const char *name = "preemption";
1206
1207         if (type)
1208                 name = type->name;
1209
1210         entries = ring_buffer_entries(iter->tr->buffer);
1211         total = entries +
1212                 ring_buffer_overruns(iter->tr->buffer);
1213
1214         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1215                    name, UTS_RELEASE);
1216         seq_puts(m, "-----------------------------------"
1217                  "---------------------------------\n");
1218         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1219                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1220                    nsecs_to_usecs(data->saved_latency),
1221                    entries,
1222                    total,
1223                    tr->cpu,
1224 #if defined(CONFIG_PREEMPT_NONE)
1225                    "server",
1226 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1227                    "desktop",
1228 #elif defined(CONFIG_PREEMPT)
1229                    "preempt",
1230 #else
1231                    "unknown",
1232 #endif
1233                    /* These are reserved for later use */
1234                    0, 0, 0, 0);
1235 #ifdef CONFIG_SMP
1236         seq_printf(m, " #P:%d)\n", num_online_cpus());
1237 #else
1238         seq_puts(m, ")\n");
1239 #endif
1240         seq_puts(m, "    -----------------\n");
1241         seq_printf(m, "    | task: %.16s-%d "
1242                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1243                    data->comm, data->pid, data->uid, data->nice,
1244                    data->policy, data->rt_priority);
1245         seq_puts(m, "    -----------------\n");
1246
1247         if (data->critical_start) {
1248                 seq_puts(m, " => started at: ");
1249                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1250                 trace_print_seq(m, &iter->seq);
1251                 seq_puts(m, "\n => ended at:   ");
1252                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1253                 trace_print_seq(m, &iter->seq);
1254                 seq_puts(m, "\n");
1255         }
1256
1257         seq_puts(m, "\n");
1258 }
1259
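/*
 * lat_print_generic() below emits the fixed-width latency-format prefix
 * described by print_lat_help_header(): the comm and pid, the CPU, then
 * one character per state bit ('d' irqs off, 'N' need-resched, 'H'
 * hardirq inside softirq, 'h' hardirq, 's' softirq, '.' neither),
 * followed by the preempt depth.
 */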
1260 static void
1261 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1262 {
1263         int hardirq, softirq;
1264         char *comm;
1265
1266         comm = trace_find_cmdline(entry->pid);
1267
1268         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1269         trace_seq_printf(s, "%3d", cpu);
1270         trace_seq_printf(s, "%c%c",
1271                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1272                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1273
1274         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1275         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1276         if (hardirq && softirq) {
1277                 trace_seq_putc(s, 'H');
1278         } else {
1279                 if (hardirq) {
1280                         trace_seq_putc(s, 'h');
1281                 } else {
1282                         if (softirq)
1283                                 trace_seq_putc(s, 's');
1284                         else
1285                                 trace_seq_putc(s, '.');
1286                 }
1287         }
1288
1289         if (entry->preempt_count)
1290                 trace_seq_printf(s, "%x", entry->preempt_count);
1291         else
1292                 trace_seq_puts(s, ".");
1293 }
1294
1295 unsigned long preempt_mark_thresh = 100;
1296
1297 static void
1298 lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1299                     unsigned long rel_usecs)
1300 {
1301         trace_seq_printf(s, " %4lldus", abs_usecs);
1302         if (rel_usecs > preempt_mark_thresh)
1303                 trace_seq_puts(s, "!: ");
1304         else if (rel_usecs > 1)
1305                 trace_seq_puts(s, "+: ");
1306         else
1307                 trace_seq_puts(s, " : ");
1308 }
1309
1310 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1311
1312 /*
1313  * The message is supposed to contain an ending newline.
1314  * If the printing stops prematurely, try to add a newline of our own.
1315  */
1316 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1317 {
1318         struct trace_entry *ent;
1319         struct trace_field_cont *cont;
1320         bool ok = true;
1321
1322         ent = peek_next_entry(iter, iter->cpu, NULL);
1323         if (!ent || ent->type != TRACE_CONT) {
1324                 trace_seq_putc(s, '\n');
1325                 return;
1326         }
1327
1328         do {
1329                 cont = (struct trace_field_cont *)ent;
1330                 if (ok)
1331                         ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1332
1333                 ftrace_disable_cpu();
1334
1335                 if (iter->buffer_iter[iter->cpu])
1336                         ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1337                 else
1338                         ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1339
1340                 ftrace_enable_cpu();
1341
1342                 ent = peek_next_entry(iter, iter->cpu, NULL);
1343         } while (ent && ent->type == TRACE_CONT);
1344
1345         if (!ok)
1346                 trace_seq_putc(s, '\n');
1347 }
1348
1349 static enum print_line_t
1350 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1351 {
1352         struct trace_seq *s = &iter->seq;
1353         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1354         struct trace_entry *next_entry;
1355         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1356         struct trace_entry *entry = iter->ent;
1357         unsigned long abs_usecs;
1358         unsigned long rel_usecs;
1359         u64 next_ts;
1360         char *comm;
1361         int S, T;
1362         int i;
1363         unsigned state;
1364
1365         if (entry->type == TRACE_CONT)
1366                 return TRACE_TYPE_HANDLED;
1367
1368         next_entry = find_next_entry(iter, NULL, &next_ts);
1369         if (!next_entry)
1370                 next_ts = iter->ts;
1371         rel_usecs = ns2usecs(next_ts - iter->ts);
1372         abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1373
1374         if (verbose) {
1375                 comm = trace_find_cmdline(entry->pid);
1376                 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1377                                  " %ld.%03ldms (+%ld.%03ldms): ",
1378                                  comm,
1379                                  entry->pid, cpu, entry->flags,
1380                                  entry->preempt_count, trace_idx,
1381                                  ns2usecs(iter->ts),
1382                                  abs_usecs/1000,
1383                                  abs_usecs % 1000, rel_usecs/1000,
1384                                  rel_usecs % 1000);
1385         } else {
1386                 lat_print_generic(s, entry, cpu);
1387                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1388         }
1389         switch (entry->type) {
1390         case TRACE_FN: {
1391                 struct ftrace_entry *field;
1392
1393                 trace_assign_type(field, entry);
1394
1395                 seq_print_ip_sym(s, field->ip, sym_flags);
1396                 trace_seq_puts(s, " (");
1397                 if (kretprobed(field->parent_ip))
1398                         trace_seq_puts(s, KRETPROBE_MSG);
1399                 else
1400                         seq_print_ip_sym(s, field->parent_ip, sym_flags);
1401                 trace_seq_puts(s, ")\n");
1402                 break;
1403         }
1404         case TRACE_CTX:
1405         case TRACE_WAKE: {
1406                 struct ctx_switch_entry *field;
1407
1408                 trace_assign_type(field, entry);
1409
1410                 T = field->next_state < sizeof(state_to_char) ?
1411                         state_to_char[field->next_state] : 'X';
1412
1413                 state = field->prev_state ?
1414                         __ffs(field->prev_state) + 1 : 0;
1415                 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1416                 comm = trace_find_cmdline(field->next_pid);
1417                 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1418                                  field->prev_pid,
1419                                  field->prev_prio,
1420                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1421                                  field->next_cpu,
1422                                  field->next_pid,
1423                                  field->next_prio,
1424                                  T, comm);
1425                 break;
1426         }
1427         case TRACE_SPECIAL: {
1428                 struct special_entry *field;
1429
1430                 trace_assign_type(field, entry);
1431
1432                 trace_seq_printf(s, "# %ld %ld %ld\n",
1433                                  field->arg1,
1434                                  field->arg2,
1435                                  field->arg3);
1436                 break;
1437         }
1438         case TRACE_STACK: {
1439                 struct stack_entry *field;
1440
1441                 trace_assign_type(field, entry);
1442
1443                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1444                         if (i)
1445                                 trace_seq_puts(s, " <= ");
1446                         seq_print_ip_sym(s, field->caller[i], sym_flags);
1447                 }
1448                 trace_seq_puts(s, "\n");
1449                 break;
1450         }
1451         case TRACE_PRINT: {
1452                 struct print_entry *field;
1453
1454                 trace_assign_type(field, entry);
1455
1456                 seq_print_ip_sym(s, field->ip, sym_flags);
1457                 trace_seq_printf(s, ": %s", field->buf);
1458                 if (entry->flags & TRACE_FLAG_CONT)
1459                         trace_seq_print_cont(s, iter);
1460                 break;
1461         }
1462         default:
1463                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1464         }
1465         return TRACE_TYPE_HANDLED;
1466 }
1467
1468 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1469 {
1470         struct trace_seq *s = &iter->seq;
1471         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1472         struct trace_entry *entry;
1473         unsigned long usec_rem;
1474         unsigned long long t;
1475         unsigned long secs;
1476         char *comm;
1477         int ret;
1478         int S, T;
1479         int i;
1480
1481         entry = iter->ent;
1482
1483         if (entry->type == TRACE_CONT)
1484                 return TRACE_TYPE_HANDLED;
1485
1486         comm = trace_find_cmdline(iter->ent->pid);
1487
1488         t = ns2usecs(iter->ts);
1489         usec_rem = do_div(t, 1000000ULL);
1490         secs = (unsigned long)t;
1491
1492         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1493         if (!ret)
1494                 return TRACE_TYPE_PARTIAL_LINE;
1495         ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1496         if (!ret)
1497                 return TRACE_TYPE_PARTIAL_LINE;
1498         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1499         if (!ret)
1500                 return TRACE_TYPE_PARTIAL_LINE;
1501
1502         switch (entry->type) {
1503         case TRACE_FN: {
1504                 struct ftrace_entry *field;
1505
1506                 trace_assign_type(field, entry);
1507
1508                 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1509                 if (!ret)
1510                         return TRACE_TYPE_PARTIAL_LINE;
1511                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1512                                                 field->parent_ip) {
1513                         ret = trace_seq_printf(s, " <-");
1514                         if (!ret)
1515                                 return TRACE_TYPE_PARTIAL_LINE;
1516                         if (kretprobed(field->parent_ip))
1517                                 ret = trace_seq_puts(s, KRETPROBE_MSG);
1518                         else
1519                                 ret = seq_print_ip_sym(s,
1520                                                        field->parent_ip,
1521                                                        sym_flags);
1522                         if (!ret)
1523                                 return TRACE_TYPE_PARTIAL_LINE;
1524                 }
1525                 ret = trace_seq_printf(s, "\n");
1526                 if (!ret)
1527                         return TRACE_TYPE_PARTIAL_LINE;
1528                 break;
1529         }
1530         case TRACE_CTX:
1531         case TRACE_WAKE: {
1532                 struct ctx_switch_entry *field;
1533
1534                 trace_assign_type(field, entry);
1535
1536                 S = field->prev_state < sizeof(state_to_char) ?
1537                         state_to_char[field->prev_state] : 'X';
1538                 T = field->next_state < sizeof(state_to_char) ?
1539                         state_to_char[field->next_state] : 'X';
1540                 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1541                                        field->prev_pid,
1542                                        field->prev_prio,
1543                                        S,
1544                                        entry->type == TRACE_CTX ? "==>" : "  +",
1545                                        field->next_cpu,
1546                                        field->next_pid,
1547                                        field->next_prio,
1548                                        T);
1549                 if (!ret)
1550                         return TRACE_TYPE_PARTIAL_LINE;
1551                 break;
1552         }
1553         case TRACE_SPECIAL: {
1554                 struct special_entry *field;
1555
1556                 trace_assign_type(field, entry);
1557
1558                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1559                                  field->arg1,
1560                                  field->arg2,
1561                                  field->arg3);
1562                 if (!ret)
1563                         return TRACE_TYPE_PARTIAL_LINE;
1564                 break;
1565         }
1566         case TRACE_STACK: {
1567                 struct stack_entry *field;
1568
1569                 trace_assign_type(field, entry);
1570
1571                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1572                         if (i) {
1573                                 ret = trace_seq_puts(s, " <= ");
1574                                 if (!ret)
1575                                         return TRACE_TYPE_PARTIAL_LINE;
1576                         }
1577                         ret = seq_print_ip_sym(s, field->caller[i],
1578                                                sym_flags);
1579                         if (!ret)
1580                                 return TRACE_TYPE_PARTIAL_LINE;
1581                 }
1582                 ret = trace_seq_puts(s, "\n");
1583                 if (!ret)
1584                         return TRACE_TYPE_PARTIAL_LINE;
1585                 break;
1586         }
1587         case TRACE_PRINT: {
1588                 struct print_entry *field;
1589
1590                 trace_assign_type(field, entry);
1591
1592                 seq_print_ip_sym(s, field->ip, sym_flags);
1593                 trace_seq_printf(s, ": %s", field->buf);
1594                 if (entry->flags & TRACE_FLAG_CONT)
1595                         trace_seq_print_cont(s, iter);
1596                 break;
1597         }
1598         }
1599         return TRACE_TYPE_HANDLED;
1600 }
1601
1602 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1603 {
1604         struct trace_seq *s = &iter->seq;
1605         struct trace_entry *entry;
1606         int ret;
1607         int S, T;
1608
1609         entry = iter->ent;
1610
1611         if (entry->type == TRACE_CONT)
1612                 return TRACE_TYPE_HANDLED;
1613
1614         ret = trace_seq_printf(s, "%d %d %llu ",
1615                 entry->pid, iter->cpu, iter->ts);
1616         if (!ret)
1617                 return TRACE_TYPE_PARTIAL_LINE;
1618
1619         switch (entry->type) {
1620         case TRACE_FN: {
1621                 struct ftrace_entry *field;
1622
1623                 trace_assign_type(field, entry);
1624
1625                 ret = trace_seq_printf(s, "%lx %lx\n",
1626                                         field->ip,
1627                                         field->parent_ip);
1628                 if (!ret)
1629                         return TRACE_TYPE_PARTIAL_LINE;
1630                 break;
1631         }
1632         case TRACE_CTX:
1633         case TRACE_WAKE: {
1634                 struct ctx_switch_entry *field;
1635
1636                 trace_assign_type(field, entry);
1637
1638                 S = field->prev_state < sizeof(state_to_char) ?
1639                         state_to_char[field->prev_state] : 'X';
1640                 T = field->next_state < sizeof(state_to_char) ?
1641                         state_to_char[field->next_state] : 'X';
1642                 if (entry->type == TRACE_WAKE)
1643                         S = '+';
1644                 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1645                                        field->prev_pid,
1646                                        field->prev_prio,
1647                                        S,
1648                                        field->next_cpu,
1649                                        field->next_pid,
1650                                        field->next_prio,
1651                                        T);
1652                 if (!ret)
1653                         return TRACE_TYPE_PARTIAL_LINE;
1654                 break;
1655         }
1656         case TRACE_SPECIAL:
1657         case TRACE_STACK: {
1658                 struct special_entry *field;
1659
1660                 trace_assign_type(field, entry);
1661
1662                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1663                                  field->arg1,
1664                                  field->arg2,
1665                                  field->arg3);
1666                 if (!ret)
1667                         return TRACE_TYPE_PARTIAL_LINE;
1668                 break;
1669         }
1670         case TRACE_PRINT: {
1671                 struct print_entry *field;
1672
1673                 trace_assign_type(field, entry);
1674
1675                 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
1676                 if (entry->flags & TRACE_FLAG_CONT)
1677                         trace_seq_print_cont(s, iter);
1678                 break;
1679         }
1680         }
1681         return TRACE_TYPE_HANDLED;
1682 }
1683
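/*
 * Helpers for the hex and binary output formats: append a field to
 * the trace_seq as raw bytes or as hex digits, and return from the
 * calling function with a partial-line result if the sequence
 * buffer fills up.
 */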
1684 #define SEQ_PUT_FIELD_RET(s, x)                         \
1685 do {                                                    \
1686         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1687                 return 0;                               \
1688 } while (0)
1689
1690 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1691 do {                                                    \
1692         BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);     \
1693         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1694                 return 0;                               \
1695 } while (0)
1696
1697 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1698 {
1699         struct trace_seq *s = &iter->seq;
1700         unsigned char newline = '\n';
1701         struct trace_entry *entry;
1702         int S, T;
1703
1704         entry = iter->ent;
1705
1706         if (entry->type == TRACE_CONT)
1707                 return TRACE_TYPE_HANDLED;
1708
1709         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1710         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1711         SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1712
1713         switch (entry->type) {
1714         case TRACE_FN: {
1715                 struct ftrace_entry *field;
1716
1717                 trace_assign_type(field, entry);
1718
1719                 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
1720                 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1721                 break;
1722         }
1723         case TRACE_CTX:
1724         case TRACE_WAKE: {
1725                 struct ctx_switch_entry *field;
1726
1727                 trace_assign_type(field, entry);
1728
1729                 S = field->prev_state < sizeof(state_to_char) ?
1730                         state_to_char[field->prev_state] : 'X';
1731                 T = field->next_state < sizeof(state_to_char) ?
1732                         state_to_char[field->next_state] : 'X';
1733                 if (entry->type == TRACE_WAKE)
1734                         S = '+';
1735                 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1736                 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1737                 SEQ_PUT_HEX_FIELD_RET(s, S);
1738                 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1739                 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1740                 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1741                 SEQ_PUT_HEX_FIELD_RET(s, T);
1742                 break;
1743         }
1744         case TRACE_SPECIAL:
1745         case TRACE_STACK: {
1746                 struct special_entry *field;
1747
1748                 trace_assign_type(field, entry);
1749
1750                 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1751                 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1752                 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1753                 break;
1754         }
1755         }
1756         SEQ_PUT_FIELD_RET(s, newline);
1757
1758         return TRACE_TYPE_HANDLED;
1759 }
1760
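/*
 * Binary output format: fields are copied out verbatim, intended to
 * be decoded by a user-space post-processing tool rather than read
 * directly.
 */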
1761 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1762 {
1763         struct trace_seq *s = &iter->seq;
1764         struct trace_entry *entry;
1765
1766         entry = iter->ent;
1767
1768         if (entry->type == TRACE_CONT)
1769                 return TRACE_TYPE_HANDLED;
1770
1771         SEQ_PUT_FIELD_RET(s, entry->pid);
1772         SEQ_PUT_FIELD_RET(s, iter->cpu);
1773         SEQ_PUT_FIELD_RET(s, iter->ts);
1774
1775         switch (entry->type) {
1776         case TRACE_FN: {
1777                 struct ftrace_entry *field;
1778
1779                 trace_assign_type(field, entry);
1780
1781                 SEQ_PUT_FIELD_RET(s, field->ip);
1782                 SEQ_PUT_FIELD_RET(s, field->parent_ip);
1783                 break;
1784         }
1785         case TRACE_CTX: {
1786                 struct ctx_switch_entry *field;
1787
1788                 trace_assign_type(field, entry);
1789
1790                 SEQ_PUT_FIELD_RET(s, field->prev_pid);
1791                 SEQ_PUT_FIELD_RET(s, field->prev_prio);
1792                 SEQ_PUT_FIELD_RET(s, field->prev_state);
1793                 SEQ_PUT_FIELD_RET(s, field->next_pid);
1794                 SEQ_PUT_FIELD_RET(s, field->next_prio);
1795                 SEQ_PUT_FIELD_RET(s, field->next_state);
1796                 break;
1797         }
1798         case TRACE_SPECIAL:
1799         case TRACE_STACK: {
1800                 struct special_entry *field;
1801
1802                 trace_assign_type(field, entry);
1803
1804                 SEQ_PUT_FIELD_RET(s, field->arg1);
1805                 SEQ_PUT_FIELD_RET(s, field->arg2);
1806                 SEQ_PUT_FIELD_RET(s, field->arg3);
1807                 break;
1808         }
1809         }
1810         return TRACE_TYPE_HANDLED;
1811 }
1812
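/*
 * Return 1 when there is nothing left to read on any traced CPU,
 * using the per-cpu buffer iterator when one exists and the live
 * ring buffer otherwise.
 */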
1813 static int trace_empty(struct trace_iterator *iter)
1814 {
1815         int cpu;
1816
1817         for_each_tracing_cpu(cpu) {
1818                 if (iter->buffer_iter[cpu]) {
1819                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1820                                 return 0;
1821                 } else {
1822                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1823                                 return 0;
1824                 }
1825         }
1826
1827         return 1;
1828 }
1829
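/*
 * Pick the output routine for the current entry: a tracer-specific
 * print_line() hook wins if it handles the entry, then the binary,
 * hex and raw iterator flags are honoured in that order, then the
 * latency format, and finally the default format.
 */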
1830 static enum print_line_t print_trace_line(struct trace_iterator *iter)
1831 {
1832         enum print_line_t ret;
1833
1834         if (iter->trace && iter->trace->print_line) {
1835                 ret = iter->trace->print_line(iter);
1836                 if (ret != TRACE_TYPE_UNHANDLED)
1837                         return ret;
1838         }
1839
1840         if (trace_flags & TRACE_ITER_BIN)
1841                 return print_bin_fmt(iter);
1842
1843         if (trace_flags & TRACE_ITER_HEX)
1844                 return print_hex_fmt(iter);
1845
1846         if (trace_flags & TRACE_ITER_RAW)
1847                 return print_raw_fmt(iter);
1848
1849         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1850                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1851
1852         return print_trace_fmt(iter);
1853 }
1854
1855 static int s_show(struct seq_file *m, void *v)
1856 {
1857         struct trace_iterator *iter = v;
1858
1859         if (iter->ent == NULL) {
1860                 if (iter->tr) {
1861                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1862                         seq_puts(m, "#\n");
1863                 }
1864                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1865                         /* print nothing if the buffers are empty */
1866                         if (trace_empty(iter))
1867                                 return 0;
1868                         print_trace_header(m, iter);
1869                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1870                                 print_lat_help_header(m);
1871                 } else {
1872                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1873                                 print_func_help_header(m);
1874                 }
1875         } else {
1876                 print_trace_line(iter);
1877                 trace_print_seq(m, &iter->seq);
1878         }
1879
1880         return 0;
1881 }
1882
1883 static struct seq_operations tracer_seq_ops = {
1884         .start          = s_start,
1885         .next           = s_next,
1886         .stop           = s_stop,
1887         .show           = s_show,
1888 };
1889
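/*
 * Common open path for the "trace" and "latency_trace" files:
 * allocate the iterator, start a ring buffer iterator on every
 * traced CPU, hook the iterator into the seq_file, and pause
 * tracing while the buffer is being dumped.
 */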
1890 static struct trace_iterator *
1891 __tracing_open(struct inode *inode, struct file *file, int *ret)
1892 {
1893         struct trace_iterator *iter;
1894         struct seq_file *m;
1895         int cpu;
1896
1897         if (tracing_disabled) {
1898                 *ret = -ENODEV;
1899                 return NULL;
1900         }
1901
1902         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1903         if (!iter) {
1904                 *ret = -ENOMEM;
1905                 goto out;
1906         }
1907
1908         mutex_lock(&trace_types_lock);
1909         if (current_trace && current_trace->print_max)
1910                 iter->tr = &max_tr;
1911         else
1912                 iter->tr = inode->i_private;
1913         iter->trace = current_trace;
1914         iter->pos = -1;
1915
1916         for_each_tracing_cpu(cpu) {
1917
1918                 iter->buffer_iter[cpu] =
1919                         ring_buffer_read_start(iter->tr->buffer, cpu);
1920
1921                 if (!iter->buffer_iter[cpu])
1922                         goto fail_buffer;
1923         }
1924
1925         /* TODO stop tracer */
1926         *ret = seq_open(file, &tracer_seq_ops);
1927         if (*ret)
1928                 goto fail_buffer;
1929
1930         m = file->private_data;
1931         m->private = iter;
1932
1933         /* stop the trace while dumping */
1934         if (iter->tr->ctrl) {
1935                 tracer_enabled = 0;
1936                 ftrace_function_enabled = 0;
1937         }
1938
1939         if (iter->trace && iter->trace->open)
1940                 iter->trace->open(iter);
1941
1942         mutex_unlock(&trace_types_lock);
1943
1944  out:
1945         return iter;
1946
1947  fail_buffer:
1948         for_each_tracing_cpu(cpu) {
1949                 if (iter->buffer_iter[cpu])
1950                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1951         }
1952         mutex_unlock(&trace_types_lock);
1953         kfree(iter);
1954         return ERR_PTR(-ENOMEM);
1955 }
1956
1957 int tracing_open_generic(struct inode *inode, struct file *filp)
1958 {
1959         if (tracing_disabled)
1960                 return -ENODEV;
1961
1962         filp->private_data = inode->i_private;
1963         return 0;
1964 }
1965
1966 int tracing_release(struct inode *inode, struct file *file)
1967 {
1968         struct seq_file *m = (struct seq_file *)file->private_data;
1969         struct trace_iterator *iter = m->private;
1970         int cpu;
1971
1972         mutex_lock(&trace_types_lock);
1973         for_each_tracing_cpu(cpu) {
1974                 if (iter->buffer_iter[cpu])
1975                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1976         }
1977
1978         if (iter->trace && iter->trace->close)
1979                 iter->trace->close(iter);
1980
1981         /* reenable tracing if it was previously enabled */
1982         if (iter->tr->ctrl) {
1983                 tracer_enabled = 1;
1984                 /*
1985                  * It is safe to enable function tracing even if it
1986                  * isn't used
1987                  */
1988                 ftrace_function_enabled = 1;
1989         }
1990         mutex_unlock(&trace_types_lock);
1991
1992         seq_release(inode, file);
1993         kfree(iter);
1994         return 0;
1995 }
1996
1997 static int tracing_open(struct inode *inode, struct file *file)
1998 {
1999         int ret;
2000
2001         __tracing_open(inode, file, &ret);
2002
2003         return ret;
2004 }
2005
2006 static int tracing_lt_open(struct inode *inode, struct file *file)
2007 {
2008         struct trace_iterator *iter;
2009         int ret;
2010
2011         iter = __tracing_open(inode, file, &ret);
2012
2013         if (!ret)
2014                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2015
2016         return ret;
2017 }
2018
2019
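/*
 * seq_file iterator over the list of registered tracers, used to
 * implement the "available_tracers" file.
 */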
2020 static void *
2021 t_next(struct seq_file *m, void *v, loff_t *pos)
2022 {
2023         struct tracer *t = m->private;
2024
2025         (*pos)++;
2026
2027         if (t)
2028                 t = t->next;
2029
2030         m->private = t;
2031
2032         return t;
2033 }
2034
2035 static void *t_start(struct seq_file *m, loff_t *pos)
2036 {
2037         struct tracer *t = m->private;
2038         loff_t l = 0;
2039
2040         mutex_lock(&trace_types_lock);
2041         for (; t && l < *pos; t = t_next(m, t, &l))
2042                 ;
2043
2044         return t;
2045 }
2046
2047 static void t_stop(struct seq_file *m, void *p)
2048 {
2049         mutex_unlock(&trace_types_lock);
2050 }
2051
2052 static int t_show(struct seq_file *m, void *v)
2053 {
2054         struct tracer *t = v;
2055
2056         if (!t)
2057                 return 0;
2058
2059         seq_printf(m, "%s", t->name);
2060         if (t->next)
2061                 seq_putc(m, ' ');
2062         else
2063                 seq_putc(m, '\n');
2064
2065         return 0;
2066 }
2067
2068 static struct seq_operations show_traces_seq_ops = {
2069         .start          = t_start,
2070         .next           = t_next,
2071         .stop           = t_stop,
2072         .show           = t_show,
2073 };
2074
2075 static int show_traces_open(struct inode *inode, struct file *file)
2076 {
2077         int ret;
2078
2079         if (tracing_disabled)
2080                 return -ENODEV;
2081
2082         ret = seq_open(file, &show_traces_seq_ops);
2083         if (!ret) {
2084                 struct seq_file *m = file->private_data;
2085                 m->private = trace_types;
2086         }
2087
2088         return ret;
2089 }
2090
2091 static struct file_operations tracing_fops = {
2092         .open           = tracing_open,
2093         .read           = seq_read,
2094         .llseek         = seq_lseek,
2095         .release        = tracing_release,
2096 };
2097
2098 static struct file_operations tracing_lt_fops = {
2099         .open           = tracing_lt_open,
2100         .read           = seq_read,
2101         .llseek         = seq_lseek,
2102         .release        = tracing_release,
2103 };
2104
2105 static struct file_operations show_traces_fops = {
2106         .open           = show_traces_open,
2107         .read           = seq_read,
2108         .release        = seq_release,
2109 };
2110
2111 /*
2112  * Only trace on a CPU if the bitmask is set:
2113  */
2114 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2115
2116 /*
2117  * When tracing/tracing_cpu_mask is modified then this holds
2118  * the new bitmask we are about to install:
2119  */
2120 static cpumask_t tracing_cpumask_new;
2121
2122 /*
2123  * The tracer itself will not take this lock, but still we want
2124  * to provide a consistent cpumask to user-space:
2125  */
2126 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2127
2128 /*
2129  * Temporary storage for the character representation of the
2130  * CPU bitmask (and one more byte for the newline):
2131  */
2132 static char mask_str[NR_CPUS + 1];
2133
2134 static ssize_t
2135 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2136                      size_t count, loff_t *ppos)
2137 {
2138         int len;
2139
2140         mutex_lock(&tracing_cpumask_update_lock);
2141
2142         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2143         if (count - len < 2) {
2144                 count = -EINVAL;
2145                 goto out_err;
2146         }
2147         len += sprintf(mask_str + len, "\n");
2148         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2149
2150 out_err:
2151         mutex_unlock(&tracing_cpumask_update_lock);
2152
2153         return count;
2154 }
2155
2156 static ssize_t
2157 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2158                       size_t count, loff_t *ppos)
2159 {
2160         int err, cpu;
2161
2162         mutex_lock(&tracing_cpumask_update_lock);
2163         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2164         if (err)
2165                 goto err_unlock;
2166
2167         raw_local_irq_disable();
2168         __raw_spin_lock(&ftrace_max_lock);
2169         for_each_tracing_cpu(cpu) {
2170                 /*
2171                  * Increase/decrease the disabled counter if we are
2172                  * about to flip a bit in the cpumask:
2173                  */
2174                 if (cpu_isset(cpu, tracing_cpumask) &&
2175                                 !cpu_isset(cpu, tracing_cpumask_new)) {
2176                         atomic_inc(&global_trace.data[cpu]->disabled);
2177                 }
2178                 if (!cpu_isset(cpu, tracing_cpumask) &&
2179                                 cpu_isset(cpu, tracing_cpumask_new)) {
2180                         atomic_dec(&global_trace.data[cpu]->disabled);
2181                 }
2182         }
2183         __raw_spin_unlock(&ftrace_max_lock);
2184         raw_local_irq_enable();
2185
2186         tracing_cpumask = tracing_cpumask_new;
2187
2188         mutex_unlock(&tracing_cpumask_update_lock);
2189
2190         return count;
2191
2192 err_unlock:
2193         mutex_unlock(&tracing_cpumask_update_lock);
2194
2195         return err;
2196 }
2197
2198 static struct file_operations tracing_cpumask_fops = {
2199         .open           = tracing_open_generic,
2200         .read           = tracing_cpumask_read,
2201         .write          = tracing_cpumask_write,
2202 };
2203
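/*
 * "iter_ctrl": reading lists every trace option, prefixed with "no"
 * when it is cleared; writing "option" or "nooption" sets or clears
 * the corresponding bit in trace_flags.
 */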
2204 static ssize_t
2205 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2206                        size_t cnt, loff_t *ppos)
2207 {
2208         char *buf;
2209         int r = 0;
2210         int len = 0;
2211         int i;
2212
2213         /* calculate max size */
2214         for (i = 0; trace_options[i]; i++) {
2215                 len += strlen(trace_options[i]);
2216                 len += 3; /* "no" and space */
2217         }
2218
2219         /* +2 for \n and \0 */
2220         buf = kmalloc(len + 2, GFP_KERNEL);
2221         if (!buf)
2222                 return -ENOMEM;
2223
2224         for (i = 0; trace_options[i]; i++) {
2225                 if (trace_flags & (1 << i))
2226                         r += sprintf(buf + r, "%s ", trace_options[i]);
2227                 else
2228                         r += sprintf(buf + r, "no%s ", trace_options[i]);
2229         }
2230
2231         r += sprintf(buf + r, "\n");
2232         WARN_ON(r >= len + 2);
2233
2234         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2235
2236         kfree(buf);
2237
2238         return r;
2239 }
2240
2241 static ssize_t
2242 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2243                         size_t cnt, loff_t *ppos)
2244 {
2245         char buf[64];
2246         char *cmp = buf;
2247         int neg = 0;
2248         int i;
2249
2250         if (cnt >= sizeof(buf))
2251                 return -EINVAL;
2252
2253         if (copy_from_user(&buf, ubuf, cnt))
2254                 return -EFAULT;
2255
2256         buf[cnt] = 0;
2257
2258         if (strncmp(buf, "no", 2) == 0) {
2259                 neg = 1;
2260                 cmp += 2;
2261         }
2262
2263         for (i = 0; trace_options[i]; i++) {
2264                 int len = strlen(trace_options[i]);
2265
2266                 if (strncmp(cmp, trace_options[i], len) == 0) {
2267                         if (neg)
2268                                 trace_flags &= ~(1 << i);
2269                         else
2270                                 trace_flags |= (1 << i);
2271                         break;
2272                 }
2273         }
2274         /*
2275          * If no option could be set, return an error:
2276          */
2277         if (!trace_options[i])
2278                 return -EINVAL;
2279
2280         filp->f_pos += cnt;
2281
2282         return cnt;
2283 }
2284
2285 static struct file_operations tracing_iter_fops = {
2286         .open           = tracing_open_generic,
2287         .read           = tracing_iter_ctrl_read,
2288         .write          = tracing_iter_ctrl_write,
2289 };
2290
2291 static const char readme_msg[] =
2292         "tracing mini-HOWTO:\n\n"
2293         "# mkdir /debug\n"
2294         "# mount -t debugfs nodev /debug\n\n"
2295         "# cat /debug/tracing/available_tracers\n"
2296         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2297         "# cat /debug/tracing/current_tracer\n"
2298         "none\n"
2299         "# echo sched_switch > /debug/tracing/current_tracer\n"
2300         "# cat /debug/tracing/current_tracer\n"
2301         "sched_switch\n"
2302         "# cat /debug/tracing/iter_ctrl\n"
2303         "noprint-parent nosym-offset nosym-addr noverbose\n"
2304         "# echo print-parent > /debug/tracing/iter_ctrl\n"
2305         "# echo 1 > /debug/tracing/tracing_enabled\n"
2306         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2307         "# echo 0 > /debug/tracing/tracing_enabled\n"
2308 ;
2309
2310 static ssize_t
2311 tracing_readme_read(struct file *filp, char __user *ubuf,
2312                        size_t cnt, loff_t *ppos)
2313 {
2314         return simple_read_from_buffer(ubuf, cnt, ppos,
2315                                         readme_msg, strlen(readme_msg));
2316 }
2317
2318 static struct file_operations tracing_readme_fops = {
2319         .open           = tracing_open_generic,
2320         .read           = tracing_readme_read,
2321 };
2322
2323 static ssize_t
2324 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2325                   size_t cnt, loff_t *ppos)
2326 {
2327         struct trace_array *tr = filp->private_data;
2328         char buf[64];
2329         int r;
2330
2331         r = sprintf(buf, "%ld\n", tr->ctrl);
2332         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2333 }
2334
2335 static ssize_t
2336 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2337                    size_t cnt, loff_t *ppos)
2338 {
2339         struct trace_array *tr = filp->private_data;
2340         char buf[64];
2341         long val;
2342         int ret;
2343
2344         if (cnt >= sizeof(buf))
2345                 return -EINVAL;
2346
2347         if (copy_from_user(&buf, ubuf, cnt))
2348                 return -EFAULT;
2349
2350         buf[cnt] = 0;
2351
2352         ret = strict_strtoul(buf, 10, &val);
2353         if (ret < 0)
2354                 return ret;
2355
2356         val = !!val;
2357
2358         mutex_lock(&trace_types_lock);
2359         if (tr->ctrl ^ val) {
2360                 if (val)
2361                         tracer_enabled = 1;
2362                 else
2363                         tracer_enabled = 0;
2364
2365                 tr->ctrl = val;
2366
2367                 if (current_trace && current_trace->ctrl_update)
2368                         current_trace->ctrl_update(tr);
2369         }
2370         mutex_unlock(&trace_types_lock);
2371
2372         filp->f_pos += cnt;
2373
2374         return cnt;
2375 }
2376
2377 static ssize_t
2378 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2379                        size_t cnt, loff_t *ppos)
2380 {
2381         char buf[max_tracer_type_len+2];
2382         int r;
2383
2384         mutex_lock(&trace_types_lock);
2385         if (current_trace)
2386                 r = sprintf(buf, "%s\n", current_trace->name);
2387         else
2388                 r = sprintf(buf, "\n");
2389         mutex_unlock(&trace_types_lock);
2390
2391         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2392 }
2393
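/*
 * Writing a tracer name to "current_tracer": look the name up in
 * the list of registered tracers, reset the tracer that is
 * currently running and initialize the new one.
 */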
2394 static ssize_t
2395 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2396                         size_t cnt, loff_t *ppos)
2397 {
2398         struct trace_array *tr = &global_trace;
2399         struct tracer *t;
2400         char buf[max_tracer_type_len+1];
2401         int i;
2402         size_t ret;
2403
2404         if (cnt > max_tracer_type_len)
2405                 cnt = max_tracer_type_len;
2406         ret = cnt;
2407
2408         if (copy_from_user(&buf, ubuf, cnt))
2409                 return -EFAULT;
2410
2411         buf[cnt] = 0;
2412
2413         /* strip ending whitespace. */
2414         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2415                 buf[i] = 0;
2416
2417         mutex_lock(&trace_types_lock);
2418         for (t = trace_types; t; t = t->next) {
2419                 if (strcmp(t->name, buf) == 0)
2420                         break;
2421         }
2422         if (!t) {
2423                 ret = -EINVAL;
2424                 goto out;
2425         }
2426         if (t == current_trace)
2427                 goto out;
2428
2429         if (current_trace && current_trace->reset)
2430                 current_trace->reset(tr);
2431
2432         current_trace = t;
2433         if (t->init)
2434                 t->init(tr);
2435
2436  out:
2437         mutex_unlock(&trace_types_lock);
2438
2439         if (ret == cnt)
2440                 filp->f_pos += cnt;
2441
2442         return ret;
2443 }
2444
2445 static ssize_t
2446 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2447                      size_t cnt, loff_t *ppos)
2448 {
2449         unsigned long *ptr = filp->private_data;
2450         char buf[64];
2451         int r;
2452
2453         r = snprintf(buf, sizeof(buf), "%ld\n",
2454                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2455         if (r > sizeof(buf))
2456                 r = sizeof(buf);
2457         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2458 }
2459
2460 static ssize_t
2461 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2462                       size_t cnt, loff_t *ppos)
2463 {
2464         long *ptr = filp->private_data;
2465         char buf[64];
2466         long val;
2467         int ret;
2468
2469         if (cnt >= sizeof(buf))
2470                 return -EINVAL;
2471
2472         if (copy_from_user(&buf, ubuf, cnt))
2473                 return -EFAULT;
2474
2475         buf[cnt] = 0;
2476
2477         ret = strict_strtoul(buf, 10, &val);
2478         if (ret < 0)
2479                 return ret;
2480
2481         *ptr = val * 1000;
2482
2483         return cnt;
2484 }
2485
2486 static atomic_t tracing_reader;
2487
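/*
 * "trace_pipe" is a consuming reader: entries handed to user-space
 * are removed from the ring buffer, and only a single reader may
 * have the pipe open at a time.
 */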
2488 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2489 {
2490         struct trace_iterator *iter;
2491
2492         if (tracing_disabled)
2493                 return -ENODEV;
2494
2495         /* We allow only one reader of the pipe */
2496         if (atomic_inc_return(&tracing_reader) != 1) {
2497                 atomic_dec(&tracing_reader);
2498                 return -EBUSY;
2499         }
2500
2501         /* create a buffer to store the information to pass to userspace */
2502         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2503         if (!iter)
2504                 return -ENOMEM;
2505
2506         mutex_lock(&trace_types_lock);
2507         iter->tr = &global_trace;
2508         iter->trace = current_trace;
2509         filp->private_data = iter;
2510
2511         if (iter->trace->pipe_open)
2512                 iter->trace->pipe_open(iter);
2513         mutex_unlock(&trace_types_lock);
2514
2515         return 0;
2516 }
2517
2518 static int tracing_release_pipe(struct inode *inode, struct file *file)
2519 {
2520         struct trace_iterator *iter = file->private_data;
2521
2522         kfree(iter);
2523         atomic_dec(&tracing_reader);
2524
2525         return 0;
2526 }
2527
2528 static unsigned int
2529 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2530 {
2531         struct trace_iterator *iter = filp->private_data;
2532
2533         if (trace_flags & TRACE_ITER_BLOCK) {
2534                 /*
2535                  * Always select as readable when in blocking mode
2536                  */
2537                 return POLLIN | POLLRDNORM;
2538         } else {
2539                 if (!trace_empty(iter))
2540                         return POLLIN | POLLRDNORM;
2541                 poll_wait(filp, &trace_wait, poll_table);
2542                 if (!trace_empty(iter))
2543                         return POLLIN | POLLRDNORM;
2544
2545                 return 0;
2546         }
2547 }
2548
2549 /*
2550  * Consumer reader.
2551  */
2552 static ssize_t
2553 tracing_read_pipe(struct file *filp, char __user *ubuf,
2554                   size_t cnt, loff_t *ppos)
2555 {
2556         struct trace_iterator *iter = filp->private_data;
2557         ssize_t sret;
2558
2559         /* return any leftover data */
2560         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2561         if (sret != -EBUSY)
2562                 return sret;
2563
2564         trace_seq_reset(&iter->seq);
2565
2566         mutex_lock(&trace_types_lock);
2567         if (iter->trace->read) {
2568                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2569                 if (sret)
2570                         goto out;
2571         }
2572
2573 waitagain:
2574         sret = 0;
2575         while (trace_empty(iter)) {
2576
2577                 if ((filp->f_flags & O_NONBLOCK)) {
2578                         sret = -EAGAIN;
2579                         goto out;
2580                 }
2581
2582                 /*
2583                  * This is a makeshift waitqueue. The reasons we don't use
2584                  * a real wait queue are:
2585                  *  1) we only ever have one waiter
2586                  *  2) tracing traces all functions, and we don't want
2587                  *     the overhead of calling wake_up and friends
2588                  *     (and of tracing them too)
2589                  * Anyway, this is a very primitive wakeup.
2590                  */
2591                 set_current_state(TASK_INTERRUPTIBLE);
2592                 iter->tr->waiter = current;
2593
2594                 mutex_unlock(&trace_types_lock);
2595
2596                 /* sleep for 100 msecs, and try again. */
2597                 schedule_timeout(HZ/10);
2598
2599                 mutex_lock(&trace_types_lock);
2600
2601                 iter->tr->waiter = NULL;
2602
2603                 if (signal_pending(current)) {
2604                         sret = -EINTR;
2605                         goto out;
2606                 }
2607
2608                 if (iter->trace != current_trace)
2609                         goto out;
2610
2611                 /*
2612                  * We only stop blocking once tracing is disabled and we have
2613                  * already read something. That is, we still block while tracing
2614                  * is disabled if we have never read anything. This allows a
2615                  * user to cat this file and then enable tracing. But once we
2616                  * have read something, we give an EOF when tracing is disabled again.
2617                  *
2618                  * iter->pos will be 0 if we haven't read anything.
2619                  */
2620                 if (!tracer_enabled && iter->pos)
2621                         break;
2622
2623                 continue;
2624         }
2625
2626         /* stop when tracing is finished */
2627         if (trace_empty(iter))
2628                 goto out;
2629
2630         if (cnt >= PAGE_SIZE)
2631                 cnt = PAGE_SIZE - 1;
2632
2633         /* reset all but tr, trace, and overruns */
2634         memset(&iter->seq, 0,
2635                sizeof(struct trace_iterator) -
2636                offsetof(struct trace_iterator, seq));
2637         iter->pos = -1;
2638
2639         while (find_next_entry_inc(iter) != NULL) {
2640                 enum print_line_t ret;
2641                 int len = iter->seq.len;
2642
2643                 ret = print_trace_line(iter);
2644                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2645                         /* don't print partial lines */
2646                         iter->seq.len = len;
2647                         break;
2648                 }
2649
2650                 trace_consume(iter);
2651
2652                 if (iter->seq.len >= cnt)
2653                         break;
2654         }
2655
2656         /* Now copy what we have to the user */
2657         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2658         if (iter->seq.readpos >= iter->seq.len)
2659                 trace_seq_reset(&iter->seq);
2660
2661         /*
2662          * If there was nothing to send to user, in spite of consuming trace
2663          * entries, go back to wait for more entries.
2664          */
2665         if (sret == -EBUSY)
2666                 goto waitagain;
2667
2668 out:
2669         mutex_unlock(&trace_types_lock);
2670
2671         return sret;
2672 }
2673
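/*
 * "trace_entries": report or resize the number of entries in the
 * ring buffer.  Resizing is only allowed while tracing is disabled,
 * and the max-latency buffer is kept the same size as the live one.
 */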
2674 static ssize_t
2675 tracing_entries_read(struct file *filp, char __user *ubuf,
2676                      size_t cnt, loff_t *ppos)
2677 {
2678         struct trace_array *tr = filp->private_data;
2679         char buf[64];
2680         int r;
2681
2682         r = sprintf(buf, "%lu\n", tr->entries);
2683         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2684 }
2685
2686 static ssize_t
2687 tracing_entries_write(struct file *filp, const char __user *ubuf,
2688                       size_t cnt, loff_t *ppos)
2689 {
2690         unsigned long val;
2691         char buf[64];
2692         int ret;
2693         struct trace_array *tr = filp->private_data;
2694
2695         if (cnt >= sizeof(buf))
2696                 return -EINVAL;
2697
2698         if (copy_from_user(&buf, ubuf, cnt))
2699                 return -EFAULT;
2700
2701         buf[cnt] = 0;
2702
2703         ret = strict_strtoul(buf, 10, &val);
2704         if (ret < 0)
2705                 return ret;
2706
2707         /* must have at least 1 entry */
2708         if (!val)
2709                 return -EINVAL;
2710
2711         mutex_lock(&trace_types_lock);
2712
2713         if (tr->ctrl) {
2714                 cnt = -EBUSY;
2715                 pr_info("ftrace: please disable tracing"
2716                         " before modifying buffer size\n");
2717                 goto out;
2718         }
2719
2720         if (val != global_trace.entries) {
2721                 ret = ring_buffer_resize(global_trace.buffer, val);
2722                 if (ret < 0) {
2723                         cnt = ret;
2724                         goto out;
2725                 }
2726
2727                 ret = ring_buffer_resize(max_tr.buffer, val);
2728                 if (ret < 0) {
2729                         int r;
2730                         cnt = ret;
2731                         r = ring_buffer_resize(global_trace.buffer,
2732                                                global_trace.entries);
2733                         if (r < 0) {
2734                                 /* AARGH! We are left with different
2735                                  * size max buffer!!!! */
2736                                 WARN_ON(1);
2737                                 tracing_disabled = 1;
2738                         }
2739                         goto out;
2740                 }
2741
2742                 global_trace.entries = val;
2743         }
2744
2745         filp->f_pos += cnt;
2746
2747         /* If the resize above disabled tracing, return ENOMEM */
2748         if (tracing_disabled)
2749                 cnt = -ENOMEM;
2750  out:
2751         max_tr.entries = global_trace.entries;
2752         mutex_unlock(&trace_types_lock);
2753
2754         return cnt;
2755 }
2756
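/*
 * Helper for the "trace_marker" file: inject a user-supplied string
 * into the trace buffer through trace_vprintk().
 */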
2757 static int mark_printk(const char *fmt, ...)
2758 {
2759         int ret;
2760         va_list args;
2761         va_start(args, fmt);
2762         ret = trace_vprintk(0, fmt, args);
2763         va_end(args);
2764         return ret;
2765 }
2766
2767 static ssize_t
2768 tracing_mark_write(struct file *filp, const char __user *ubuf,
2769                                         size_t cnt, loff_t *fpos)
2770 {
2771         char *buf;
2772         char *end;
2773         struct trace_array *tr = &global_trace;
2774
2775         if (!tr->ctrl || tracing_disabled)
2776                 return -EINVAL;
2777
2778         if (cnt > TRACE_BUF_SIZE)
2779                 cnt = TRACE_BUF_SIZE;
2780
2781         buf = kmalloc(cnt + 1, GFP_KERNEL);
2782         if (buf == NULL)
2783                 return -ENOMEM;
2784
2785         if (copy_from_user(buf, ubuf, cnt)) {
2786                 kfree(buf);
2787                 return -EFAULT;
2788         }
2789
2790         /* Cut the string off at the first NUL or newline. */
2791         buf[cnt] = '\0';
2792         end = strchr(buf, '\n');
2793         if (end)
2794                 *end = '\0';
2795
2796         cnt = mark_printk("%s\n", buf);
2797         kfree(buf);
2798         *fpos += cnt;
2799
2800         return cnt;
2801 }
2802
2803 static struct file_operations tracing_max_lat_fops = {
2804         .open           = tracing_open_generic,
2805         .read           = tracing_max_lat_read,
2806         .write          = tracing_max_lat_write,
2807 };
2808
2809 static struct file_operations tracing_ctrl_fops = {
2810         .open           = tracing_open_generic,
2811         .read           = tracing_ctrl_read,
2812         .write          = tracing_ctrl_write,
2813 };
2814
2815 static struct file_operations set_tracer_fops = {
2816         .open           = tracing_open_generic,
2817         .read           = tracing_set_trace_read,
2818         .write          = tracing_set_trace_write,
2819 };
2820
2821 static struct file_operations tracing_pipe_fops = {
2822         .open           = tracing_open_pipe,
2823         .poll           = tracing_poll_pipe,
2824         .read           = tracing_read_pipe,
2825         .release        = tracing_release_pipe,
2826 };
2827
2828 static struct file_operations tracing_entries_fops = {
2829         .open           = tracing_open_generic,
2830         .read           = tracing_entries_read,
2831         .write          = tracing_entries_write,
2832 };
2833
2834 static struct file_operations tracing_mark_fops = {
2835         .open           = tracing_open_generic,
2836         .write          = tracing_mark_write,
2837 };
2838
2839 #ifdef CONFIG_DYNAMIC_FTRACE
2840
2841 static ssize_t
2842 tracing_read_long(struct file *filp, char __user *ubuf,
2843                   size_t cnt, loff_t *ppos)
2844 {
2845         unsigned long *p = filp->private_data;
2846         char buf[64];
2847         int r;
2848
2849         r = sprintf(buf, "%ld\n", *p);
2850
2851         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2852 }
2853
2854 static struct file_operations tracing_read_long_fops = {
2855         .open           = tracing_open_generic,
2856         .read           = tracing_read_long,
2857 };
2858 #endif
2859
2860 static struct dentry *d_tracer;
2861
2862 struct dentry *tracing_init_dentry(void)
2863 {
2864         static int once;
2865
2866         if (d_tracer)
2867                 return d_tracer;
2868
2869         d_tracer = debugfs_create_dir("tracing", NULL);
2870
2871         if (!d_tracer && !once) {
2872                 once = 1;
2873                 pr_warning("Could not create debugfs directory 'tracing'\n");
2874                 return NULL;
2875         }
2876
2877         return d_tracer;
2878 }
2879
2880 #ifdef CONFIG_FTRACE_SELFTEST
2881 /* Let selftest have access to static functions in this file */
2882 #include "trace_selftest.c"
2883 #endif
2884
2885 static __init int tracer_init_debugfs(void)
2886 {
2887         struct dentry *d_tracer;
2888         struct dentry *entry;
2889
2890         d_tracer = tracing_init_dentry();
2891
2892         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2893                                     &global_trace, &tracing_ctrl_fops);
2894         if (!entry)
2895                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2896
2897         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2898                                     NULL, &tracing_iter_fops);
2899         if (!entry)
2900                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2901
2902         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2903                                     NULL, &tracing_cpumask_fops);
2904         if (!entry)
2905                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2906
2907         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2908                                     &global_trace, &tracing_lt_fops);
2909         if (!entry)
2910                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2911
2912         entry = debugfs_create_file("trace", 0444, d_tracer,
2913                                     &global_trace, &tracing_fops);
2914         if (!entry)
2915                 pr_warning("Could not create debugfs 'trace' entry\n");
2916
2917         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2918                                     &global_trace, &show_traces_fops);
2919         if (!entry)
2920                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2921
2922         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2923                                     &global_trace, &set_tracer_fops);
2924         if (!entry)
2925                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2926
2927         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2928                                     &tracing_max_latency,
2929                                     &tracing_max_lat_fops);
2930         if (!entry)
2931                 pr_warning("Could not create debugfs "
2932                            "'tracing_max_latency' entry\n");
2933
2934         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2935                                     &tracing_thresh, &tracing_max_lat_fops);
2936         if (!entry)
2937                 pr_warning("Could not create debugfs "
2938                            "'tracing_thresh' entry\n");
2939         entry = debugfs_create_file("README", 0644, d_tracer,
2940                                     NULL, &tracing_readme_fops);
2941         if (!entry)
2942                 pr_warning("Could not create debugfs 'README' entry\n");
2943
2944         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2945                                     NULL, &tracing_pipe_fops);
2946         if (!entry)
2947                 pr_warning("Could not create debugfs "
2948                            "'trace_pipe' entry\n");
2949
2950         entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2951                                     &global_trace, &tracing_entries_fops);
2952         if (!entry)
2953                 pr_warning("Could not create debugfs "
2954                            "'trace_entries' entry\n");
2955
2956         entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2957                                     NULL, &tracing_mark_fops);
2958         if (!entry)
2959                 pr_warning("Could not create debugfs "
2960                            "'trace_marker' entry\n");
2961
2962 #ifdef CONFIG_DYNAMIC_FTRACE
2963         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2964                                     &ftrace_update_tot_cnt,
2965                                     &tracing_read_long_fops);
2966         if (!entry)
2967                 pr_warning("Could not create debugfs "
2968                            "'dyn_ftrace_total_info' entry\n");
2969 #endif
2970 #ifdef CONFIG_SYSPROF_TRACER
2971         init_tracer_sysprof_debugfs(d_tracer);
2972 #endif
2973         return 0;
2974 }
2975
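/*
 * Write a formatted string into the trace buffer as a TRACE_PRINT
 * entry.  Normally reached through the ftrace_printk() wrapper; a
 * minimal, purely illustrative use would be:
 *
 *	ftrace_printk("entered %s, count=%d\n", __func__, count);
 *
 * The message then shows up interleaved with the other entries in
 * the "trace" and "trace_pipe" output.
 */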
2976 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2977 {
2978         static DEFINE_SPINLOCK(trace_buf_lock);
2979         static char trace_buf[TRACE_BUF_SIZE];
2980
2981         struct ring_buffer_event *event;
2982         struct trace_array *tr = &global_trace;
2983         struct trace_array_cpu *data;
2984         struct print_entry *entry;
2985         unsigned long flags, irq_flags;
2986         int cpu, len = 0, size, pc;
2987
2988         if (!tr->ctrl || tracing_disabled)
2989                 return 0;
2990
2991         pc = preempt_count();
2992         preempt_disable_notrace();
2993         cpu = raw_smp_processor_id();
2994         data = tr->data[cpu];
2995
2996         if (unlikely(atomic_read(&data->disabled)))
2997                 goto out;
2998
2999         spin_lock_irqsave(&trace_buf_lock, flags);
3000         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3001
3002         len = min(len, TRACE_BUF_SIZE-1);
3003         trace_buf[len] = 0;
3004
3005         size = sizeof(*entry) + len + 1;
3006         event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3007         if (!event)
3008                 goto out_unlock;
3009         entry = ring_buffer_event_data(event);
3010         tracing_generic_entry_update(&entry->ent, flags, pc);
3011         entry->ent.type                 = TRACE_PRINT;
3012         entry->ip                       = ip;
3013
3014         memcpy(&entry->buf, trace_buf, len);
3015         entry->buf[len] = 0;
3016         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3017
3018  out_unlock:
3019         spin_unlock_irqrestore(&trace_buf_lock, flags);
3020
3021  out:
3022         preempt_enable_notrace();
3023
3024         return len;
3025 }
3026 EXPORT_SYMBOL_GPL(trace_vprintk);
3027
3028 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3029 {
3030         int ret;
3031         va_list ap;
3032
3033         if (!(trace_flags & TRACE_ITER_PRINTK))
3034                 return 0;
3035
3036         va_start(ap, fmt);
3037         ret = trace_vprintk(ip, fmt, ap);
3038         va_end(ap);
3039         return ret;
3040 }
3041 EXPORT_SYMBOL_GPL(__ftrace_printk);
3042
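/*
 * Panic and die notifiers: dump the trace buffers to the console
 * when an oops or panic occurs and ftrace_dump_on_oops is set.
 */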
3043 static int trace_panic_handler(struct notifier_block *this,
3044                                unsigned long event, void *unused)
3045 {
3046         if (ftrace_dump_on_oops)
3047                 ftrace_dump();
3048         return NOTIFY_OK;
3049 }
3050
3051 static struct notifier_block trace_panic_notifier = {
3052         .notifier_call  = trace_panic_handler,
3053         .next           = NULL,
3054         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
3055 };
3056
3057 static int trace_die_handler(struct notifier_block *self,
3058                              unsigned long val,
3059                              void *data)
3060 {
3061         switch (val) {
3062         case DIE_OOPS:
3063                 if (ftrace_dump_on_oops)
3064                         ftrace_dump();
3065                 break;
3066         default:
3067                 break;
3068         }
3069         return NOTIFY_OK;
3070 }
3071
3072 static struct notifier_block trace_die_notifier = {
3073         .notifier_call = trace_die_handler,
3074         .priority = 200
3075 };
3076
3077 /*
3078  * printk is limited to 1024 bytes; we really don't need it that big.
3079  * Nothing should be printing 1000 characters anyway.
3080  */
3081 #define TRACE_MAX_PRINT         1000
3082
3083 /*
3084  * Define here KERN_TRACE so that we have one place to modify
3085  * it if we decide to change what log level the ftrace dump
3086  * should be at.
3087  */
3088 #define KERN_TRACE              KERN_INFO
3089
3090 static void
3091 trace_printk_seq(struct trace_seq *s)
3092 {
3093         /* Probably should print a warning here. */
3094         if (s->len >= TRACE_MAX_PRINT)
3095                 s->len = TRACE_MAX_PRINT;
3096
3097         /* should be NUL terminated, but we are paranoid. */
3098         s->buffer[s->len] = 0;
3099
3100         printk(KERN_TRACE "%s", s->buffer);
3101
3102         trace_seq_reset(s);
3103 }
3104
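/*
 * Dump the entire contents of the trace buffers to the console in
 * the latency format.  ftrace is killed and per-cpu tracing is
 * disabled first, and the dump only ever runs once, since it is
 * meant to be called from a dying context (oops or panic).
 */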
3105 void ftrace_dump(void)
3106 {
3107         static DEFINE_SPINLOCK(ftrace_dump_lock);
3108         /* use static because iter can be a bit big for the stack */
3109         static struct trace_iterator iter;
3110         static cpumask_t mask;
3111         static int dump_ran;
3112         unsigned long flags;
3113         int cnt = 0, cpu;
3114
3115         /* only one dump */
3116         spin_lock_irqsave(&ftrace_dump_lock, flags);
3117         if (dump_ran)
3118                 goto out;
3119
3120         dump_ran = 1;
3121
3122         /* No turning back! */
3123         ftrace_kill_atomic();
3124
3125         for_each_tracing_cpu(cpu) {
3126                 atomic_inc(&global_trace.data[cpu]->disabled);
3127         }
3128
3129         printk(KERN_TRACE "Dumping ftrace buffer:\n");
3130
3131         iter.tr = &global_trace;
3132         iter.trace = current_trace;
3133
3134         /*
3135          * We need to stop all tracing on all CPUS to read
3136          * the next buffer. This is a bit expensive, but it is
3137          * not done often. We fill in all that we can read,
3138          * and then release the locks again.
3139          */
3140
3141         cpus_clear(mask);
3142
3143         while (!trace_empty(&iter)) {
3144
3145                 if (!cnt)
3146                         printk(KERN_TRACE "---------------------------------\n");
3147
3148                 cnt++;
3149
3150                 /* reset all but tr, trace, and overruns */
3151                 memset(&iter.seq, 0,
3152                        sizeof(struct trace_iterator) -
3153                        offsetof(struct trace_iterator, seq));
3154                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3155                 iter.pos = -1;
3156
3157                 if (find_next_entry_inc(&iter) != NULL) {
3158                         print_trace_line(&iter);
3159                         trace_consume(&iter);
3160                 }
3161
3162                 trace_printk_seq(&iter.seq);
3163         }
3164
3165         if (!cnt)
3166                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
3167         else
3168                 printk(KERN_TRACE "---------------------------------\n");
3169
3170  out:
3171         spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3172 }
3173
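/*
 * Early boot setup: allocate the global (and max-latency) ring
 * buffers, point the per-cpu trace data at its per-cpu storage,
 * register the default tracers and hook up the panic/die notifiers.
 */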
3174 __init static int tracer_alloc_buffers(void)
3175 {
3176         struct trace_array_cpu *data;
3177         int i;
3178
3179         /* TODO: make the number of buffers hot pluggable with CPUS */
3180         tracing_buffer_mask = cpu_possible_map;
3181
3182         global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3183                                                    TRACE_BUFFER_FLAGS);
3184         if (!global_trace.buffer) {
3185                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3186                 WARN_ON(1);
3187                 return 0;
3188         }
3189         global_trace.entries = ring_buffer_size(global_trace.buffer);
3190
3191 #ifdef CONFIG_TRACER_MAX_TRACE
3192         max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3193                                              TRACE_BUFFER_FLAGS);
3194         if (!max_tr.buffer) {
3195                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3196                 WARN_ON(1);
3197                 ring_buffer_free(global_trace.buffer);
3198                 return 0;
3199         }
3200         max_tr.entries = ring_buffer_size(max_tr.buffer);
3201         WARN_ON(max_tr.entries != global_trace.entries);
3202 #endif
3203
3204         /* Allocate the first page for all buffers */
3205         for_each_tracing_cpu(i) {
3206                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3207                 max_tr.data[i] = &per_cpu(max_data, i);
3208         }
3209
3210         trace_init_cmdlines();
3211
3212         register_tracer(&nop_trace);
3213 #ifdef CONFIG_BOOT_TRACER
3214         register_tracer(&boot_tracer);
3215         current_trace = &boot_tracer;
3216         current_trace->init(&global_trace);
3217 #else
3218         current_trace = &nop_trace;
3219 #endif
3220
3221         /* All seems OK, enable tracing */
3222         global_trace.ctrl = tracer_enabled;
3223         tracing_disabled = 0;
3224
3225         atomic_notifier_chain_register(&panic_notifier_list,
3226                                        &trace_panic_notifier);
3227
3228         register_die_notifier(&trace_die_notifier);
3229
3230         return 0;
3231 }
3232 early_initcall(tracer_alloc_buffers);
3233 fs_initcall(tracer_init_debugfs);