tracing/ftrace: implement a set_flag callback for tracers
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/notifier.h>
18 #include <linux/debugfs.h>
19 #include <linux/pagemap.h>
20 #include <linux/hardirq.h>
21 #include <linux/linkage.h>
22 #include <linux/uaccess.h>
23 #include <linux/ftrace.h>
24 #include <linux/module.h>
25 #include <linux/percpu.h>
26 #include <linux/kdebug.h>
27 #include <linux/ctype.h>
28 #include <linux/init.h>
29 #include <linux/poll.h>
30 #include <linux/gfp.h>
31 #include <linux/fs.h>
32 #include <linux/kprobes.h>
33 #include <linux/writeback.h>
34
35 #include <linux/stacktrace.h>
36 #include <linux/ring_buffer.h>
37 #include <linux/irqflags.h>
38
39 #include "trace.h"
40
41 #define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)
42
43 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
44 unsigned long __read_mostly     tracing_thresh;
45
46 /* For tracers that don't implement custom flags */
47 static struct tracer_opt dummy_tracer_opt[] = {
48         { }
49 };
50
51 static struct tracer_flags dummy_tracer_flags = {
52         .val = 0,
53         .opts = dummy_tracer_opt
54 };
55
56 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
57 {
58         return 0;
59 }
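/*
 * Example (editor's sketch, not part of this file): how a tracer plugin
 * might use the per-tracer flag interface that the dummy definitions above
 * stand in for.  The option name "myopt", the MYTRACER_OPT_X bit and all
 * mytracer_* symbols are hypothetical; the tracer_opt/tracer_flags field
 * names are assumed to match their definitions in trace.h.
 */
#define MYTRACER_OPT_X	0x1

static struct tracer_opt mytracer_opts[] = {
	{ .name = "myopt", .bit = MYTRACER_OPT_X },
	{ }	/* terminator, as in dummy_tracer_opt above */
};

static struct tracer_flags mytracer_flags = {
	.val	= 0,		/* no option enabled by default */
	.opts	= mytracer_opts,
};

/* called when the user toggles "myopt" through the trace_options file */
static int mytracer_set_flag(u32 old_flags, u32 bit, int set)
{
	if (bit == MYTRACER_OPT_X)
		return 0;	/* accept the new setting */

	return -EINVAL;		/* unknown option bit */
}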
60
61 /*
62  * Kill all tracing for good (never come back).
63  * It is initialized to 1 but will turn to zero if the initialization
64  * of the tracer is successful. But that is the only place that sets
65  * this back to zero.
66  */
67 int tracing_disabled = 1;
68
69 static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
70
71 static inline void ftrace_disable_cpu(void)
72 {
73         preempt_disable();
74         local_inc(&__get_cpu_var(ftrace_cpu_disabled));
75 }
76
77 static inline void ftrace_enable_cpu(void)
78 {
79         local_dec(&__get_cpu_var(ftrace_cpu_disabled));
80         preempt_enable();
81 }
82
83 static cpumask_t __read_mostly          tracing_buffer_mask;
84
85 #define for_each_tracing_cpu(cpu)       \
86         for_each_cpu_mask(cpu, tracing_buffer_mask)
87
88 /*
89  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
90  *
91  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
92  * is set, then ftrace_dump is called. This will output the contents
93  * of the ftrace buffers to the console.  This is very useful for
94  * capturing traces that lead to crashes and outputting them to a
95  * serial console.
96  *
97  * It is off by default, but you can enable it either by specifying
98  * "ftrace_dump_on_oops" on the kernel command line, or by setting
99  * /proc/sys/kernel/ftrace_dump_on_oops to true.
100  */
101 int ftrace_dump_on_oops;
102
103 static int tracing_set_tracer(char *buf);
104
105 static int __init set_ftrace(char *str)
106 {
107         tracing_set_tracer(str);
108         return 1;
109 }
110 __setup("ftrace", set_ftrace);
111
112 static int __init set_ftrace_dump_on_oops(char *str)
113 {
114         ftrace_dump_on_oops = 1;
115         return 1;
116 }
117 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
118
119 long
120 ns2usecs(cycle_t nsec)
121 {
122         nsec += 500;
123         do_div(nsec, 1000);
124         return nsec;
125 }
126
127 cycle_t ftrace_now(int cpu)
128 {
129         u64 ts = ring_buffer_time_stamp(cpu);
130         ring_buffer_normalize_time_stamp(cpu, &ts);
131         return ts;
132 }
133
134 /*
135  * The global_trace is the descriptor that holds the tracing
136  * buffers for the live tracing. For each CPU, it contains
137  * a linked list of pages that will store trace entries. The
138  * page descriptor of the pages in memory is used to hold
139  * the linked list by linking the lru item in the page descriptor
140  * to each of the pages in the buffer per CPU.
141  *
142  * For each active CPU there is a data field that holds the
143  * pages for the buffer for that CPU. Each CPU has the same number
144  * of pages allocated for its buffer.
145  */
146 static struct trace_array       global_trace;
147
148 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
149
150 /*
151  * The max_tr is used to snapshot the global_trace when a maximum
152  * latency is reached. Some tracers will use this to store a maximum
153  * trace while it continues examining live traces.
154  *
155  * The buffers for the max_tr are set up the same as the global_trace.
156  * When a snapshot is taken, the linked list of the max_tr is swapped
157  * with the linked list of the global_trace and the buffers are reset for
158  * the global_trace so the tracing can continue.
159  */
160 static struct trace_array       max_tr;
161
162 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
163
164 /* tracer_enabled is used to toggle activation of a tracer */
165 static int                      tracer_enabled = 1;
166
167 /**
168  * tracing_is_enabled - return tracer_enabled status
169  *
170  * This function is used by other tracers to know the status
171  * of the tracer_enabled flag.  Tracers may use this function
172  * to decide whether they should enable their features when starting
173  * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
174  */
175 int tracing_is_enabled(void)
176 {
177         return tracer_enabled;
178 }
179
180 /* function tracing enabled */
181 int                             ftrace_function_enabled;
182
183 /*
184  * trace_buf_size is the size in bytes that is allocated
185  * for a buffer. Note, the number of bytes is always rounded
186  * to page size.
187  *
188  * This is purposely set to a low default of 16384 entries.
189  * If a dump on oops happens, it will be much appreciated
190  * not to have to wait for all that output. In any case, this is
191  * configurable at both boot time and run time.
192  */
193 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
194
195 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
196
197 /* trace_types holds a link list of available tracers. */
198 static struct tracer            *trace_types __read_mostly;
199
200 /* current_trace points to the tracer that is currently active */
201 static struct tracer            *current_trace __read_mostly;
202
203 /*
204  * max_tracer_type_len is used to simplify the allocating of
205  * buffers to read userspace tracer names. We keep track of
206  * the longest tracer name registered.
207  */
208 static int                      max_tracer_type_len;
209
210 /*
211  * trace_types_lock is used to protect the trace_types list.
212  * This lock is also used to keep user access serialized.
213  * Accesses from userspace will grab this lock while userspace
214  * activities happen inside the kernel.
215  */
216 static DEFINE_MUTEX(trace_types_lock);
217
218 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
219 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
220
221 /* trace_flags holds trace_options default values */
222 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
223         TRACE_ITER_ANNOTATE;
224
225 /**
226  * trace_wake_up - wake up tasks waiting for trace input
227  *
228  * Simply wakes up any task that is blocked on the trace_wait
229  * queue. This is used with trace_poll for tasks polling the trace.
230  */
231 void trace_wake_up(void)
232 {
233         /*
234          * The runqueue_is_locked() can fail, but this is the best we
235          * have for now:
236          */
237         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
238                 wake_up(&trace_wait);
239 }
240
241 static int __init set_buf_size(char *str)
242 {
243         unsigned long buf_size;
244         int ret;
245
246         if (!str)
247                 return 0;
248         ret = strict_strtoul(str, 0, &buf_size);
249         /* nr_entries can not be zero */
250         if (ret < 0 || buf_size == 0)
251                 return 0;
252         trace_buf_size = buf_size;
253         return 1;
254 }
255 __setup("trace_buf_size=", set_buf_size);
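/*
 * Example (editor's note): with the __setup() hook above, booting with
 * "trace_buf_size=1048576" on the kernel command line would request a
 * 1048576-byte trace buffer instead of the 1441792-byte default.
 */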
256
257 unsigned long nsecs_to_usecs(unsigned long nsecs)
258 {
259         return nsecs / 1000;
260 }
261
262 /* These must match the bit positions in trace_iterator_flags */
263 static const char *trace_options[] = {
264         "print-parent",
265         "sym-offset",
266         "sym-addr",
267         "verbose",
268         "raw",
269         "hex",
270         "bin",
271         "block",
272         "stacktrace",
273         "sched-tree",
274         "ftrace_printk",
275         "ftrace_preempt",
276         "branch",
277         "annotate",
278         NULL
279 };
280
281 /*
282  * ftrace_max_lock is used to protect the swapping of buffers
283  * when taking a max snapshot. The buffers themselves are
284  * protected by per_cpu spinlocks. But the action of the swap
285  * needs its own lock.
286  *
287  * This is defined as a raw_spinlock_t in order to help
288  * with performance when lockdep debugging is enabled.
289  */
290 static raw_spinlock_t ftrace_max_lock =
291         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
292
293 /*
294  * Copy the new maximum trace into the separate maximum-trace
295  * structure. (this way the maximum trace is permanently saved,
296  * for later retrieval via /debugfs/tracing/latency_trace)
297  */
298 static void
299 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
300 {
301         struct trace_array_cpu *data = tr->data[cpu];
302
303         max_tr.cpu = cpu;
304         max_tr.time_start = data->preempt_timestamp;
305
306         data = max_tr.data[cpu];
307         data->saved_latency = tracing_max_latency;
308
309         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
310         data->pid = tsk->pid;
311         data->uid = tsk->uid;
312         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
313         data->policy = tsk->policy;
314         data->rt_priority = tsk->rt_priority;
315
316         /* record this task's comm */
317         tracing_record_cmdline(current);
318 }
319
320 /**
321  * trace_seq_printf - sequence printing of trace information
322  * @s: trace sequence descriptor
323  * @fmt: printf format string
324  *
325  * The tracer may use either sequence operations or its own
326  * copy-to-user routines. To simplify formatting of a trace,
327  * trace_seq_printf is used to store strings into a special
328  * buffer (@s). Then the output may be either used by
329  * the sequencer or pulled into another buffer.
330  */
331 int
332 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
333 {
334         int len = (PAGE_SIZE - 1) - s->len;
335         va_list ap;
336         int ret;
337
338         if (!len)
339                 return 0;
340
341         va_start(ap, fmt);
342         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
343         va_end(ap);
344
345         /* If we can't write it all, don't bother writing anything */
346         if (ret >= len)
347                 return 0;
348
349         s->len += ret;
350
351         return len;
352 }
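/*
 * Example (editor's sketch): typical use of trace_seq_printf() from a
 * tracer's output callback.  my_print_line() is hypothetical; the
 * TRACE_TYPE_* return values and the iterator fields are the ones used by
 * the real print routines later in this file.
 */
static enum print_line_t my_print_line(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;

	/* a zero return means the page-sized buffer is full */
	if (!trace_seq_printf(s, "cpu=%d ts=%llu my event\n",
			      iter->cpu, (unsigned long long)iter->ts))
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}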
353
354 /**
355  * trace_seq_puts - trace sequence printing of simple string
356  * @s: trace sequence descriptor
357  * @str: simple string to record
358  *
359  * The tracer may use either the sequence operations or its own
360  * copy to user routines. This function records a simple string
361  * into a special buffer (@s) for later retrieval by a sequencer
362  * or other mechanism.
363  */
364 static int
365 trace_seq_puts(struct trace_seq *s, const char *str)
366 {
367         int len = strlen(str);
368
369         if (len > ((PAGE_SIZE - 1) - s->len))
370                 return 0;
371
372         memcpy(s->buffer + s->len, str, len);
373         s->len += len;
374
375         return len;
376 }
377
378 static int
379 trace_seq_putc(struct trace_seq *s, unsigned char c)
380 {
381         if (s->len >= (PAGE_SIZE - 1))
382                 return 0;
383
384         s->buffer[s->len++] = c;
385
386         return 1;
387 }
388
389 static int
390 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
391 {
392         if (len > ((PAGE_SIZE - 1) - s->len))
393                 return 0;
394
395         memcpy(s->buffer + s->len, mem, len);
396         s->len += len;
397
398         return len;
399 }
400
401 #define MAX_MEMHEX_BYTES        8
402 #define HEX_CHARS               (MAX_MEMHEX_BYTES*2 + 1)
403
404 static int
405 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
406 {
407         unsigned char hex[HEX_CHARS];
408         unsigned char *data = mem;
409         int i, j;
410
411 #ifdef __BIG_ENDIAN
412         for (i = 0, j = 0; i < len; i++) {
413 #else
414         for (i = len-1, j = 0; i >= 0; i--) {
415 #endif
416                 hex[j++] = hex_asc_hi(data[i]);
417                 hex[j++] = hex_asc_lo(data[i]);
418         }
419         hex[j++] = ' ';
420
421         return trace_seq_putmem(s, hex, j);
422 }
423
424 static void
425 trace_seq_reset(struct trace_seq *s)
426 {
427         s->len = 0;
428         s->readpos = 0;
429 }
430
431 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
432 {
433         int len;
434         int ret;
435
436         if (s->len <= s->readpos)
437                 return -EBUSY;
438
439         len = s->len - s->readpos;
440         if (cnt > len)
441                 cnt = len;
442         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
443         if (ret)
444                 return -EFAULT;
445
446         s->readpos += cnt;
447         return cnt;
448 }
449
450 static void
451 trace_print_seq(struct seq_file *m, struct trace_seq *s)
452 {
453         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
454
455         s->buffer[len] = 0;
456         seq_puts(m, s->buffer);
457
458         trace_seq_reset(s);
459 }
460
461 /**
462  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
463  * @tr: tracer
464  * @tsk: the task with the latency
465  * @cpu: The cpu that initiated the trace.
466  *
467  * Flip the buffers between the @tr and the max_tr and record information
468  * about which task was the cause of this latency.
469  */
470 void
471 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
472 {
473         struct ring_buffer *buf = tr->buffer;
474
475         WARN_ON_ONCE(!irqs_disabled());
476         __raw_spin_lock(&ftrace_max_lock);
477
478         tr->buffer = max_tr.buffer;
479         max_tr.buffer = buf;
480
481         ftrace_disable_cpu();
482         ring_buffer_reset(tr->buffer);
483         ftrace_enable_cpu();
484
485         __update_max_tr(tr, tsk, cpu);
486         __raw_spin_unlock(&ftrace_max_lock);
487 }
488
489 /**
490  * update_max_tr_single - only copy one trace over, and reset the rest
491  * @tr: tracer
492  * @tsk: task with the latency
493  * @cpu: the cpu of the buffer to copy.
494  *
495  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
496  */
497 void
498 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
499 {
500         int ret;
501
502         WARN_ON_ONCE(!irqs_disabled());
503         __raw_spin_lock(&ftrace_max_lock);
504
505         ftrace_disable_cpu();
506
507         ring_buffer_reset(max_tr.buffer);
508         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
509
510         ftrace_enable_cpu();
511
512         WARN_ON_ONCE(ret);
513
514         __update_max_tr(tr, tsk, cpu);
515         __raw_spin_unlock(&ftrace_max_lock);
516 }
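/*
 * Example (editor's sketch): how a latency tracer typically drives the two
 * helpers above.  my_report_latency() is hypothetical; see the irqsoff and
 * wakeup tracers for the real callers.  Like update_max_tr() itself, it is
 * expected to run with interrupts disabled.
 */
static void my_report_latency(struct trace_array *tr, int cpu,
			      unsigned long delta)
{
	if (delta <= tracing_max_latency)
		return;				/* not a new maximum */

	tracing_max_latency = delta;		/* remember the new worst case */
	update_max_tr(tr, current, cpu);	/* snapshot the live buffers */
}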
517
518 /**
519  * register_tracer - register a tracer with the ftrace system.
520  * @type: the plugin for the tracer
521  *
522  * Register a new plugin tracer.
523  */
524 int register_tracer(struct tracer *type)
525 {
526         struct tracer *t;
527         int len;
528         int ret = 0;
529
530         if (!type->name) {
531                 pr_info("Tracer must have a name\n");
532                 return -1;
533         }
534
535         mutex_lock(&trace_types_lock);
536         for (t = trace_types; t; t = t->next) {
537                 if (strcmp(type->name, t->name) == 0) {
538                         /* already found */
539                         pr_info("Trace %s already registered\n",
540                                 type->name);
541                         ret = -1;
542                         goto out;
543                 }
544         }
545
546         if (!type->set_flag)
547                 type->set_flag = &dummy_set_flag;
548         if (!type->flags)
549                 type->flags = &dummy_tracer_flags;
550         else
551                 if (!type->flags->opts)
552                         type->flags->opts = dummy_tracer_opt;
553
554 #ifdef CONFIG_FTRACE_STARTUP_TEST
555         if (type->selftest) {
556                 struct tracer *saved_tracer = current_trace;
557                 struct trace_array *tr = &global_trace;
558                 int i;
559                 /*
560                  * Run a selftest on this tracer.
561                  * Here we reset the trace buffer, and set the current
562                  * tracer to be this tracer. The tracer can then run some
563                  * internal tracing to verify that everything is in order.
564                  * If we fail, we do not register this tracer.
565                  */
566                 for_each_tracing_cpu(i) {
567                         tracing_reset(tr, i);
568                 }
569                 current_trace = type;
570                 /* the test is responsible for initializing and enabling */
571                 pr_info("Testing tracer %s: ", type->name);
572                 ret = type->selftest(type, tr);
573                 /* the test is responsible for resetting too */
574                 current_trace = saved_tracer;
575                 if (ret) {
576                         printk(KERN_CONT "FAILED!\n");
577                         goto out;
578                 }
579                 /* Only reset on passing, to avoid touching corrupted buffers */
580                 for_each_tracing_cpu(i) {
581                         tracing_reset(tr, i);
582                 }
583                 printk(KERN_CONT "PASSED\n");
584         }
585 #endif
586
587         type->next = trace_types;
588         trace_types = type;
589         len = strlen(type->name);
590         if (len > max_tracer_type_len)
591                 max_tracer_type_len = len;
592
593  out:
594         mutex_unlock(&trace_types_lock);
595
596         return ret;
597 }
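/*
 * Example (editor's sketch): registering a minimal tracer that wires up the
 * new flags/set_flag members.  mytracer_flags and mytracer_set_flag refer to
 * the hypothetical example near the top of this file; the "mytracer" name is
 * likewise illustrative, and the usual init/reset/selftest callbacks are
 * omitted here.
 */
static struct tracer my_tracer __read_mostly = {
	.name		= "mytracer",
	.flags		= &mytracer_flags,	/* optional custom options */
	.set_flag	= mytracer_set_flag,	/* optional toggle callback */
};

static __init int init_my_tracer(void)
{
	return register_tracer(&my_tracer);
}
device_initcall(init_my_tracer);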
598
599 void unregister_tracer(struct tracer *type)
600 {
601         struct tracer **t;
602         int len;
603
604         mutex_lock(&trace_types_lock);
605         for (t = &trace_types; *t; t = &(*t)->next) {
606                 if (*t == type)
607                         goto found;
608         }
609         pr_info("Trace %s not registered\n", type->name);
610         goto out;
611
612  found:
613         *t = (*t)->next;
614         if (strlen(type->name) != max_tracer_type_len)
615                 goto out;
616
617         max_tracer_type_len = 0;
618         for (t = &trace_types; *t; t = &(*t)->next) {
619                 len = strlen((*t)->name);
620                 if (len > max_tracer_type_len)
621                         max_tracer_type_len = len;
622         }
623  out:
624         mutex_unlock(&trace_types_lock);
625 }
626
627 void tracing_reset(struct trace_array *tr, int cpu)
628 {
629         ftrace_disable_cpu();
630         ring_buffer_reset_cpu(tr->buffer, cpu);
631         ftrace_enable_cpu();
632 }
633
634 #define SAVED_CMDLINES 128
635 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
636 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
637 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
638 static int cmdline_idx;
639 static DEFINE_SPINLOCK(trace_cmdline_lock);
640
641 /* temporarily disable recording */
642 atomic_t trace_record_cmdline_disabled __read_mostly;
643
644 static void trace_init_cmdlines(void)
645 {
646         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
647         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
648         cmdline_idx = 0;
649 }
650
651 static int trace_stop_count;
652 static DEFINE_SPINLOCK(tracing_start_lock);
653
654 /**
655  * tracing_start - quick start of the tracer
656  *
657  * If tracing is enabled but was stopped by tracing_stop,
658  * this will start the tracer back up.
659  */
660 void tracing_start(void)
661 {
662         struct ring_buffer *buffer;
663         unsigned long flags;
664
665         if (tracing_disabled)
666                 return;
667
668         spin_lock_irqsave(&tracing_start_lock, flags);
669         if (--trace_stop_count)
670                 goto out;
671
672         if (trace_stop_count < 0) {
673                 /* Someone screwed up their debugging */
674                 WARN_ON_ONCE(1);
675                 trace_stop_count = 0;
676                 goto out;
677         }
678
679
680         buffer = global_trace.buffer;
681         if (buffer)
682                 ring_buffer_record_enable(buffer);
683
684         buffer = max_tr.buffer;
685         if (buffer)
686                 ring_buffer_record_enable(buffer);
687
688         ftrace_start();
689  out:
690         spin_unlock_irqrestore(&tracing_start_lock, flags);
691 }
692
693 /**
694  * tracing_stop - quick stop of the tracer
695  *
696  * Lightweight way to stop tracing. Use in conjunction with
697  * tracing_start.
698  */
699 void tracing_stop(void)
700 {
701         struct ring_buffer *buffer;
702         unsigned long flags;
703
704         ftrace_stop();
705         spin_lock_irqsave(&tracing_start_lock, flags);
706         if (trace_stop_count++)
707                 goto out;
708
709         buffer = global_trace.buffer;
710         if (buffer)
711                 ring_buffer_record_disable(buffer);
712
713         buffer = max_tr.buffer;
714         if (buffer)
715                 ring_buffer_record_disable(buffer);
716
717  out:
718         spin_unlock_irqrestore(&tracing_start_lock, flags);
719 }
720
721 void trace_stop_cmdline_recording(void);
722
723 static void trace_save_cmdline(struct task_struct *tsk)
724 {
725         unsigned map;
726         unsigned idx;
727
728         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
729                 return;
730
731         /*
732          * It's not the end of the world if we don't get
733          * the lock, but we also don't want to spin
734          * nor do we want to disable interrupts,
735          * so if we miss here, then better luck next time.
736          */
737         if (!spin_trylock(&trace_cmdline_lock))
738                 return;
739
740         idx = map_pid_to_cmdline[tsk->pid];
741         if (idx >= SAVED_CMDLINES) {
742                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
743
744                 map = map_cmdline_to_pid[idx];
745                 if (map <= PID_MAX_DEFAULT)
746                         map_pid_to_cmdline[map] = (unsigned)-1;
747
748                 map_pid_to_cmdline[tsk->pid] = idx;
749
750                 cmdline_idx = idx;
751         }
752
753         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
754
755         spin_unlock(&trace_cmdline_lock);
756 }
757
758 static char *trace_find_cmdline(int pid)
759 {
760         char *cmdline = "<...>";
761         unsigned map;
762
763         if (!pid)
764                 return "<idle>";
765
766         if (pid > PID_MAX_DEFAULT)
767                 goto out;
768
769         map = map_pid_to_cmdline[pid];
770         if (map >= SAVED_CMDLINES)
771                 goto out;
772
773         cmdline = saved_cmdlines[map];
774
775  out:
776         return cmdline;
777 }
778
779 void tracing_record_cmdline(struct task_struct *tsk)
780 {
781         if (atomic_read(&trace_record_cmdline_disabled))
782                 return;
783
784         trace_save_cmdline(tsk);
785 }
786
787 void
788 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
789                              int pc)
790 {
791         struct task_struct *tsk = current;
792
793         entry->preempt_count            = pc & 0xff;
794         entry->pid                      = (tsk) ? tsk->pid : 0;
795         entry->flags =
796 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
797                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
798 #else
799                 TRACE_FLAG_IRQS_NOSUPPORT |
800 #endif
801                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
802                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
803                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
804 }
805
806 void
807 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
808                unsigned long ip, unsigned long parent_ip, unsigned long flags,
809                int pc)
810 {
811         struct ring_buffer_event *event;
812         struct ftrace_entry *entry;
813         unsigned long irq_flags;
814
815         /* If we are reading the ring buffer, don't trace */
816         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
817                 return;
818
819         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
820                                          &irq_flags);
821         if (!event)
822                 return;
823         entry   = ring_buffer_event_data(event);
824         tracing_generic_entry_update(&entry->ent, flags, pc);
825         entry->ent.type                 = TRACE_FN;
826         entry->ip                       = ip;
827         entry->parent_ip                = parent_ip;
828         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
829 }
830
831 #ifdef CONFIG_FUNCTION_RET_TRACER
832 static void __trace_function_return(struct trace_array *tr,
833                                 struct trace_array_cpu *data,
834                                 struct ftrace_retfunc *trace,
835                                 unsigned long flags,
836                                 int pc)
837 {
838         struct ring_buffer_event *event;
839         struct ftrace_ret_entry *entry;
840         unsigned long irq_flags;
841
842         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
843                 return;
844
845         event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
846                                          &irq_flags);
847         if (!event)
848                 return;
849         entry   = ring_buffer_event_data(event);
850         tracing_generic_entry_update(&entry->ent, flags, pc);
851         entry->ent.type                 = TRACE_FN_RET;
852         entry->ip                       = trace->func;
853         entry->parent_ip        = trace->ret;
854         entry->rettime          = trace->rettime;
855         entry->calltime         = trace->calltime;
856         ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
857 }
858 #endif
859
860 void
861 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
862        unsigned long ip, unsigned long parent_ip, unsigned long flags,
863        int pc)
864 {
865         if (likely(!atomic_read(&data->disabled)))
866                 trace_function(tr, data, ip, parent_ip, flags, pc);
867 }
868
869 static void ftrace_trace_stack(struct trace_array *tr,
870                                struct trace_array_cpu *data,
871                                unsigned long flags,
872                                int skip, int pc)
873 {
874 #ifdef CONFIG_STACKTRACE
875         struct ring_buffer_event *event;
876         struct stack_entry *entry;
877         struct stack_trace trace;
878         unsigned long irq_flags;
879
880         if (!(trace_flags & TRACE_ITER_STACKTRACE))
881                 return;
882
883         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
884                                          &irq_flags);
885         if (!event)
886                 return;
887         entry   = ring_buffer_event_data(event);
888         tracing_generic_entry_update(&entry->ent, flags, pc);
889         entry->ent.type         = TRACE_STACK;
890
891         memset(&entry->caller, 0, sizeof(entry->caller));
892
893         trace.nr_entries        = 0;
894         trace.max_entries       = FTRACE_STACK_ENTRIES;
895         trace.skip              = skip;
896         trace.entries           = entry->caller;
897
898         save_stack_trace(&trace);
899         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
900 #endif
901 }
902
903 void __trace_stack(struct trace_array *tr,
904                    struct trace_array_cpu *data,
905                    unsigned long flags,
906                    int skip)
907 {
908         ftrace_trace_stack(tr, data, flags, skip, preempt_count());
909 }
910
911 static void
912 ftrace_trace_special(void *__tr, void *__data,
913                      unsigned long arg1, unsigned long arg2, unsigned long arg3,
914                      int pc)
915 {
916         struct ring_buffer_event *event;
917         struct trace_array_cpu *data = __data;
918         struct trace_array *tr = __tr;
919         struct special_entry *entry;
920         unsigned long irq_flags;
921
922         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
923                                          &irq_flags);
924         if (!event)
925                 return;
926         entry   = ring_buffer_event_data(event);
927         tracing_generic_entry_update(&entry->ent, 0, pc);
928         entry->ent.type                 = TRACE_SPECIAL;
929         entry->arg1                     = arg1;
930         entry->arg2                     = arg2;
931         entry->arg3                     = arg3;
932         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
933         ftrace_trace_stack(tr, data, irq_flags, 4, pc);
934
935         trace_wake_up();
936 }
937
938 void
939 __trace_special(void *__tr, void *__data,
940                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
941 {
942         ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
943 }
944
945 void
946 tracing_sched_switch_trace(struct trace_array *tr,
947                            struct trace_array_cpu *data,
948                            struct task_struct *prev,
949                            struct task_struct *next,
950                            unsigned long flags, int pc)
951 {
952         struct ring_buffer_event *event;
953         struct ctx_switch_entry *entry;
954         unsigned long irq_flags;
955
956         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
957                                            &irq_flags);
958         if (!event)
959                 return;
960         entry   = ring_buffer_event_data(event);
961         tracing_generic_entry_update(&entry->ent, flags, pc);
962         entry->ent.type                 = TRACE_CTX;
963         entry->prev_pid                 = prev->pid;
964         entry->prev_prio                = prev->prio;
965         entry->prev_state               = prev->state;
966         entry->next_pid                 = next->pid;
967         entry->next_prio                = next->prio;
968         entry->next_state               = next->state;
969         entry->next_cpu = task_cpu(next);
970         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
971         ftrace_trace_stack(tr, data, flags, 5, pc);
972 }
973
974 void
975 tracing_sched_wakeup_trace(struct trace_array *tr,
976                            struct trace_array_cpu *data,
977                            struct task_struct *wakee,
978                            struct task_struct *curr,
979                            unsigned long flags, int pc)
980 {
981         struct ring_buffer_event *event;
982         struct ctx_switch_entry *entry;
983         unsigned long irq_flags;
984
985         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
986                                            &irq_flags);
987         if (!event)
988                 return;
989         entry   = ring_buffer_event_data(event);
990         tracing_generic_entry_update(&entry->ent, flags, pc);
991         entry->ent.type                 = TRACE_WAKE;
992         entry->prev_pid                 = curr->pid;
993         entry->prev_prio                = curr->prio;
994         entry->prev_state               = curr->state;
995         entry->next_pid                 = wakee->pid;
996         entry->next_prio                = wakee->prio;
997         entry->next_state               = wakee->state;
998         entry->next_cpu                 = task_cpu(wakee);
999         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1000         ftrace_trace_stack(tr, data, flags, 6, pc);
1001
1002         trace_wake_up();
1003 }
1004
1005 void
1006 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1007 {
1008         struct trace_array *tr = &global_trace;
1009         struct trace_array_cpu *data;
1010         unsigned long flags;
1011         int cpu;
1012         int pc;
1013
1014         if (tracing_disabled)
1015                 return;
1016
1017         pc = preempt_count();
1018         local_irq_save(flags);
1019         cpu = raw_smp_processor_id();
1020         data = tr->data[cpu];
1021
1022         if (likely(atomic_inc_return(&data->disabled) == 1))
1023                 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1024
1025         atomic_dec(&data->disabled);
1026         local_irq_restore(flags);
1027 }
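/*
 * Example (editor's note): ftrace_special() is meant to be sprinkled into
 * code under investigation.  A hypothetical call such as
 *
 *	ftrace_special(smp_processor_id(), jiffies, 0);
 *
 * drops a TRACE_SPECIAL entry carrying those three values into the trace.
 */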
1028
1029 #ifdef CONFIG_FUNCTION_TRACER
1030 static void
1031 function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1032 {
1033         struct trace_array *tr = &global_trace;
1034         struct trace_array_cpu *data;
1035         unsigned long flags;
1036         long disabled;
1037         int cpu, resched;
1038         int pc;
1039
1040         if (unlikely(!ftrace_function_enabled))
1041                 return;
1042
1043         pc = preempt_count();
1044         resched = ftrace_preempt_disable();
1045         local_save_flags(flags);
1046         cpu = raw_smp_processor_id();
1047         data = tr->data[cpu];
1048         disabled = atomic_inc_return(&data->disabled);
1049
1050         if (likely(disabled == 1))
1051                 trace_function(tr, data, ip, parent_ip, flags, pc);
1052
1053         atomic_dec(&data->disabled);
1054         ftrace_preempt_enable(resched);
1055 }
1056
1057 static void
1058 function_trace_call(unsigned long ip, unsigned long parent_ip)
1059 {
1060         struct trace_array *tr = &global_trace;
1061         struct trace_array_cpu *data;
1062         unsigned long flags;
1063         long disabled;
1064         int cpu;
1065         int pc;
1066
1067         if (unlikely(!ftrace_function_enabled))
1068                 return;
1069
1070         /*
1071          * Need to use raw, since this must be called before the
1072          * recursive protection is performed.
1073          */
1074         local_irq_save(flags);
1075         cpu = raw_smp_processor_id();
1076         data = tr->data[cpu];
1077         disabled = atomic_inc_return(&data->disabled);
1078
1079         if (likely(disabled == 1)) {
1080                 pc = preempt_count();
1081                 trace_function(tr, data, ip, parent_ip, flags, pc);
1082         }
1083
1084         atomic_dec(&data->disabled);
1085         local_irq_restore(flags);
1086 }
1087
1088 #ifdef CONFIG_FUNCTION_RET_TRACER
1089 void trace_function_return(struct ftrace_retfunc *trace)
1090 {
1091         struct trace_array *tr = &global_trace;
1092         struct trace_array_cpu *data;
1093         unsigned long flags;
1094         long disabled;
1095         int cpu;
1096         int pc;
1097
1098         raw_local_irq_save(flags);
1099         cpu = raw_smp_processor_id();
1100         data = tr->data[cpu];
1101         disabled = atomic_inc_return(&data->disabled);
1102         if (likely(disabled == 1)) {
1103                 pc = preempt_count();
1104                 __trace_function_return(tr, data, trace, flags, pc);
1105         }
1106         atomic_dec(&data->disabled);
1107         raw_local_irq_restore(flags);
1108 }
1109 #endif /* CONFIG_FUNCTION_RET_TRACER */
1110
1111 static struct ftrace_ops trace_ops __read_mostly =
1112 {
1113         .func = function_trace_call,
1114 };
1115
1116 void tracing_start_function_trace(void)
1117 {
1118         ftrace_function_enabled = 0;
1119
1120         if (trace_flags & TRACE_ITER_PREEMPTONLY)
1121                 trace_ops.func = function_trace_call_preempt_only;
1122         else
1123                 trace_ops.func = function_trace_call;
1124
1125         register_ftrace_function(&trace_ops);
1126         ftrace_function_enabled = 1;
1127 }
1128
1129 void tracing_stop_function_trace(void)
1130 {
1131         ftrace_function_enabled = 0;
1132         unregister_ftrace_function(&trace_ops);
1133 }
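/*
 * Example (editor's sketch): how a plugin typically drives the hooks above.
 * my_function_tracer_start/stop are hypothetical wrappers; the real users
 * are the function and sched tracers.
 */
static void my_function_tracer_start(struct trace_array *tr)
{
	int cpu;

	/* empty every per-cpu buffer before tracing restarts */
	for_each_tracing_cpu(cpu)
		tracing_reset(tr, cpu);

	tracing_start_function_trace();	/* hook trace_ops into the callers */
}

static void my_function_tracer_stop(struct trace_array *tr)
{
	tracing_stop_function_trace();	/* unhook before tearing down */
}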
1134 #endif
1135
1136 enum trace_file_type {
1137         TRACE_FILE_LAT_FMT      = 1,
1138         TRACE_FILE_ANNOTATE     = 2,
1139 };
1140
1141 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1142 {
1143         /* Don't allow ftrace to trace into the ring buffers */
1144         ftrace_disable_cpu();
1145
1146         iter->idx++;
1147         if (iter->buffer_iter[iter->cpu])
1148                 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1149
1150         ftrace_enable_cpu();
1151 }
1152
1153 static struct trace_entry *
1154 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1155 {
1156         struct ring_buffer_event *event;
1157         struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1158
1159         /* Don't allow ftrace to trace into the ring buffers */
1160         ftrace_disable_cpu();
1161
1162         if (buf_iter)
1163                 event = ring_buffer_iter_peek(buf_iter, ts);
1164         else
1165                 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1166
1167         ftrace_enable_cpu();
1168
1169         return event ? ring_buffer_event_data(event) : NULL;
1170 }
1171
1172 static struct trace_entry *
1173 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1174 {
1175         struct ring_buffer *buffer = iter->tr->buffer;
1176         struct trace_entry *ent, *next = NULL;
1177         u64 next_ts = 0, ts;
1178         int next_cpu = -1;
1179         int cpu;
1180
1181         for_each_tracing_cpu(cpu) {
1182
1183                 if (ring_buffer_empty_cpu(buffer, cpu))
1184                         continue;
1185
1186                 ent = peek_next_entry(iter, cpu, &ts);
1187
1188                 /*
1189                  * Pick the entry with the smallest timestamp:
1190                  */
1191                 if (ent && (!next || ts < next_ts)) {
1192                         next = ent;
1193                         next_cpu = cpu;
1194                         next_ts = ts;
1195                 }
1196         }
1197
1198         if (ent_cpu)
1199                 *ent_cpu = next_cpu;
1200
1201         if (ent_ts)
1202                 *ent_ts = next_ts;
1203
1204         return next;
1205 }
1206
1207 /* Find the next real entry, without updating the iterator itself */
1208 static struct trace_entry *
1209 find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1210 {
1211         return __find_next_entry(iter, ent_cpu, ent_ts);
1212 }
1213
1214 /* Find the next real entry, and increment the iterator to the next entry */
1215 static void *find_next_entry_inc(struct trace_iterator *iter)
1216 {
1217         iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1218
1219         if (iter->ent)
1220                 trace_iterator_increment(iter, iter->cpu);
1221
1222         return iter->ent ? iter : NULL;
1223 }
1224
1225 static void trace_consume(struct trace_iterator *iter)
1226 {
1227         /* Don't allow ftrace to trace into the ring buffers */
1228         ftrace_disable_cpu();
1229         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1230         ftrace_enable_cpu();
1231 }
1232
1233 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1234 {
1235         struct trace_iterator *iter = m->private;
1236         int i = (int)*pos;
1237         void *ent;
1238
1239         (*pos)++;
1240
1241         /* can't go backwards */
1242         if (iter->idx > i)
1243                 return NULL;
1244
1245         if (iter->idx < 0)
1246                 ent = find_next_entry_inc(iter);
1247         else
1248                 ent = iter;
1249
1250         while (ent && iter->idx < i)
1251                 ent = find_next_entry_inc(iter);
1252
1253         iter->pos = *pos;
1254
1255         return ent;
1256 }
1257
1258 static void *s_start(struct seq_file *m, loff_t *pos)
1259 {
1260         struct trace_iterator *iter = m->private;
1261         void *p = NULL;
1262         loff_t l = 0;
1263         int cpu;
1264
1265         mutex_lock(&trace_types_lock);
1266
1267         if (!current_trace || current_trace != iter->trace) {
1268                 mutex_unlock(&trace_types_lock);
1269                 return NULL;
1270         }
1271
1272         atomic_inc(&trace_record_cmdline_disabled);
1273
1274         if (*pos != iter->pos) {
1275                 iter->ent = NULL;
1276                 iter->cpu = 0;
1277                 iter->idx = -1;
1278
1279                 ftrace_disable_cpu();
1280
1281                 for_each_tracing_cpu(cpu) {
1282                         ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1283                 }
1284
1285                 ftrace_enable_cpu();
1286
1287                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1288                         ;
1289
1290         } else {
1291                 l = *pos - 1;
1292                 p = s_next(m, p, &l);
1293         }
1294
1295         return p;
1296 }
1297
1298 static void s_stop(struct seq_file *m, void *p)
1299 {
1300         atomic_dec(&trace_record_cmdline_disabled);
1301         mutex_unlock(&trace_types_lock);
1302 }
1303
1304 #ifdef CONFIG_KRETPROBES
1305 static inline const char *kretprobed(const char *name)
1306 {
1307         static const char tramp_name[] = "kretprobe_trampoline";
1308         int size = sizeof(tramp_name);
1309
1310         if (strncmp(tramp_name, name, size) == 0)
1311                 return "[unknown/kretprobe'd]";
1312         return name;
1313 }
1314 #else
1315 static inline const char *kretprobed(const char *name)
1316 {
1317         return name;
1318 }
1319 #endif /* CONFIG_KRETPROBES */
1320
1321 static int
1322 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1323 {
1324 #ifdef CONFIG_KALLSYMS
1325         char str[KSYM_SYMBOL_LEN];
1326         const char *name;
1327
1328         kallsyms_lookup(address, NULL, NULL, NULL, str);
1329
1330         name = kretprobed(str);
1331
1332         return trace_seq_printf(s, fmt, name);
1333 #endif
1334         return 1;
1335 }
1336
1337 static int
1338 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1339                      unsigned long address)
1340 {
1341 #ifdef CONFIG_KALLSYMS
1342         char str[KSYM_SYMBOL_LEN];
1343         const char *name;
1344
1345         sprint_symbol(str, address);
1346         name = kretprobed(str);
1347
1348         return trace_seq_printf(s, fmt, name);
1349 #endif
1350         return 1;
1351 }
1352
1353 #ifndef CONFIG_64BIT
1354 # define IP_FMT "%08lx"
1355 #else
1356 # define IP_FMT "%016lx"
1357 #endif
1358
1359 int
1360 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1361 {
1362         int ret;
1363
1364         if (!ip)
1365                 return trace_seq_printf(s, "0");
1366
1367         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1368                 ret = seq_print_sym_offset(s, "%s", ip);
1369         else
1370                 ret = seq_print_sym_short(s, "%s", ip);
1371
1372         if (!ret)
1373                 return 0;
1374
1375         if (sym_flags & TRACE_ITER_SYM_ADDR)
1376                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1377         return ret;
1378 }
1379
1380 static void print_lat_help_header(struct seq_file *m)
1381 {
1382         seq_puts(m, "#                  _------=> CPU#            \n");
1383         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1384         seq_puts(m, "#                | / _----=> need-resched    \n");
1385         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1386         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1387         seq_puts(m, "#                |||| /                      \n");
1388         seq_puts(m, "#                |||||     delay             \n");
1389         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1390         seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
1391 }
1392
1393 static void print_func_help_header(struct seq_file *m)
1394 {
1395         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1396         seq_puts(m, "#              | |       |          |         |\n");
1397 }
1398
1399
1400 static void
1401 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1402 {
1403         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1404         struct trace_array *tr = iter->tr;
1405         struct trace_array_cpu *data = tr->data[tr->cpu];
1406         struct tracer *type = current_trace;
1407         unsigned long total;
1408         unsigned long entries;
1409         const char *name = "preemption";
1410
1411         if (type)
1412                 name = type->name;
1413
1414         entries = ring_buffer_entries(iter->tr->buffer);
1415         total = entries +
1416                 ring_buffer_overruns(iter->tr->buffer);
1417
1418         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1419                    name, UTS_RELEASE);
1420         seq_puts(m, "-----------------------------------"
1421                  "---------------------------------\n");
1422         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1423                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1424                    nsecs_to_usecs(data->saved_latency),
1425                    entries,
1426                    total,
1427                    tr->cpu,
1428 #if defined(CONFIG_PREEMPT_NONE)
1429                    "server",
1430 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1431                    "desktop",
1432 #elif defined(CONFIG_PREEMPT)
1433                    "preempt",
1434 #else
1435                    "unknown",
1436 #endif
1437                    /* These are reserved for later use */
1438                    0, 0, 0, 0);
1439 #ifdef CONFIG_SMP
1440         seq_printf(m, " #P:%d)\n", num_online_cpus());
1441 #else
1442         seq_puts(m, ")\n");
1443 #endif
1444         seq_puts(m, "    -----------------\n");
1445         seq_printf(m, "    | task: %.16s-%d "
1446                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1447                    data->comm, data->pid, data->uid, data->nice,
1448                    data->policy, data->rt_priority);
1449         seq_puts(m, "    -----------------\n");
1450
1451         if (data->critical_start) {
1452                 seq_puts(m, " => started at: ");
1453                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1454                 trace_print_seq(m, &iter->seq);
1455                 seq_puts(m, "\n => ended at:   ");
1456                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1457                 trace_print_seq(m, &iter->seq);
1458                 seq_puts(m, "\n");
1459         }
1460
1461         seq_puts(m, "\n");
1462 }
1463
1464 static void
1465 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1466 {
1467         int hardirq, softirq;
1468         char *comm;
1469
1470         comm = trace_find_cmdline(entry->pid);
1471
1472         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1473         trace_seq_printf(s, "%3d", cpu);
1474         trace_seq_printf(s, "%c%c",
1475                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1476                          (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1477                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1478
1479         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1480         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1481         if (hardirq && softirq) {
1482                 trace_seq_putc(s, 'H');
1483         } else {
1484                 if (hardirq) {
1485                         trace_seq_putc(s, 'h');
1486                 } else {
1487                         if (softirq)
1488                                 trace_seq_putc(s, 's');
1489                         else
1490                                 trace_seq_putc(s, '.');
1491                 }
1492         }
1493
1494         if (entry->preempt_count)
1495                 trace_seq_printf(s, "%x", entry->preempt_count);
1496         else
1497                 trace_seq_puts(s, ".");
1498 }
1499
1500 unsigned long preempt_mark_thresh = 100;
1501
1502 static void
1503 lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1504                     unsigned long rel_usecs)
1505 {
1506         trace_seq_printf(s, " %4lldus", abs_usecs);
1507         if (rel_usecs > preempt_mark_thresh)
1508                 trace_seq_puts(s, "!: ");
1509         else if (rel_usecs > 1)
1510                 trace_seq_puts(s, "+: ");
1511         else
1512                 trace_seq_puts(s, " : ");
1513 }
1514
1515 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1516
1517 /*
1518  * The message is supposed to contain an ending newline.
1519  * If the printing stops prematurely, try to add a newline of our own.
1520  */
1521 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1522 {
1523         struct trace_entry *ent;
1524         struct trace_field_cont *cont;
1525         bool ok = true;
1526
1527         ent = peek_next_entry(iter, iter->cpu, NULL);
1528         if (!ent || ent->type != TRACE_CONT) {
1529                 trace_seq_putc(s, '\n');
1530                 return;
1531         }
1532
1533         do {
1534                 cont = (struct trace_field_cont *)ent;
1535                 if (ok)
1536                         ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1537
1538                 ftrace_disable_cpu();
1539
1540                 if (iter->buffer_iter[iter->cpu])
1541                         ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1542                 else
1543                         ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1544
1545                 ftrace_enable_cpu();
1546
1547                 ent = peek_next_entry(iter, iter->cpu, NULL);
1548         } while (ent && ent->type == TRACE_CONT);
1549
1550         if (!ok)
1551                 trace_seq_putc(s, '\n');
1552 }
1553
1554 static void test_cpu_buff_start(struct trace_iterator *iter)
1555 {
1556         struct trace_seq *s = &iter->seq;
1557
1558         if (!(trace_flags & TRACE_ITER_ANNOTATE))
1559                 return;
1560
1561         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1562                 return;
1563
1564         if (cpu_isset(iter->cpu, iter->started))
1565                 return;
1566
1567         cpu_set(iter->cpu, iter->started);
1568         trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1569 }
1570
1571 static enum print_line_t
1572 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1573 {
1574         struct trace_seq *s = &iter->seq;
1575         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1576         struct trace_entry *next_entry;
1577         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1578         struct trace_entry *entry = iter->ent;
1579         unsigned long abs_usecs;
1580         unsigned long rel_usecs;
1581         u64 next_ts;
1582         char *comm;
1583         int S, T;
1584         int i;
1585         unsigned state;
1586
1587         if (entry->type == TRACE_CONT)
1588                 return TRACE_TYPE_HANDLED;
1589
1590         test_cpu_buff_start(iter);
1591
1592         next_entry = find_next_entry(iter, NULL, &next_ts);
1593         if (!next_entry)
1594                 next_ts = iter->ts;
1595         rel_usecs = ns2usecs(next_ts - iter->ts);
1596         abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1597
1598         if (verbose) {
1599                 comm = trace_find_cmdline(entry->pid);
1600                 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1601                                  " %ld.%03ldms (+%ld.%03ldms): ",
1602                                  comm,
1603                                  entry->pid, cpu, entry->flags,
1604                                  entry->preempt_count, trace_idx,
1605                                  ns2usecs(iter->ts),
1606                                  abs_usecs/1000,
1607                                  abs_usecs % 1000, rel_usecs/1000,
1608                                  rel_usecs % 1000);
1609         } else {
1610                 lat_print_generic(s, entry, cpu);
1611                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1612         }
1613         switch (entry->type) {
1614         case TRACE_FN: {
1615                 struct ftrace_entry *field;
1616
1617                 trace_assign_type(field, entry);
1618
1619                 seq_print_ip_sym(s, field->ip, sym_flags);
1620                 trace_seq_puts(s, " (");
1621                 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1622                 trace_seq_puts(s, ")\n");
1623                 break;
1624         }
1625         case TRACE_CTX:
1626         case TRACE_WAKE: {
1627                 struct ctx_switch_entry *field;
1628
1629                 trace_assign_type(field, entry);
1630
1631                 T = field->next_state < sizeof(state_to_char) ?
1632                         state_to_char[field->next_state] : 'X';
1633
1634                 state = field->prev_state ?
1635                         __ffs(field->prev_state) + 1 : 0;
1636                 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1637                 comm = trace_find_cmdline(field->next_pid);
1638                 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1639                                  field->prev_pid,
1640                                  field->prev_prio,
1641                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1642                                  field->next_cpu,
1643                                  field->next_pid,
1644                                  field->next_prio,
1645                                  T, comm);
1646                 break;
1647         }
1648         case TRACE_SPECIAL: {
1649                 struct special_entry *field;
1650
1651                 trace_assign_type(field, entry);
1652
1653                 trace_seq_printf(s, "# %ld %ld %ld\n",
1654                                  field->arg1,
1655                                  field->arg2,
1656                                  field->arg3);
1657                 break;
1658         }
1659         case TRACE_STACK: {
1660                 struct stack_entry *field;
1661
1662                 trace_assign_type(field, entry);
1663
1664                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1665                         if (i)
1666                                 trace_seq_puts(s, " <= ");
1667                         seq_print_ip_sym(s, field->caller[i], sym_flags);
1668                 }
1669                 trace_seq_puts(s, "\n");
1670                 break;
1671         }
1672         case TRACE_PRINT: {
1673                 struct print_entry *field;
1674
1675                 trace_assign_type(field, entry);
1676
1677                 seq_print_ip_sym(s, field->ip, sym_flags);
1678                 trace_seq_printf(s, ": %s", field->buf);
1679                 if (entry->flags & TRACE_FLAG_CONT)
1680                         trace_seq_print_cont(s, iter);
1681                 break;
1682         }
1683         case TRACE_BRANCH: {
1684                 struct trace_branch *field;
1685
1686                 trace_assign_type(field, entry);
1687
1688                 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1689                                  field->correct ? "  ok  " : " MISS ",
1690                                  field->func,
1691                                  field->file,
1692                                  field->line);
1693                 break;
1694         }
1695         default:
1696                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1697         }
1698         return TRACE_TYPE_HANDLED;
1699 }
1700
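/* Default human-readable format: comm-pid [cpu] secs.usecs: event details */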
1701 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1702 {
1703         struct trace_seq *s = &iter->seq;
1704         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1705         struct trace_entry *entry;
1706         unsigned long usec_rem;
1707         unsigned long long t;
1708         unsigned long secs;
1709         char *comm;
1710         int ret;
1711         int S, T;
1712         int i;
1713
1714         entry = iter->ent;
1715
1716         if (entry->type == TRACE_CONT)
1717                 return TRACE_TYPE_HANDLED;
1718
1719         test_cpu_buff_start(iter);
1720
1721         comm = trace_find_cmdline(iter->ent->pid);
1722
1723         t = ns2usecs(iter->ts);
1724         usec_rem = do_div(t, 1000000ULL);
1725         secs = (unsigned long)t;
1726
1727         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1728         if (!ret)
1729                 return TRACE_TYPE_PARTIAL_LINE;
1730         ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1731         if (!ret)
1732                 return TRACE_TYPE_PARTIAL_LINE;
1733         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1734         if (!ret)
1735                 return TRACE_TYPE_PARTIAL_LINE;
1736
1737         switch (entry->type) {
1738         case TRACE_FN: {
1739                 struct ftrace_entry *field;
1740
1741                 trace_assign_type(field, entry);
1742
1743                 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1744                 if (!ret)
1745                         return TRACE_TYPE_PARTIAL_LINE;
1746                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1747                                                 field->parent_ip) {
1748                         ret = trace_seq_printf(s, " <-");
1749                         if (!ret)
1750                                 return TRACE_TYPE_PARTIAL_LINE;
1751                         ret = seq_print_ip_sym(s,
1752                                                field->parent_ip,
1753                                                sym_flags);
1754                         if (!ret)
1755                                 return TRACE_TYPE_PARTIAL_LINE;
1756                 }
1757                 ret = trace_seq_printf(s, "\n");
1758                 if (!ret)
1759                         return TRACE_TYPE_PARTIAL_LINE;
1760                 break;
1761         }
1762         case TRACE_CTX:
1763         case TRACE_WAKE: {
1764                 struct ctx_switch_entry *field;
1765
1766                 trace_assign_type(field, entry);
1767
1768                 S = field->prev_state < sizeof(state_to_char) ?
1769                         state_to_char[field->prev_state] : 'X';
1770                 T = field->next_state < sizeof(state_to_char) ?
1771                         state_to_char[field->next_state] : 'X';
1772                 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1773                                        field->prev_pid,
1774                                        field->prev_prio,
1775                                        S,
1776                                        entry->type == TRACE_CTX ? "==>" : "  +",
1777                                        field->next_cpu,
1778                                        field->next_pid,
1779                                        field->next_prio,
1780                                        T);
1781                 if (!ret)
1782                         return TRACE_TYPE_PARTIAL_LINE;
1783                 break;
1784         }
1785         case TRACE_SPECIAL: {
1786                 struct special_entry *field;
1787
1788                 trace_assign_type(field, entry);
1789
1790                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1791                                  field->arg1,
1792                                  field->arg2,
1793                                  field->arg3);
1794                 if (!ret)
1795                         return TRACE_TYPE_PARTIAL_LINE;
1796                 break;
1797         }
1798         case TRACE_STACK: {
1799                 struct stack_entry *field;
1800
1801                 trace_assign_type(field, entry);
1802
1803                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1804                         if (i) {
1805                                 ret = trace_seq_puts(s, " <= ");
1806                                 if (!ret)
1807                                         return TRACE_TYPE_PARTIAL_LINE;
1808                         }
1809                         ret = seq_print_ip_sym(s, field->caller[i],
1810                                                sym_flags);
1811                         if (!ret)
1812                                 return TRACE_TYPE_PARTIAL_LINE;
1813                 }
1814                 ret = trace_seq_puts(s, "\n");
1815                 if (!ret)
1816                         return TRACE_TYPE_PARTIAL_LINE;
1817                 break;
1818         }
1819         case TRACE_PRINT: {
1820                 struct print_entry *field;
1821
1822                 trace_assign_type(field, entry);
1823
1824                 seq_print_ip_sym(s, field->ip, sym_flags);
1825                 trace_seq_printf(s, ": %s", field->buf);
1826                 if (entry->flags & TRACE_FLAG_CONT)
1827                         trace_seq_print_cont(s, iter);
1828                 break;
1829         }
1830         case TRACE_FN_RET: {
1831                 return print_return_function(iter);
1832                 break;
1833         }
1834         case TRACE_BRANCH: {
1835                 struct trace_branch *field;
1836
1837                 trace_assign_type(field, entry);
1838
1839                 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1840                                  field->correct ? "  ok  " : " MISS ",
1841                                  field->func,
1842                                  field->file,
1843                                  field->line);
1844                 break;
1845         }
1846         }
1847         return TRACE_TYPE_HANDLED;
1848 }
1849
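/* Raw format: pid, cpu and timestamp followed by the numeric event fields */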
1850 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1851 {
1852         struct trace_seq *s = &iter->seq;
1853         struct trace_entry *entry;
1854         int ret;
1855         int S, T;
1856
1857         entry = iter->ent;
1858
1859         if (entry->type == TRACE_CONT)
1860                 return TRACE_TYPE_HANDLED;
1861
1862         ret = trace_seq_printf(s, "%d %d %llu ",
1863                 entry->pid, iter->cpu, iter->ts);
1864         if (!ret)
1865                 return TRACE_TYPE_PARTIAL_LINE;
1866
1867         switch (entry->type) {
1868         case TRACE_FN: {
1869                 struct ftrace_entry *field;
1870
1871                 trace_assign_type(field, entry);
1872
1873                 ret = trace_seq_printf(s, "%lx %lx\n",
1874                                         field->ip,
1875                                         field->parent_ip);
1876                 if (!ret)
1877                         return TRACE_TYPE_PARTIAL_LINE;
1878                 break;
1879         }
1880         case TRACE_CTX:
1881         case TRACE_WAKE: {
1882                 struct ctx_switch_entry *field;
1883
1884                 trace_assign_type(field, entry);
1885
1886                 S = field->prev_state < sizeof(state_to_char) ?
1887                         state_to_char[field->prev_state] : 'X';
1888                 T = field->next_state < sizeof(state_to_char) ?
1889                         state_to_char[field->next_state] : 'X';
1890                 if (entry->type == TRACE_WAKE)
1891                         S = '+';
1892                 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1893                                        field->prev_pid,
1894                                        field->prev_prio,
1895                                        S,
1896                                        field->next_cpu,
1897                                        field->next_pid,
1898                                        field->next_prio,
1899                                        T);
1900                 if (!ret)
1901                         return TRACE_TYPE_PARTIAL_LINE;
1902                 break;
1903         }
1904         case TRACE_SPECIAL:
1905         case TRACE_STACK: {
1906                 struct special_entry *field;
1907
1908                 trace_assign_type(field, entry);
1909
1910                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1911                                  field->arg1,
1912                                  field->arg2,
1913                                  field->arg3);
1914                 if (!ret)
1915                         return TRACE_TYPE_PARTIAL_LINE;
1916                 break;
1917         }
1918         case TRACE_PRINT: {
1919                 struct print_entry *field;
1920
1921                 trace_assign_type(field, entry);
1922
1923                 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
1924                 if (entry->flags & TRACE_FLAG_CONT)
1925                         trace_seq_print_cont(s, iter);
1926                 break;
1927         }
1928         }
1929         return TRACE_TYPE_HANDLED;
1930 }
1931
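/* Emit one field in binary or hex form; bail out of the caller if the seq buffer is full */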
1932 #define SEQ_PUT_FIELD_RET(s, x)                         \
1933 do {                                                    \
1934         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1935                 return 0;                               \
1936 } while (0)
1937
1938 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1939 do {                                                    \
1940         BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);     \
1941         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1942                 return 0;                               \
1943 } while (0)
1944
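/* Hex format: every field emitted as hex digits, terminated by a newline */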
1945 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1946 {
1947         struct trace_seq *s = &iter->seq;
1948         unsigned char newline = '\n';
1949         struct trace_entry *entry;
1950         int S, T;
1951
1952         entry = iter->ent;
1953
1954         if (entry->type == TRACE_CONT)
1955                 return TRACE_TYPE_HANDLED;
1956
1957         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1958         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1959         SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1960
1961         switch (entry->type) {
1962         case TRACE_FN: {
1963                 struct ftrace_entry *field;
1964
1965                 trace_assign_type(field, entry);
1966
1967                 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
1968                 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1969                 break;
1970         }
1971         case TRACE_CTX:
1972         case TRACE_WAKE: {
1973                 struct ctx_switch_entry *field;
1974
1975                 trace_assign_type(field, entry);
1976
1977                 S = field->prev_state < sizeof(state_to_char) ?
1978                         state_to_char[field->prev_state] : 'X';
1979                 T = field->next_state < sizeof(state_to_char) ?
1980                         state_to_char[field->next_state] : 'X';
1981                 if (entry->type == TRACE_WAKE)
1982                         S = '+';
1983                 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1984                 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1985                 SEQ_PUT_HEX_FIELD_RET(s, S);
1986                 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1987                 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1988                 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1989                 SEQ_PUT_HEX_FIELD_RET(s, T);
1990                 break;
1991         }
1992         case TRACE_SPECIAL:
1993         case TRACE_STACK: {
1994                 struct special_entry *field;
1995
1996                 trace_assign_type(field, entry);
1997
1998                 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1999                 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
2000                 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
2001                 break;
2002         }
2003         }
2004         SEQ_PUT_FIELD_RET(s, newline);
2005
2006         return TRACE_TYPE_HANDLED;
2007 }
2008
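/* Binary format: fields copied verbatim into the seq buffer */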
2009 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2010 {
2011         struct trace_seq *s = &iter->seq;
2012         struct trace_entry *entry;
2013
2014         entry = iter->ent;
2015
2016         if (entry->type == TRACE_CONT)
2017                 return TRACE_TYPE_HANDLED;
2018
2019         SEQ_PUT_FIELD_RET(s, entry->pid);
2020         SEQ_PUT_FIELD_RET(s, entry->cpu);
2021         SEQ_PUT_FIELD_RET(s, iter->ts);
2022
2023         switch (entry->type) {
2024         case TRACE_FN: {
2025                 struct ftrace_entry *field;
2026
2027                 trace_assign_type(field, entry);
2028
2029                 SEQ_PUT_FIELD_RET(s, field->ip);
2030                 SEQ_PUT_FIELD_RET(s, field->parent_ip);
2031                 break;
2032         }
2033         case TRACE_CTX: {
2034                 struct ctx_switch_entry *field;
2035
2036                 trace_assign_type(field, entry);
2037
2038                 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2039                 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2040                 SEQ_PUT_FIELD_RET(s, field->prev_state);
2041                 SEQ_PUT_FIELD_RET(s, field->next_pid);
2042                 SEQ_PUT_FIELD_RET(s, field->next_prio);
2043                 SEQ_PUT_FIELD_RET(s, field->next_state);
2044                 break;
2045         }
2046         case TRACE_SPECIAL:
2047         case TRACE_STACK: {
2048                 struct special_entry *field;
2049
2050                 trace_assign_type(field, entry);
2051
2052                 SEQ_PUT_FIELD_RET(s, field->arg1);
2053                 SEQ_PUT_FIELD_RET(s, field->arg2);
2054                 SEQ_PUT_FIELD_RET(s, field->arg3);
2055                 break;
2056         }
2057         }
2058         return 1;
2059 }
2060
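/* Return 1 if all per-cpu buffers (or their iterators) have been fully consumed */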
2061 static int trace_empty(struct trace_iterator *iter)
2062 {
2063         int cpu;
2064
2065         for_each_tracing_cpu(cpu) {
2066                 if (iter->buffer_iter[cpu]) {
2067                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
2068                                 return 0;
2069                 } else {
2070                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
2071                                 return 0;
2072                 }
2073         }
2074
2075         return 1;
2076 }
2077
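/* Pick the output format: tracer-specific print_line first, then bin/hex/raw, latency or default */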
2078 static enum print_line_t print_trace_line(struct trace_iterator *iter)
2079 {
2080         enum print_line_t ret;
2081
2082         if (iter->trace && iter->trace->print_line) {
2083                 ret = iter->trace->print_line(iter);
2084                 if (ret != TRACE_TYPE_UNHANDLED)
2085                         return ret;
2086         }
2087
2088         if (trace_flags & TRACE_ITER_BIN)
2089                 return print_bin_fmt(iter);
2090
2091         if (trace_flags & TRACE_ITER_HEX)
2092                 return print_hex_fmt(iter);
2093
2094         if (trace_flags & TRACE_ITER_RAW)
2095                 return print_raw_fmt(iter);
2096
2097         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2098                 return print_lat_fmt(iter, iter->idx, iter->cpu);
2099
2100         return print_trace_fmt(iter);
2101 }
2102
2103 static int s_show(struct seq_file *m, void *v)
2104 {
2105         struct trace_iterator *iter = v;
2106
2107         if (iter->ent == NULL) {
2108                 if (iter->tr) {
2109                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2110                         seq_puts(m, "#\n");
2111                 }
2112                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2113                         /* print nothing if the buffers are empty */
2114                         if (trace_empty(iter))
2115                                 return 0;
2116                         print_trace_header(m, iter);
2117                         if (!(trace_flags & TRACE_ITER_VERBOSE))
2118                                 print_lat_help_header(m);
2119                 } else {
2120                         if (!(trace_flags & TRACE_ITER_VERBOSE))
2121                                 print_func_help_header(m);
2122                 }
2123         } else {
2124                 print_trace_line(iter);
2125                 trace_print_seq(m, &iter->seq);
2126         }
2127
2128         return 0;
2129 }
2130
2131 static struct seq_operations tracer_seq_ops = {
2132         .start          = s_start,
2133         .next           = s_next,
2134         .stop           = s_stop,
2135         .show           = s_show,
2136 };
2137
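/* Set up an iterator over every per-cpu ring buffer and stop tracing while the trace file is read */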
2138 static struct trace_iterator *
2139 __tracing_open(struct inode *inode, struct file *file, int *ret)
2140 {
2141         struct trace_iterator *iter;
2142         struct seq_file *m;
2143         int cpu;
2144
2145         if (tracing_disabled) {
2146                 *ret = -ENODEV;
2147                 return NULL;
2148         }
2149
2150         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2151         if (!iter) {
2152                 *ret = -ENOMEM;
2153                 goto out;
2154         }
2155
2156         mutex_lock(&trace_types_lock);
2157         if (current_trace && current_trace->print_max)
2158                 iter->tr = &max_tr;
2159         else
2160                 iter->tr = inode->i_private;
2161         iter->trace = current_trace;
2162         iter->pos = -1;
2163
2164         /* Annotate start of buffers if we had overruns */
2165         if (ring_buffer_overruns(iter->tr->buffer))
2166                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2167
2168
2169         for_each_tracing_cpu(cpu) {
2170
2171                 iter->buffer_iter[cpu] =
2172                         ring_buffer_read_start(iter->tr->buffer, cpu);
2173
2174                 if (!iter->buffer_iter[cpu])
2175                         goto fail_buffer;
2176         }
2177
2178         /* TODO stop tracer */
2179         *ret = seq_open(file, &tracer_seq_ops);
2180         if (*ret)
2181                 goto fail_buffer;
2182
2183         m = file->private_data;
2184         m->private = iter;
2185
2186         /* stop the trace while dumping */
2187         tracing_stop();
2188
2189         if (iter->trace && iter->trace->open)
2190                 iter->trace->open(iter);
2191
2192         mutex_unlock(&trace_types_lock);
2193
2194  out:
2195         return iter;
2196
2197  fail_buffer:
2198         for_each_tracing_cpu(cpu) {
2199                 if (iter->buffer_iter[cpu])
2200                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2201         }
2202         mutex_unlock(&trace_types_lock);
2203
2204         return ERR_PTR(-ENOMEM);
2205 }
2206
2207 int tracing_open_generic(struct inode *inode, struct file *filp)
2208 {
2209         if (tracing_disabled)
2210                 return -ENODEV;
2211
2212         filp->private_data = inode->i_private;
2213         return 0;
2214 }
2215
2216 int tracing_release(struct inode *inode, struct file *file)
2217 {
2218         struct seq_file *m = (struct seq_file *)file->private_data;
2219         struct trace_iterator *iter = m->private;
2220         int cpu;
2221
2222         mutex_lock(&trace_types_lock);
2223         for_each_tracing_cpu(cpu) {
2224                 if (iter->buffer_iter[cpu])
2225                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2226         }
2227
2228         if (iter->trace && iter->trace->close)
2229                 iter->trace->close(iter);
2230
2231         /* reenable tracing if it was previously enabled */
2232         tracing_start();
2233         mutex_unlock(&trace_types_lock);
2234
2235         seq_release(inode, file);
2236         kfree(iter);
2237         return 0;
2238 }
2239
2240 static int tracing_open(struct inode *inode, struct file *file)
2241 {
2242         int ret;
2243
2244         __tracing_open(inode, file, &ret);
2245
2246         return ret;
2247 }
2248
2249 static int tracing_lt_open(struct inode *inode, struct file *file)
2250 {
2251         struct trace_iterator *iter;
2252         int ret;
2253
2254         iter = __tracing_open(inode, file, &ret);
2255
2256         if (!ret)
2257                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2258
2259         return ret;
2260 }
2261
2262
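/* seq_file iteration over the list of registered tracers (available_tracers) */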
2263 static void *
2264 t_next(struct seq_file *m, void *v, loff_t *pos)
2265 {
2266         struct tracer *t = m->private;
2267
2268         (*pos)++;
2269
2270         if (t)
2271                 t = t->next;
2272
2273         m->private = t;
2274
2275         return t;
2276 }
2277
2278 static void *t_start(struct seq_file *m, loff_t *pos)
2279 {
2280         struct tracer *t = m->private;
2281         loff_t l = 0;
2282
2283         mutex_lock(&trace_types_lock);
2284         for (; t && l < *pos; t = t_next(m, t, &l))
2285                 ;
2286
2287         return t;
2288 }
2289
2290 static void t_stop(struct seq_file *m, void *p)
2291 {
2292         mutex_unlock(&trace_types_lock);
2293 }
2294
2295 static int t_show(struct seq_file *m, void *v)
2296 {
2297         struct tracer *t = v;
2298
2299         if (!t)
2300                 return 0;
2301
2302         seq_printf(m, "%s", t->name);
2303         if (t->next)
2304                 seq_putc(m, ' ');
2305         else
2306                 seq_putc(m, '\n');
2307
2308         return 0;
2309 }
2310
2311 static struct seq_operations show_traces_seq_ops = {
2312         .start          = t_start,
2313         .next           = t_next,
2314         .stop           = t_stop,
2315         .show           = t_show,
2316 };
2317
2318 static int show_traces_open(struct inode *inode, struct file *file)
2319 {
2320         int ret;
2321
2322         if (tracing_disabled)
2323                 return -ENODEV;
2324
2325         ret = seq_open(file, &show_traces_seq_ops);
2326         if (!ret) {
2327                 struct seq_file *m = file->private_data;
2328                 m->private = trace_types;
2329         }
2330
2331         return ret;
2332 }
2333
2334 static struct file_operations tracing_fops = {
2335         .open           = tracing_open,
2336         .read           = seq_read,
2337         .llseek         = seq_lseek,
2338         .release        = tracing_release,
2339 };
2340
2341 static struct file_operations tracing_lt_fops = {
2342         .open           = tracing_lt_open,
2343         .read           = seq_read,
2344         .llseek         = seq_lseek,
2345         .release        = tracing_release,
2346 };
2347
2348 static struct file_operations show_traces_fops = {
2349         .open           = show_traces_open,
2350         .read           = seq_read,
2351         .release        = seq_release,
2352 };
2353
2354 /*
2355  * Only trace on a CPU if the bitmask is set:
2356  */
2357 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2358
2359 /*
2360  * When tracing/tracing_cpumask is modified, this holds
2361  * the new bitmask we are about to install:
2362  */
2363 static cpumask_t tracing_cpumask_new;
2364
2365 /*
2366  * The tracer itself will not take this lock, but we still want
2367  * to provide a consistent cpumask to user-space:
2368  */
2369 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2370
2371 /*
2372  * Temporary storage for the character representation of the
2373  * CPU bitmask (and one more byte for the newline):
2374  */
2375 static char mask_str[NR_CPUS + 1];
2376
2377 static ssize_t
2378 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2379                      size_t count, loff_t *ppos)
2380 {
2381         int len;
2382
2383         mutex_lock(&tracing_cpumask_update_lock);
2384
2385         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2386         if (count - len < 2) {
2387                 count = -EINVAL;
2388                 goto out_err;
2389         }
2390         len += sprintf(mask_str + len, "\n");
2391         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2392
2393 out_err:
2394         mutex_unlock(&tracing_cpumask_update_lock);
2395
2396         return count;
2397 }
2398
2399 static ssize_t
2400 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2401                       size_t count, loff_t *ppos)
2402 {
2403         int err, cpu;
2404
2405         mutex_lock(&tracing_cpumask_update_lock);
2406         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2407         if (err)
2408                 goto err_unlock;
2409
2410         raw_local_irq_disable();
2411         __raw_spin_lock(&ftrace_max_lock);
2412         for_each_tracing_cpu(cpu) {
2413                 /*
2414                  * Increase/decrease the disabled counter if we are
2415                  * about to flip a bit in the cpumask:
2416                  */
2417                 if (cpu_isset(cpu, tracing_cpumask) &&
2418                                 !cpu_isset(cpu, tracing_cpumask_new)) {
2419                         atomic_inc(&global_trace.data[cpu]->disabled);
2420                 }
2421                 if (!cpu_isset(cpu, tracing_cpumask) &&
2422                                 cpu_isset(cpu, tracing_cpumask_new)) {
2423                         atomic_dec(&global_trace.data[cpu]->disabled);
2424                 }
2425         }
2426         __raw_spin_unlock(&ftrace_max_lock);
2427         raw_local_irq_enable();
2428
2429         tracing_cpumask = tracing_cpumask_new;
2430
2431         mutex_unlock(&tracing_cpumask_update_lock);
2432
2433         return count;
2434
2435 err_unlock:
2436         mutex_unlock(&tracing_cpumask_update_lock);
2437
2438         return err;
2439 }
2440
2441 static struct file_operations tracing_cpumask_fops = {
2442         .open           = tracing_open_generic,
2443         .read           = tracing_cpumask_read,
2444         .write          = tracing_cpumask_write,
2445 };
2446
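/* Show the global trace options plus the current tracer's private flags, 'no'-prefixed when clear */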
2447 static ssize_t
2448 tracing_trace_options_read(struct file *filp, char __user *ubuf,
2449                        size_t cnt, loff_t *ppos)
2450 {
2451         int i;
2452         char *buf;
2453         int r = 0;
2454         int len = 0;
2455         u32 tracer_flags = current_trace->flags->val;
2456         struct tracer_opt *trace_opts = current_trace->flags->opts;
2457
2458
2459         /* calculate max size */
2460         for (i = 0; trace_options[i]; i++) {
2461                 len += strlen(trace_options[i]);
2462                 len += 3; /* "no" and space */
2463         }
2464
2465         /*
2466          * Increase the size with the names of the options
2467          * specific to the current tracer.
2468          */
2469         for (i = 0; trace_opts[i].name; i++) {
2470                 len += strlen(trace_opts[i].name);
2471                 len += 3; /* "no" and space */
2472         }
2473
2474         /* +2 for \n and \0 */
2475         buf = kmalloc(len + 2, GFP_KERNEL);
2476         if (!buf)
2477                 return -ENOMEM;
2478
2479         for (i = 0; trace_options[i]; i++) {
2480                 if (trace_flags & (1 << i))
2481                         r += sprintf(buf + r, "%s ", trace_options[i]);
2482                 else
2483                         r += sprintf(buf + r, "no%s ", trace_options[i]);
2484         }
2485
2486         for (i = 0; trace_opts[i].name; i++) {
2487                 if (tracer_flags & trace_opts[i].bit)
2488                         r += sprintf(buf + r, "%s ",
2489                                 trace_opts[i].name);
2490                 else
2491                         r += sprintf(buf + r, "no%s ",
2492                                 trace_opts[i].name);
2493         }
2494
2495         r += sprintf(buf + r, "\n");
2496         WARN_ON(r >= len + 2);
2497
2498         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2499
2500         kfree(buf);
2501
2502         return r;
2503 }
2504
2505 /* Try to assign a tracer specific option */
2506 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2507 {
2508         struct tracer_flags *trace_flags = trace->flags;
2509         struct tracer_opt *opts = NULL;
2510         int ret = 0, i = 0;
2511         int len;
2512
2513         for (i = 0; trace_flags->opts[i].name; i++) {
2514                 opts = &trace_flags->opts[i];
2515                 len = strlen(opts->name);
2516
2517                 if (strncmp(cmp, opts->name, len) == 0) {
2518                         ret = trace->set_flag(trace_flags->val,
2519                                 opts->bit, !neg);
2520                         break;
2521                 }
2522         }
2523         /* Not found */
2524         if (!trace_flags->opts[i].name)
2525                 return -EINVAL;
2526
2527         /* Refused to handle */
2528         if (ret)
2529                 return ret;
2530
2531         if (neg)
2532                 trace_flags->val &= ~opts->bit;
2533         else
2534                 trace_flags->val |= opts->bit;
2535
2536         return 0;
2537 }
2538
2539 static ssize_t
2540 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2541                         size_t cnt, loff_t *ppos)
2542 {
2543         char buf[64];
2544         char *cmp = buf;
2545         int neg = 0;
2546         int ret;
2547         int i;
2548
2549         if (cnt >= sizeof(buf))
2550                 return -EINVAL;
2551
2552         if (copy_from_user(&buf, ubuf, cnt))
2553                 return -EFAULT;
2554
2555         buf[cnt] = 0;
2556
2557         if (strncmp(buf, "no", 2) == 0) {
2558                 neg = 1;
2559                 cmp += 2;
2560         }
2561
2562         for (i = 0; trace_options[i]; i++) {
2563                 int len = strlen(trace_options[i]);
2564
2565                 if (strncmp(cmp, trace_options[i], len) == 0) {
2566                         if (neg)
2567                                 trace_flags &= ~(1 << i);
2568                         else
2569                                 trace_flags |= (1 << i);
2570                         break;
2571                 }
2572         }
2573
2574         /* If no option could be set, test the specific tracer options */
2575         if (!trace_options[i]) {
2576                 ret = set_tracer_option(current_trace, cmp, neg);
2577                 if (ret)
2578                         return ret;
2579         }
2580
2581         filp->f_pos += cnt;
2582
2583         return cnt;
2584 }
2585
2586 static struct file_operations tracing_iter_fops = {
2587         .open           = tracing_open_generic,
2588         .read           = tracing_trace_options_read,
2589         .write          = tracing_trace_options_write,
2590 };
2591
2592 static const char readme_msg[] =
2593         "tracing mini-HOWTO:\n\n"
2594         "# mkdir /debug\n"
2595         "# mount -t debugfs nodev /debug\n\n"
2596         "# cat /debug/tracing/available_tracers\n"
2597         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2598         "# cat /debug/tracing/current_tracer\n"
2599         "none\n"
2600         "# echo sched_switch > /debug/tracing/current_tracer\n"
2601         "# cat /debug/tracing/current_tracer\n"
2602         "sched_switch\n"
2603         "# cat /debug/tracing/trace_options\n"
2604         "noprint-parent nosym-offset nosym-addr noverbose\n"
2605         "# echo print-parent > /debug/tracing/trace_options\n"
2606         "# echo 1 > /debug/tracing/tracing_enabled\n"
2607         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2608         "# echo 0 > /debug/tracing/tracing_enabled\n"
2609 ;
2610
2611 static ssize_t
2612 tracing_readme_read(struct file *filp, char __user *ubuf,
2613                        size_t cnt, loff_t *ppos)
2614 {
2615         return simple_read_from_buffer(ubuf, cnt, ppos,
2616                                         readme_msg, strlen(readme_msg));
2617 }
2618
2619 static struct file_operations tracing_readme_fops = {
2620         .open           = tracing_open_generic,
2621         .read           = tracing_readme_read,
2622 };
2623
2624 static ssize_t
2625 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2626                   size_t cnt, loff_t *ppos)
2627 {
2628         char buf[64];
2629         int r;
2630
2631         r = sprintf(buf, "%u\n", tracer_enabled);
2632         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2633 }
2634
2635 static ssize_t
2636 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2637                    size_t cnt, loff_t *ppos)
2638 {
2639         struct trace_array *tr = filp->private_data;
2640         char buf[64];
2641         long val;
2642         int ret;
2643
2644         if (cnt >= sizeof(buf))
2645                 return -EINVAL;
2646
2647         if (copy_from_user(&buf, ubuf, cnt))
2648                 return -EFAULT;
2649
2650         buf[cnt] = 0;
2651
2652         ret = strict_strtoul(buf, 10, &val);
2653         if (ret < 0)
2654                 return ret;
2655
2656         val = !!val;
2657
2658         mutex_lock(&trace_types_lock);
2659         if (tracer_enabled ^ val) {
2660                 if (val) {
2661                         tracer_enabled = 1;
2662                         if (current_trace->start)
2663                                 current_trace->start(tr);
2664                         tracing_start();
2665                 } else {
2666                         tracer_enabled = 0;
2667                         tracing_stop();
2668                         if (current_trace->stop)
2669                                 current_trace->stop(tr);
2670                 }
2671         }
2672         mutex_unlock(&trace_types_lock);
2673
2674         filp->f_pos += cnt;
2675
2676         return cnt;
2677 }
2678
2679 static ssize_t
2680 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2681                        size_t cnt, loff_t *ppos)
2682 {
2683         char buf[max_tracer_type_len+2];
2684         int r;
2685
2686         mutex_lock(&trace_types_lock);
2687         if (current_trace)
2688                 r = sprintf(buf, "%s\n", current_trace->name);
2689         else
2690                 r = sprintf(buf, "\n");
2691         mutex_unlock(&trace_types_lock);
2692
2693         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2694 }
2695
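/* Switch to the named tracer: reset the old one, then init the new one */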
2696 static int tracing_set_tracer(char *buf)
2697 {
2698         struct trace_array *tr = &global_trace;
2699         struct tracer *t;
2700         int ret = 0;
2701
2702         mutex_lock(&trace_types_lock);
2703         for (t = trace_types; t; t = t->next) {
2704                 if (strcmp(t->name, buf) == 0)
2705                         break;
2706         }
2707         if (!t) {
2708                 ret = -EINVAL;
2709                 goto out;
2710         }
2711         if (t == current_trace)
2712                 goto out;
2713
2714         trace_branch_disable();
2715         if (current_trace && current_trace->reset)
2716                 current_trace->reset(tr);
2717
2718         current_trace = t;
2719         if (t->init) {
2720                 ret = t->init(tr);
2721                 if (ret)
2722                         goto out;
2723         }
2724
2725         trace_branch_enable(tr);
2726  out:
2727         mutex_unlock(&trace_types_lock);
2728
2729         return ret;
2730 }
2731
2732 static ssize_t
2733 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2734                         size_t cnt, loff_t *ppos)
2735 {
2736         char buf[max_tracer_type_len+1];
2737         int i;
2738         size_t ret;
2739         int err;
2740
2741         ret = cnt;
2742
2743         if (cnt > max_tracer_type_len)
2744                 cnt = max_tracer_type_len;
2745
2746         if (copy_from_user(&buf, ubuf, cnt))
2747                 return -EFAULT;
2748
2749         buf[cnt] = 0;
2750
2751         /* strip trailing whitespace. */
2752         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2753                 buf[i] = 0;
2754
2755         err = tracing_set_tracer(buf);
2756         if (err)
2757                 return err;
2758
2759         filp->f_pos += ret;
2760
2761         return ret;
2762 }
2763
2764 static ssize_t
2765 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2766                      size_t cnt, loff_t *ppos)
2767 {
2768         unsigned long *ptr = filp->private_data;
2769         char buf[64];
2770         int r;
2771
2772         r = snprintf(buf, sizeof(buf), "%ld\n",
2773                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2774         if (r > sizeof(buf))
2775                 r = sizeof(buf);
2776         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2777 }
2778
2779 static ssize_t
2780 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2781                       size_t cnt, loff_t *ppos)
2782 {
2783         long *ptr = filp->private_data;
2784         char buf[64];
2785         long val;
2786         int ret;
2787
2788         if (cnt >= sizeof(buf))
2789                 return -EINVAL;
2790
2791         if (copy_from_user(&buf, ubuf, cnt))
2792                 return -EFAULT;
2793
2794         buf[cnt] = 0;
2795
2796         ret = strict_strtoul(buf, 10, &val);
2797         if (ret < 0)
2798                 return ret;
2799
2800         *ptr = val * 1000;
2801
2802         return cnt;
2803 }
2804
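/* Only a single consuming reader of trace_pipe is allowed at a time */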
2805 static atomic_t tracing_reader;
2806
2807 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2808 {
2809         struct trace_iterator *iter;
2810
2811         if (tracing_disabled)
2812                 return -ENODEV;
2813
2814         /* We allow only one reader of the pipe */
2815         if (atomic_inc_return(&tracing_reader) != 1) {
2816                 atomic_dec(&tracing_reader);
2817                 return -EBUSY;
2818         }
2819
2820         /* create a buffer to store the information to pass to userspace */
2821         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2822         if (!iter)
2823                 return -ENOMEM;
2824
2825         mutex_lock(&trace_types_lock);
2826
2827         /* trace pipe does not show start of buffer */
2828         cpus_setall(iter->started);
2829
2830         iter->tr = &global_trace;
2831         iter->trace = current_trace;
2832         filp->private_data = iter;
2833
2834         if (iter->trace->pipe_open)
2835                 iter->trace->pipe_open(iter);
2836         mutex_unlock(&trace_types_lock);
2837
2838         return 0;
2839 }
2840
2841 static int tracing_release_pipe(struct inode *inode, struct file *file)
2842 {
2843         struct trace_iterator *iter = file->private_data;
2844
2845         kfree(iter);
2846         atomic_dec(&tracing_reader);
2847
2848         return 0;
2849 }
2850
2851 static unsigned int
2852 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2853 {
2854         struct trace_iterator *iter = filp->private_data;
2855
2856         if (trace_flags & TRACE_ITER_BLOCK) {
2857                 /*
2858                  * Always select as readable when in blocking mode
2859                  */
2860                 return POLLIN | POLLRDNORM;
2861         } else {
2862                 if (!trace_empty(iter))
2863                         return POLLIN | POLLRDNORM;
2864                 poll_wait(filp, &trace_wait, poll_table);
2865                 if (!trace_empty(iter))
2866                         return POLLIN | POLLRDNORM;
2867
2868                 return 0;
2869         }
2870 }
2871
2872 /*
2873  * Consumer reader.
2874  */
2875 static ssize_t
2876 tracing_read_pipe(struct file *filp, char __user *ubuf,
2877                   size_t cnt, loff_t *ppos)
2878 {
2879         struct trace_iterator *iter = filp->private_data;
2880         ssize_t sret;
2881
2882         /* return any leftover data */
2883         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2884         if (sret != -EBUSY)
2885                 return sret;
2886
2887         trace_seq_reset(&iter->seq);
2888
2889         mutex_lock(&trace_types_lock);
2890         if (iter->trace->read) {
2891                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2892                 if (sret)
2893                         goto out;
2894         }
2895
2896 waitagain:
2897         sret = 0;
2898         while (trace_empty(iter)) {
2899
2900                 if ((filp->f_flags & O_NONBLOCK)) {
2901                         sret = -EAGAIN;
2902                         goto out;
2903                 }
2904
2905                 /*
2906                  * This is a makeshift waitqueue; we don't use an
2907                  * actual wait queue because:
2908                  *  1) we only ever have one waiter
2909                  *  2) tracing traces all functions, and we don't want
2910                  *     the overhead of calling wake_up and friends
2911                  *     (and tracing them too).
2912                  *     Anyway, this really is a very primitive wakeup.
2913                  */
2914                 set_current_state(TASK_INTERRUPTIBLE);
2915                 iter->tr->waiter = current;
2916
2917                 mutex_unlock(&trace_types_lock);
2918
2919                 /* sleep for 100 msecs, and try again. */
2920                 schedule_timeout(HZ/10);
2921
2922                 mutex_lock(&trace_types_lock);
2923
2924                 iter->tr->waiter = NULL;
2925
2926                 if (signal_pending(current)) {
2927                         sret = -EINTR;
2928                         goto out;
2929                 }
2930
2931                 if (iter->trace != current_trace)
2932                         goto out;
2933
2934                 /*
2935                  * We keep blocking while the buffer is empty, even if
2936                  * tracing is disabled, as long as we have never read
2937                  * anything. This allows a user to cat this file and
2938                  * then enable tracing. But once we have read something,
2939                  * we give an EOF when tracing is disabled again.
2940                  *
2941                  * iter->pos will be 0 if we haven't read anything.
2942                  */
2943                 if (!tracer_enabled && iter->pos)
2944                         break;
2945
2946                 continue;
2947         }
2948
2949         /* stop when tracing is finished */
2950         if (trace_empty(iter))
2951                 goto out;
2952
2953         if (cnt >= PAGE_SIZE)
2954                 cnt = PAGE_SIZE - 1;
2955
2956         /* reset all but tr, trace, and overruns */
2957         memset(&iter->seq, 0,
2958                sizeof(struct trace_iterator) -
2959                offsetof(struct trace_iterator, seq));
2960         iter->pos = -1;
2961
2962         while (find_next_entry_inc(iter) != NULL) {
2963                 enum print_line_t ret;
2964                 int len = iter->seq.len;
2965
2966                 ret = print_trace_line(iter);
2967                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2968                         /* don't print partial lines */
2969                         iter->seq.len = len;
2970                         break;
2971                 }
2972
2973                 trace_consume(iter);
2974
2975                 if (iter->seq.len >= cnt)
2976                         break;
2977         }
2978
2979         /* Now copy what we have to the user */
2980         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2981         if (iter->seq.readpos >= iter->seq.len)
2982                 trace_seq_reset(&iter->seq);
2983
2984         /*
2985          * If there was nothing to send to the user, despite consuming
2986          * trace entries, go back and wait for more entries.
2987          */
2988         if (sret == -EBUSY)
2989                 goto waitagain;
2990
2991 out:
2992         mutex_unlock(&trace_types_lock);
2993
2994         return sret;
2995 }
2996
2997 static ssize_t
2998 tracing_entries_read(struct file *filp, char __user *ubuf,
2999                      size_t cnt, loff_t *ppos)
3000 {
3001         struct trace_array *tr = filp->private_data;
3002         char buf[64];
3003         int r;
3004
3005         r = sprintf(buf, "%lu\n", tr->entries >> 10);
3006         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3007 }
3008
3009 static ssize_t
3010 tracing_entries_write(struct file *filp, const char __user *ubuf,
3011                       size_t cnt, loff_t *ppos)
3012 {
3013         unsigned long val;
3014         char buf[64];
3015         int ret, cpu;
3016
3017         if (cnt >= sizeof(buf))
3018                 return -EINVAL;
3019
3020         if (copy_from_user(&buf, ubuf, cnt))
3021                 return -EFAULT;
3022
3023         buf[cnt] = 0;
3024
3025         ret = strict_strtoul(buf, 10, &val);
3026         if (ret < 0)
3027                 return ret;
3028
3029         /* must have at least 1 entry */
3030         if (!val)
3031                 return -EINVAL;
3032
3033         mutex_lock(&trace_types_lock);
3034
3035         tracing_stop();
3036
3037         /* disable all cpu buffers */
3038         for_each_tracing_cpu(cpu) {
3039                 if (global_trace.data[cpu])
3040                         atomic_inc(&global_trace.data[cpu]->disabled);
3041                 if (max_tr.data[cpu])
3042                         atomic_inc(&max_tr.data[cpu]->disabled);
3043         }
3044
3045         /* value is in KB */
3046         val <<= 10;
3047
3048         if (val != global_trace.entries) {
3049                 ret = ring_buffer_resize(global_trace.buffer, val);
3050                 if (ret < 0) {
3051                         cnt = ret;
3052                         goto out;
3053                 }
3054
3055                 ret = ring_buffer_resize(max_tr.buffer, val);
3056                 if (ret < 0) {
3057                         int r;
3058                         cnt = ret;
3059                         r = ring_buffer_resize(global_trace.buffer,
3060                                                global_trace.entries);
3061                         if (r < 0) {
3062                                 /* AARGH! We are left with different
3063                                  * size max buffer!!!! */
3064                                 WARN_ON(1);
3065                                 tracing_disabled = 1;
3066                         }
3067                         goto out;
3068                 }
3069
3070                 global_trace.entries = val;
3071         }
3072
3073         filp->f_pos += cnt;
3074
3075         /* If check pages failed, return ENOMEM */
3076         if (tracing_disabled)
3077                 cnt = -ENOMEM;
3078  out:
3079         for_each_tracing_cpu(cpu) {
3080                 if (global_trace.data[cpu])
3081                         atomic_dec(&global_trace.data[cpu]->disabled);
3082                 if (max_tr.data[cpu])
3083                         atomic_dec(&max_tr.data[cpu]->disabled);
3084         }
3085
3086         tracing_start();
3087         max_tr.entries = global_trace.entries;
3088         mutex_unlock(&trace_types_lock);
3089
3090         return cnt;
3091 }
3092
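/* trace_marker support: inject user-supplied strings into the trace as TRACE_PRINT entries */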
3093 static int mark_printk(const char *fmt, ...)
3094 {
3095         int ret;
3096         va_list args;
3097         va_start(args, fmt);
3098         ret = trace_vprintk(0, fmt, args);
3099         va_end(args);
3100         return ret;
3101 }
3102
3103 static ssize_t
3104 tracing_mark_write(struct file *filp, const char __user *ubuf,
3105                                         size_t cnt, loff_t *fpos)
3106 {
3107         char *buf;
3108         char *end;
3109
3110         if (tracing_disabled)
3111                 return -EINVAL;
3112
3113         if (cnt > TRACE_BUF_SIZE)
3114                 cnt = TRACE_BUF_SIZE;
3115
3116         buf = kmalloc(cnt + 1, GFP_KERNEL);
3117         if (buf == NULL)
3118                 return -ENOMEM;
3119
3120         if (copy_from_user(buf, ubuf, cnt)) {
3121                 kfree(buf);
3122                 return -EFAULT;
3123         }
3124
3125         /* Cut at the first NUL or newline. */
3126         buf[cnt] = '\0';
3127         end = strchr(buf, '\n');
3128         if (end)
3129                 *end = '\0';
3130
3131         cnt = mark_printk("%s\n", buf);
3132         kfree(buf);
3133         *fpos += cnt;
3134
3135         return cnt;
3136 }
3137
3138 static struct file_operations tracing_max_lat_fops = {
3139         .open           = tracing_open_generic,
3140         .read           = tracing_max_lat_read,
3141         .write          = tracing_max_lat_write,
3142 };
3143
3144 static struct file_operations tracing_ctrl_fops = {
3145         .open           = tracing_open_generic,
3146         .read           = tracing_ctrl_read,
3147         .write          = tracing_ctrl_write,
3148 };
3149
3150 static struct file_operations set_tracer_fops = {
3151         .open           = tracing_open_generic,
3152         .read           = tracing_set_trace_read,
3153         .write          = tracing_set_trace_write,
3154 };
3155
3156 static struct file_operations tracing_pipe_fops = {
3157         .open           = tracing_open_pipe,
3158         .poll           = tracing_poll_pipe,
3159         .read           = tracing_read_pipe,
3160         .release        = tracing_release_pipe,
3161 };
3162
3163 static struct file_operations tracing_entries_fops = {
3164         .open           = tracing_open_generic,
3165         .read           = tracing_entries_read,
3166         .write          = tracing_entries_write,
3167 };
3168
3169 static struct file_operations tracing_mark_fops = {
3170         .open           = tracing_open_generic,
3171         .write          = tracing_mark_write,
3172 };
3173
3174 #ifdef CONFIG_DYNAMIC_FTRACE
3175
3176 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3177 {
3178         return 0;
3179 }
3180
3181 static ssize_t
3182 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3183                   size_t cnt, loff_t *ppos)
3184 {
3185         static char ftrace_dyn_info_buffer[1024];
3186         static DEFINE_MUTEX(dyn_info_mutex);
3187         unsigned long *p = filp->private_data;
3188         char *buf = ftrace_dyn_info_buffer;
3189         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
3190         int r;
3191
3192         mutex_lock(&dyn_info_mutex);
3193         r = sprintf(buf, "%ld ", *p);
3194
3195         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3196         buf[r++] = '\n';
3197
3198         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3199
3200         mutex_unlock(&dyn_info_mutex);
3201
3202         return r;
3203 }
3204
3205 static struct file_operations tracing_dyn_info_fops = {
3206         .open           = tracing_open_generic,
3207         .read           = tracing_read_dyn_info,
3208 };
3209 #endif
3210
3211 static struct dentry *d_tracer;
3212
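/* Create the debugfs 'tracing' directory once and hand back its dentry */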
3213 struct dentry *tracing_init_dentry(void)
3214 {
3215         static int once;
3216
3217         if (d_tracer)
3218                 return d_tracer;
3219
3220         d_tracer = debugfs_create_dir("tracing", NULL);
3221
3222         if (!d_tracer && !once) {
3223                 once = 1;
3224                 pr_warning("Could not create debugfs directory 'tracing'\n");
3225                 return NULL;
3226         }
3227
3228         return d_tracer;
3229 }
3230
3231 #ifdef CONFIG_FTRACE_SELFTEST
3232 /* Let selftest have access to static functions in this file */
3233 #include "trace_selftest.c"
3234 #endif
3235
3236 static __init int tracer_init_debugfs(void)
3237 {
3238         struct dentry *d_tracer;
3239         struct dentry *entry;
3240
3241         d_tracer = tracing_init_dentry();
3242
3243         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
3244                                     &global_trace, &tracing_ctrl_fops);
3245         if (!entry)
3246                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
3247
3248         entry = debugfs_create_file("trace_options", 0644, d_tracer,
3249                                     NULL, &tracing_iter_fops);
3250         if (!entry)
3251                 pr_warning("Could not create debugfs 'trace_options' entry\n");
3252
3253         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
3254                                     NULL, &tracing_cpumask_fops);
3255         if (!entry)
3256                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3257
3258         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
3259                                     &global_trace, &tracing_lt_fops);
3260         if (!entry)
3261                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
3262
3263         entry = debugfs_create_file("trace", 0444, d_tracer,
3264                                     &global_trace, &tracing_fops);
3265         if (!entry)
3266                 pr_warning("Could not create debugfs 'trace' entry\n");
3267
3268         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
3269                                     &global_trace, &show_traces_fops);
3270         if (!entry)
3271                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
3272
3273         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
3274                                     &global_trace, &set_tracer_fops);
3275         if (!entry)
3276                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
3277
3278         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
3279                                     &tracing_max_latency,
3280                                     &tracing_max_lat_fops);
3281         if (!entry)
3282                 pr_warning("Could not create debugfs "
3283                            "'tracing_max_latency' entry\n");
3284
3285         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
3286                                     &tracing_thresh, &tracing_max_lat_fops);
3287         if (!entry)
3288                 pr_warning("Could not create debugfs "
3289                            "'tracing_thresh' entry\n");
3290         entry = debugfs_create_file("README", 0644, d_tracer,
3291                                     NULL, &tracing_readme_fops);
3292         if (!entry)
3293                 pr_warning("Could not create debugfs 'README' entry\n");
3294
3295         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
3296                                     NULL, &tracing_pipe_fops);
3297         if (!entry)
3298                 pr_warning("Could not create debugfs "
3299                            "'trace_pipe' entry\n");
3300
3301         entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
3302                                     &global_trace, &tracing_entries_fops);
3303         if (!entry)
3304                 pr_warning("Could not create debugfs "
3305                            "'buffer_size_kb' entry\n");
3306
3307         entry = debugfs_create_file("trace_marker", 0220, d_tracer,
3308                                     NULL, &tracing_mark_fops);
3309         if (!entry)
3310                 pr_warning("Could not create debugfs "
3311                            "'trace_marker' entry\n");
3312
3313 #ifdef CONFIG_DYNAMIC_FTRACE
3314         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3315                                     &ftrace_update_tot_cnt,
3316                                     &tracing_dyn_info_fops);
3317         if (!entry)
3318                 pr_warning("Could not create debugfs "
3319                            "'dyn_ftrace_total_info' entry\n");
3320 #endif
3321 #ifdef CONFIG_SYSPROF_TRACER
3322         init_tracer_sysprof_debugfs(d_tracer);
3323 #endif
3324         return 0;
3325 }
3326
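/* Record a printf-style message as a TRACE_PRINT entry in the ring buffer */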
3327 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3328 {
3329         static DEFINE_SPINLOCK(trace_buf_lock);
3330         static char trace_buf[TRACE_BUF_SIZE];
3331
3332         struct ring_buffer_event *event;
3333         struct trace_array *tr = &global_trace;
3334         struct trace_array_cpu *data;
3335         struct print_entry *entry;
3336         unsigned long flags, irq_flags;
3337         int cpu, len = 0, size, pc;
3338
3339         if (tracing_disabled)
3340                 return 0;
3341
3342         pc = preempt_count();
3343         preempt_disable_notrace();
3344         cpu = raw_smp_processor_id();
3345         data = tr->data[cpu];
3346
3347         if (unlikely(atomic_read(&data->disabled)))
3348                 goto out;
3349
3350         spin_lock_irqsave(&trace_buf_lock, flags);
3351         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3352
3353         len = min(len, TRACE_BUF_SIZE-1);
3354         trace_buf[len] = 0;
3355
3356         size = sizeof(*entry) + len + 1;
3357         event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3358         if (!event)
3359                 goto out_unlock;
3360         entry = ring_buffer_event_data(event);
3361         tracing_generic_entry_update(&entry->ent, flags, pc);
3362         entry->ent.type                 = TRACE_PRINT;
3363         entry->ip                       = ip;
3364
3365         memcpy(&entry->buf, trace_buf, len);
3366         entry->buf[len] = 0;
3367         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3368
3369  out_unlock:
3370         spin_unlock_irqrestore(&trace_buf_lock, flags);
3371
3372  out:
3373         preempt_enable_notrace();
3374
3375         return len;
3376 }
3377 EXPORT_SYMBOL_GPL(trace_vprintk);
3378
3379 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3380 {
3381         int ret;
3382         va_list ap;
3383
3384         if (!(trace_flags & TRACE_ITER_PRINTK))
3385                 return 0;
3386
3387         va_start(ap, fmt);
3388         ret = trace_vprintk(ip, fmt, ap);
3389         va_end(ap);
3390         return ret;
3391 }
3392 EXPORT_SYMBOL_GPL(__ftrace_printk);
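
/*
 * Illustrative sketch (not part of the original file): callers do not
 * normally pass an instruction pointer themselves; they use the
 * ftrace_printk() wrapper from <linux/ftrace.h>, which expands to
 * __ftrace_printk() with the caller's address, e.g.
 *
 *	ftrace_printk("wakeup took %lu usecs on cpu %d\n",
 *		      delta, smp_processor_id());
 *
 * (delta stands in for whatever value the caller wants to record.)
 * The message is dropped unless the "printk" trace option
 * (TRACE_ITER_PRINTK) is set and tracing has not been disabled.
 */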
3393
3394 static int trace_panic_handler(struct notifier_block *this,
3395                                unsigned long event, void *unused)
3396 {
3397         if (ftrace_dump_on_oops)
3398                 ftrace_dump();
3399         return NOTIFY_OK;
3400 }
3401
3402 static struct notifier_block trace_panic_notifier = {
3403         .notifier_call  = trace_panic_handler,
3404         .next           = NULL,
3405         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
3406 };
3407
3408 static int trace_die_handler(struct notifier_block *self,
3409                              unsigned long val,
3410                              void *data)
3411 {
3412         switch (val) {
3413         case DIE_OOPS:
3414                 if (ftrace_dump_on_oops)
3415                         ftrace_dump();
3416                 break;
3417         default:
3418                 break;
3419         }
3420         return NOTIFY_OK;
3421 }
3422
3423 static struct notifier_block trace_die_notifier = {
3424         .notifier_call = trace_die_handler,
3425         .priority = 200
3426 };
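
/*
 * Both notifiers are registered in tracer_alloc_buffers() below, so once
 * tracing is initialized a kernel panic or a DIE_OOPS event will dump the
 * ring buffers to the console whenever ftrace_dump_on_oops is set.
 */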
3427
3428 /*
3429  * printk is limited to a max of 1024 characters; we really don't
3430  * need it that big. Nothing should be printing 1000 characters anyway.
3431  */
3432 #define TRACE_MAX_PRINT         1000
3433
3434 /*
3435  * Define KERN_TRACE here so that we have one place to modify
3436  * it if we decide to change what log level the ftrace dump
3437  * should be at.
3438  */
3439 #define KERN_TRACE              KERN_INFO
3440
3441 static void
3442 trace_printk_seq(struct trace_seq *s)
3443 {
3444         /* Probably should print a warning here. */
3445         if (s->len >= TRACE_MAX_PRINT)
3446                 s->len = TRACE_MAX_PRINT;
3447
3448         /* should be zero terminated, but we are paranoid. */
3449         s->buffer[s->len] = 0;
3450
3451         printk(KERN_TRACE "%s", s->buffer);
3452
3453         trace_seq_reset(s);
3454 }
3455
3456 void ftrace_dump(void)
3457 {
3458         static DEFINE_SPINLOCK(ftrace_dump_lock);
3459         /* use static because iter can be a bit big for the stack */
3460         static struct trace_iterator iter;
3461         static cpumask_t mask;
3462         static int dump_ran;
3463         unsigned long flags;
3464         int cnt = 0, cpu;
3465
3466         /* only one dump */
3467         spin_lock_irqsave(&ftrace_dump_lock, flags);
3468         if (dump_ran)
3469                 goto out;
3470
3471         dump_ran = 1;
3472
3473         /* No turning back! */
3474         ftrace_kill();
3475
3476         for_each_tracing_cpu(cpu) {
3477                 atomic_inc(&global_trace.data[cpu]->disabled);
3478         }
3479
3480         printk(KERN_TRACE "Dumping ftrace buffer:\n");
3481
3482         iter.tr = &global_trace;
3483         iter.trace = current_trace;
3484
3485         /*
3486          * We need to stop all tracing on all CPUs to read
3487          * the next buffer. This is a bit expensive, but it is
3488          * not done often. We read everything we can,
3489          * and then release the locks again.
3490          */
3491
3492         cpus_clear(mask);
3493
3494         while (!trace_empty(&iter)) {
3495
3496                 if (!cnt)
3497                         printk(KERN_TRACE "---------------------------------\n");
3498
3499                 cnt++;
3500
3501                 /* reset all but tr, trace, and overruns */
3502                 memset(&iter.seq, 0,
3503                        sizeof(struct trace_iterator) -
3504                        offsetof(struct trace_iterator, seq));
3505                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3506                 iter.pos = -1;
3507
3508                 if (find_next_entry_inc(&iter) != NULL) {
3509                         print_trace_line(&iter);
3510                         trace_consume(&iter);
3511                 }
3512
3513                 trace_printk_seq(&iter.seq);
3514         }
3515
3516         if (!cnt)
3517                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
3518         else
3519                 printk(KERN_TRACE "---------------------------------\n");
3520
3521  out:
3522         spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3523 }
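
/*
 * Illustrative sketch (not part of the original file): besides the panic
 * and die notifiers above, debugging code may call ftrace_dump() directly
 * when it detects an unrecoverable state, e.g.
 *
 *	if (WARN_ON(broken_invariant)) {
 *		ftrace_dump();
 *		return -EIO;
 *	}
 *
 * (broken_invariant stands in for whatever condition the caller checks.)
 * Note that a dump is a one-way street: it calls ftrace_kill(), and the
 * dump_ran flag ensures the buffers are only ever dumped once per boot.
 */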
3524
3525 __init static int tracer_alloc_buffers(void)
3526 {
3527         struct trace_array_cpu *data;
3528         int i;
3529
3530         /* TODO: make the number of buffers hot-pluggable with CPUs */
3531         tracing_buffer_mask = cpu_possible_map;
3532
3533         global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3534                                                    TRACE_BUFFER_FLAGS);
3535         if (!global_trace.buffer) {
3536                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3537                 WARN_ON(1);
3538                 return 0;
3539         }
3540         global_trace.entries = ring_buffer_size(global_trace.buffer);
3541
3542 #ifdef CONFIG_TRACER_MAX_TRACE
3543         max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3544                                              TRACE_BUFFER_FLAGS);
3545         if (!max_tr.buffer) {
3546                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3547                 WARN_ON(1);
3548                 ring_buffer_free(global_trace.buffer);
3549                 return 0;
3550         }
3551         max_tr.entries = ring_buffer_size(max_tr.buffer);
3552         WARN_ON(max_tr.entries != global_trace.entries);
3553 #endif
3554
3555         /* Set up the per-CPU trace data pointers for all buffers */
3556         for_each_tracing_cpu(i) {
3557                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3558                 max_tr.data[i] = &per_cpu(max_data, i);
3559         }
3560
3561         trace_init_cmdlines();
3562
3563         register_tracer(&nop_trace);
3564 #ifdef CONFIG_BOOT_TRACER
3565         register_tracer(&boot_tracer);
3566         current_trace = &boot_tracer;
3567         current_trace->init(&global_trace);
3568 #else
3569         current_trace = &nop_trace;
3570 #endif
3571
3572         /* All seems OK, enable tracing */
3573         tracing_disabled = 0;
3574
3575         atomic_notifier_chain_register(&panic_notifier_list,
3576                                        &trace_panic_notifier);
3577
3578         register_die_notifier(&trace_die_notifier);
3579
3580         return 0;
3581 }
3582 early_initcall(tracer_alloc_buffers);
3583 fs_initcall(tracer_init_debugfs);
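
/*
 * Initcall ordering: tracer_alloc_buffers() runs at early_initcall time,
 * so the ring buffers and the initial tracer exist well before debugfs is
 * usable; tracer_init_debugfs() then exposes the control files at
 * fs_initcall time.
 */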