ftrace: add stack trace to function tracer
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/notifier.h>
18 #include <linux/debugfs.h>
19 #include <linux/pagemap.h>
20 #include <linux/hardirq.h>
21 #include <linux/linkage.h>
22 #include <linux/uaccess.h>
23 #include <linux/ftrace.h>
24 #include <linux/module.h>
25 #include <linux/percpu.h>
26 #include <linux/kdebug.h>
27 #include <linux/ctype.h>
28 #include <linux/init.h>
29 #include <linux/poll.h>
30 #include <linux/gfp.h>
31 #include <linux/fs.h>
32 #include <linux/kprobes.h>
33 #include <linux/writeback.h>
34
35 #include <linux/stacktrace.h>
36 #include <linux/ring_buffer.h>
37 #include <linux/irqflags.h>
38
39 #include "trace.h"
40 #include "trace_output.h"
41
42 #define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)
43
44 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
45 unsigned long __read_mostly     tracing_thresh;
46
47 /*
48  * We need to change this state when a selftest is running.
49  * A selftest will look into the ring buffer to count the
50  * entries inserted during the selftest, while concurrent
51  * insertions into the ring buffer (such as ftrace_printk) could
52  * occur at the same time, giving false positive or negative results.
53  */
54 static bool __read_mostly tracing_selftest_running;
55
56 /* For tracers that don't implement custom flags */
57 static struct tracer_opt dummy_tracer_opt[] = {
58         { }
59 };
60
61 static struct tracer_flags dummy_tracer_flags = {
62         .val = 0,
63         .opts = dummy_tracer_opt
64 };
65
66 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
67 {
68         return 0;
69 }
70
71 /*
72  * Kill all tracing for good (never come back).
73  * It is initialized to 1 and will be set back to zero only if
74  * the initialization of the tracer is successful; that is the
75  * only place that clears it.
76  */
77 int tracing_disabled = 1;
78
79 static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
80
81 static inline void ftrace_disable_cpu(void)
82 {
83         preempt_disable();
84         local_inc(&__get_cpu_var(ftrace_cpu_disabled));
85 }
86
87 static inline void ftrace_enable_cpu(void)
88 {
89         local_dec(&__get_cpu_var(ftrace_cpu_disabled));
90         preempt_enable();
91 }
92
93 static cpumask_var_t __read_mostly      tracing_buffer_mask;
94
95 #define for_each_tracing_cpu(cpu)       \
96         for_each_cpu(cpu, tracing_buffer_mask)
97
98 /*
99  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
100  *
101  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
102  * is set, then ftrace_dump is called. This will output the contents
103  * of the ftrace buffers to the console.  This is very useful for
104  * capturing traces that lead to crashes and outputting them to a
105  * serial console.
106  *
107  * It is off by default, but you can enable it either by specifying
108  * "ftrace_dump_on_oops" on the kernel command line, or by setting
109  * /proc/sys/kernel/ftrace_dump_on_oops to true.
110  */
111 int ftrace_dump_on_oops;
112
113 static int tracing_set_tracer(char *buf);
114
115 static int __init set_ftrace(char *str)
116 {
117         tracing_set_tracer(str);
118         return 1;
119 }
120 __setup("ftrace", set_ftrace);
121
122 static int __init set_ftrace_dump_on_oops(char *str)
123 {
124         ftrace_dump_on_oops = 1;
125         return 1;
126 }
127 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
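/*
 * Illustrative sketch, not part of the original file: how an oops or
 * panic path is expected to consume the flag above.  It assumes
 * ftrace_dump(), which this file provides further down, and uses a
 * hypothetical notifier name.
 */
static int example_panic_notify(struct notifier_block *nb,
                                unsigned long event, void *unused)
{
        /* dump the ftrace buffers to the console only when requested */
        if (ftrace_dump_on_oops)
                ftrace_dump();
        return NOTIFY_OK;
}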
128
129 long
130 ns2usecs(cycle_t nsec)
131 {
132         nsec += 500;
133         do_div(nsec, 1000);
134         return nsec;
135 }
136
137 cycle_t ftrace_now(int cpu)
138 {
139         u64 ts = ring_buffer_time_stamp(cpu);
140         ring_buffer_normalize_time_stamp(cpu, &ts);
141         return ts;
142 }
143
144 /*
145  * The global_trace is the descriptor that holds the tracing
146  * buffers for the live tracing. For each CPU, it contains
147  * a linked list of pages that will store trace entries. The
148  * page descriptors of those pages hold the list: the lru item
149  * in each page descriptor links together the pages of that
150  * CPU's buffer.
151  *
152  * For each active CPU there is a data field that holds the
153  * pages for the buffer for that CPU. Each CPU has the same number
154  * of pages allocated for its buffer.
155  */
156 static struct trace_array       global_trace;
157
158 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
159
160 /*
161  * The max_tr is used to snapshot the global_trace when a maximum
162  * latency is reached. Some tracers will use this to store a maximum
163  * trace while they continue examining live traces.
164  *
165  * The buffers for the max_tr are set up the same as the global_trace.
166  * When a snapshot is taken, the linked list of the max_tr is swapped
167  * with the linked list of the global_trace and the buffers are reset
168  * for the global_trace so that tracing can continue.
169  */
170 static struct trace_array       max_tr;
171
172 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
173
174 /* tracer_enabled is used to toggle activation of a tracer */
175 static int                      tracer_enabled = 1;
176
177 /**
178  * tracing_is_enabled - return tracer_enabled status
179  *
180  * This function is used by other tracers to know the status
181  * of the tracer_enabled flag.  Tracers may use this function
182  * to know whether they should enable their features when starting
183  * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
184  */
185 int tracing_is_enabled(void)
186 {
187         return tracer_enabled;
188 }
189
190 /* function tracing enabled */
191 int                             ftrace_function_enabled;
192
193 /*
194  * trace_buf_size is the size in bytes that is allocated
195  * for a buffer. Note, the number of bytes is always rounded
196  * to page size.
197  *
198  * This number is purposely set to a low value of 16384 so that,
199  * if a dump on oops happens, there is not an excessive amount of
200  * output to wait for. Anyway, this is configurable at both boot
201  * time and run time.
202  */
203 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
204
205 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
206
207 /* trace_types holds a linked list of available tracers. */
208 static struct tracer            *trace_types __read_mostly;
209
210 /* current_trace points to the tracer that is currently active */
211 static struct tracer            *current_trace __read_mostly;
212
213 /*
214  * max_tracer_type_len is used to simplify allocating the
215  * buffers used to read tracer names from userspace. We keep
216  * track of the longest registered tracer name.
217  */
218 static int                      max_tracer_type_len;
219
220 /*
221  * trace_types_lock is used to protect the trace_types list.
222  * This lock is also used to keep user access serialized.
223  * Accesses from userspace grab this lock while the requested
224  * operations are carried out inside the kernel.
225  */
226 static DEFINE_MUTEX(trace_types_lock);
227
228 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
229 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
230
231 /* trace_flags holds trace_options default values */
232 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
233         TRACE_ITER_ANNOTATE;
234
235 /**
236  * trace_wake_up - wake up tasks waiting for trace input
237  *
238  * Simply wakes up any task that is blocked on the trace_wait
239  * queue. This is used with trace_poll for tasks polling the trace.
240  */
241 void trace_wake_up(void)
242 {
243         /*
244          * The runqueue_is_locked() can fail, but this is the best we
245          * have for now:
246          */
247         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
248                 wake_up(&trace_wait);
249 }
250
251 static int __init set_buf_size(char *str)
252 {
253         unsigned long buf_size;
254         int ret;
255
256         if (!str)
257                 return 0;
258         ret = strict_strtoul(str, 0, &buf_size);
259         /* nr_entries can not be zero */
260         if (ret < 0 || buf_size == 0)
261                 return 0;
262         trace_buf_size = buf_size;
263         return 1;
264 }
265 __setup("trace_buf_size=", set_buf_size);
266
267 unsigned long nsecs_to_usecs(unsigned long nsecs)
268 {
269         return nsecs / 1000;
270 }
271
272 /* These must match the bit positions in trace_iterator_flags */
273 static const char *trace_options[] = {
274         "print-parent",
275         "sym-offset",
276         "sym-addr",
277         "verbose",
278         "raw",
279         "hex",
280         "bin",
281         "block",
282         "stacktrace",
283         "sched-tree",
284         "ftrace_printk",
285         "ftrace_preempt",
286         "branch",
287         "annotate",
288         "userstacktrace",
289         "sym-userobj",
290         "printk-msg-only",
291         NULL
292 };
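/*
 * Illustrative sketch (an assumption mirroring trace_iterator_flags in
 * trace.h of this era, with hypothetical EXAMPLE_ names): the option
 * strings above map to trace_flags bits in the same order, e.g.:
 */
enum example_trace_iterator_flags {
        EXAMPLE_ITER_PRINT_PARENT       = 0x01, /* "print-parent" */
        EXAMPLE_ITER_SYM_OFFSET         = 0x02, /* "sym-offset"   */
        EXAMPLE_ITER_SYM_ADDR           = 0x04, /* "sym-addr"     */
        EXAMPLE_ITER_VERBOSE            = 0x08, /* "verbose"      */
        /* ... one bit per string above, in array order ... */
};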
293
294 /*
295  * ftrace_max_lock is used to protect the swapping of buffers
296  * when taking a max snapshot. The buffers themselves are
297  * protected by per_cpu spinlocks. But the action of the swap
298  * needs its own lock.
299  *
300  * This is defined as a raw_spinlock_t in order to help
301  * with performance when lockdep debugging is enabled.
302  */
303 static raw_spinlock_t ftrace_max_lock =
304         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
305
306 /*
307  * Copy the new maximum trace into the separate maximum-trace
308  * structure. (this way the maximum trace is permanently saved,
309  * for later retrieval via /debugfs/tracing/latency_trace)
310  */
311 static void
312 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
313 {
314         struct trace_array_cpu *data = tr->data[cpu];
315
316         max_tr.cpu = cpu;
317         max_tr.time_start = data->preempt_timestamp;
318
319         data = max_tr.data[cpu];
320         data->saved_latency = tracing_max_latency;
321
322         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
323         data->pid = tsk->pid;
324         data->uid = task_uid(tsk);
325         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
326         data->policy = tsk->policy;
327         data->rt_priority = tsk->rt_priority;
328
329         /* record this task's comm */
330         tracing_record_cmdline(current);
331 }
332
333 static void
334 trace_seq_reset(struct trace_seq *s)
335 {
336         s->len = 0;
337         s->readpos = 0;
338 }
339
340 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
341 {
342         int len;
343         int ret;
344
345         if (s->len <= s->readpos)
346                 return -EBUSY;
347
348         len = s->len - s->readpos;
349         if (cnt > len)
350                 cnt = len;
351         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
352         if (ret)
353                 return -EFAULT;
354
355         s->readpos += cnt;
356         return cnt;
357 }
358
359 static void
360 trace_print_seq(struct seq_file *m, struct trace_seq *s)
361 {
362         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
363
364         s->buffer[len] = 0;
365         seq_puts(m, s->buffer);
366
367         trace_seq_reset(s);
368 }
369
370 /**
371  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
372  * @tr: tracer
373  * @tsk: the task with the latency
374  * @cpu: The cpu that initiated the trace.
375  *
376  * Flip the buffers between the @tr and the max_tr and record information
377  * about which task was the cause of this latency.
378  */
379 void
380 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
381 {
382         struct ring_buffer *buf = tr->buffer;
383
384         WARN_ON_ONCE(!irqs_disabled());
385         __raw_spin_lock(&ftrace_max_lock);
386
387         tr->buffer = max_tr.buffer;
388         max_tr.buffer = buf;
389
390         ftrace_disable_cpu();
391         ring_buffer_reset(tr->buffer);
392         ftrace_enable_cpu();
393
394         __update_max_tr(tr, tsk, cpu);
395         __raw_spin_unlock(&ftrace_max_lock);
396 }
397
398 /**
399  * update_max_tr_single - only copy one trace over, and reset the rest
400  * @tr: tracer
401  * @tsk: the task with the latency
402  * @cpu: the CPU of the buffer to copy.
403  *
404  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
405  */
406 void
407 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
408 {
409         int ret;
410
411         WARN_ON_ONCE(!irqs_disabled());
412         __raw_spin_lock(&ftrace_max_lock);
413
414         ftrace_disable_cpu();
415
416         ring_buffer_reset(max_tr.buffer);
417         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
418
419         ftrace_enable_cpu();
420
421         WARN_ON_ONCE(ret);
422
423         __update_max_tr(tr, tsk, cpu);
424         __raw_spin_unlock(&ftrace_max_lock);
425 }
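/*
 * Illustrative sketch, not part of the original file: how a latency
 * tracer is expected to use the snapshot helpers above when it sees a
 * new worst case, loosely following the in-tree wakeup/irqsoff tracers.
 * Assumes the caller runs with interrupts disabled, as update_max_tr()
 * requires, and that the tracer reset tracing_max_latency when it
 * started, as the in-tree tracers do.
 */
static void example_report_latency(struct trace_array *tr, int cpu,
                                   unsigned long delta)
{
        if (delta <= tracing_max_latency)
                return;

        /* remember the new maximum and snapshot the buffers behind it */
        tracing_max_latency = delta;
        update_max_tr(tr, current, cpu);
}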
426
427 /**
428  * register_tracer - register a tracer with the ftrace system.
429  * @type: the plugin for the tracer
430  *
431  * Register a new plugin tracer.
432  */
433 int register_tracer(struct tracer *type)
434 {
435         struct tracer *t;
436         int len;
437         int ret = 0;
438
439         if (!type->name) {
440                 pr_info("Tracer must have a name\n");
441                 return -1;
442         }
443
444         /*
445          * When this gets called we hold the BKL which means that
446          * preemption is disabled. Various trace selftests however
447          * need to disable and enable preemption for successful tests.
448          * So we drop the BKL here and grab it again after the tests.
449          */
450         unlock_kernel();
451         mutex_lock(&trace_types_lock);
452
453         tracing_selftest_running = true;
454
455         for (t = trace_types; t; t = t->next) {
456                 if (strcmp(type->name, t->name) == 0) {
457                         /* already found */
458                         pr_info("Trace %s already registered\n",
459                                 type->name);
460                         ret = -1;
461                         goto out;
462                 }
463         }
464
465         if (!type->set_flag)
466                 type->set_flag = &dummy_set_flag;
467         if (!type->flags)
468                 type->flags = &dummy_tracer_flags;
469         else
470                 if (!type->flags->opts)
471                         type->flags->opts = dummy_tracer_opt;
472
473 #ifdef CONFIG_FTRACE_STARTUP_TEST
474         if (type->selftest) {
475                 struct tracer *saved_tracer = current_trace;
476                 struct trace_array *tr = &global_trace;
477                 int i;
478
479                 /*
480                  * Run a selftest on this tracer.
481                  * Here we reset the trace buffer, and set the current
482                  * tracer to be this tracer. The tracer can then run some
483                  * internal tracing to verify that everything is in order.
484                  * If we fail, we do not register this tracer.
485                  */
486                 for_each_tracing_cpu(i)
487                         tracing_reset(tr, i);
488
489                 current_trace = type;
490                 /* the test is responsible for initializing and enabling */
491                 pr_info("Testing tracer %s: ", type->name);
492                 ret = type->selftest(type, tr);
493                 /* the test is responsible for resetting too */
494                 current_trace = saved_tracer;
495                 if (ret) {
496                         printk(KERN_CONT "FAILED!\n");
497                         goto out;
498                 }
499                 /* Only reset on passing, to avoid touching corrupted buffers */
500                 for_each_tracing_cpu(i)
501                         tracing_reset(tr, i);
502
503                 printk(KERN_CONT "PASSED\n");
504         }
505 #endif
506
507         type->next = trace_types;
508         trace_types = type;
509         len = strlen(type->name);
510         if (len > max_tracer_type_len)
511                 max_tracer_type_len = len;
512
513  out:
514         tracing_selftest_running = false;
515         mutex_unlock(&trace_types_lock);
516         lock_kernel();
517
518         return ret;
519 }
520
521 void unregister_tracer(struct tracer *type)
522 {
523         struct tracer **t;
524         int len;
525
526         mutex_lock(&trace_types_lock);
527         for (t = &trace_types; *t; t = &(*t)->next) {
528                 if (*t == type)
529                         goto found;
530         }
531         pr_info("Trace %s not registered\n", type->name);
532         goto out;
533
534  found:
535         *t = (*t)->next;
536         if (strlen(type->name) != max_tracer_type_len)
537                 goto out;
538
539         max_tracer_type_len = 0;
540         for (t = &trace_types; *t; t = &(*t)->next) {
541                 len = strlen((*t)->name);
542                 if (len > max_tracer_type_len)
543                         max_tracer_type_len = len;
544         }
545  out:
546         mutex_unlock(&trace_types_lock);
547 }
548
549 void tracing_reset(struct trace_array *tr, int cpu)
550 {
551         ftrace_disable_cpu();
552         ring_buffer_reset_cpu(tr->buffer, cpu);
553         ftrace_enable_cpu();
554 }
555
556 void tracing_reset_online_cpus(struct trace_array *tr)
557 {
558         int cpu;
559
560         tr->time_start = ftrace_now(tr->cpu);
561
562         for_each_online_cpu(cpu)
563                 tracing_reset(tr, cpu);
564 }
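/*
 * Illustrative sketch, not part of the original file: the minimal shape
 * of a plugin using register_tracer() above.  It assumes struct tracer
 * exposes .init and .reset callbacks, as the in-tree tracers of this
 * era do; the "example" names are hypothetical.
 */
static int example_tracer_init(struct trace_array *tr)
{
        tracing_reset_online_cpus(tr);
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        tracing_reset_online_cpus(tr);
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static int __init init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
device_initcall(init_example_tracer);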
565
566 #define SAVED_CMDLINES 128
567 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
568 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
569 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
570 static int cmdline_idx;
571 static DEFINE_SPINLOCK(trace_cmdline_lock);
572
573 /* temporarily disable recording */
574 atomic_t trace_record_cmdline_disabled __read_mostly;
575
576 static void trace_init_cmdlines(void)
577 {
578         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
579         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
580         cmdline_idx = 0;
581 }
582
583 static int trace_stop_count;
584 static DEFINE_SPINLOCK(tracing_start_lock);
585
586 /**
587  * ftrace_off_permanent - disable all ftrace code permanently
588  *
589  * This should only be called when a serious anomaly has
590  * been detected.  This will turn off function tracing,
591  * ring buffers, and other tracing utilities. It takes no
592  * locks and can be called from any context.
593  */
594 void ftrace_off_permanent(void)
595 {
596         tracing_disabled = 1;
597         ftrace_stop();
598         tracing_off_permanent();
599 }
600
601 /**
602  * tracing_start - quick start of the tracer
603  *
604  * If tracing is enabled but was stopped by tracing_stop,
605  * this will start the tracer back up.
606  */
607 void tracing_start(void)
608 {
609         struct ring_buffer *buffer;
610         unsigned long flags;
611
612         if (tracing_disabled)
613                 return;
614
615         spin_lock_irqsave(&tracing_start_lock, flags);
616         if (--trace_stop_count)
617                 goto out;
618
619         if (trace_stop_count < 0) {
620                 /* Someone screwed up their debugging */
621                 WARN_ON_ONCE(1);
622                 trace_stop_count = 0;
623                 goto out;
624         }
625
626
627         buffer = global_trace.buffer;
628         if (buffer)
629                 ring_buffer_record_enable(buffer);
630
631         buffer = max_tr.buffer;
632         if (buffer)
633                 ring_buffer_record_enable(buffer);
634
635         ftrace_start();
636  out:
637         spin_unlock_irqrestore(&tracing_start_lock, flags);
638 }
639
640 /**
641  * tracing_stop - quick stop of the tracer
642  *
643  * Lightweight way to stop tracing. Use in conjunction with
644  * tracing_start.
645  */
646 void tracing_stop(void)
647 {
648         struct ring_buffer *buffer;
649         unsigned long flags;
650
651         ftrace_stop();
652         spin_lock_irqsave(&tracing_start_lock, flags);
653         if (trace_stop_count++)
654                 goto out;
655
656         buffer = global_trace.buffer;
657         if (buffer)
658                 ring_buffer_record_disable(buffer);
659
660         buffer = max_tr.buffer;
661         if (buffer)
662                 ring_buffer_record_disable(buffer);
663
664  out:
665         spin_unlock_irqrestore(&tracing_start_lock, flags);
666 }
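/*
 * Illustrative sketch, not part of the original file: a caller pairing
 * tracing_stop()/tracing_start() above to quiesce the ring buffers
 * around an operation that should not race with new entries.
 */
static void example_with_tracing_paused(void (*work)(void))
{
        tracing_stop();
        work();         /* runs while recording is disabled */
        tracing_start();
}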
667
668 void trace_stop_cmdline_recording(void);
669
670 static void trace_save_cmdline(struct task_struct *tsk)
671 {
672         unsigned map;
673         unsigned idx;
674
675         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
676                 return;
677
678         /*
679          * It's not the end of the world if we don't get
680          * the lock, but we also don't want to spin
681          * nor do we want to disable interrupts,
682          * so if we miss here, then better luck next time.
683          */
684         if (!spin_trylock(&trace_cmdline_lock))
685                 return;
686
687         idx = map_pid_to_cmdline[tsk->pid];
688         if (idx >= SAVED_CMDLINES) {
689                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
690
691                 map = map_cmdline_to_pid[idx];
692                 if (map <= PID_MAX_DEFAULT)
693                         map_pid_to_cmdline[map] = (unsigned)-1;
694
695                 map_pid_to_cmdline[tsk->pid] = idx;
696
697                 cmdline_idx = idx;
698         }
699
700         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
701
702         spin_unlock(&trace_cmdline_lock);
703 }
704
705 char *trace_find_cmdline(int pid)
706 {
707         char *cmdline = "<...>";
708         unsigned map;
709
710         if (!pid)
711                 return "<idle>";
712
713         if (pid > PID_MAX_DEFAULT)
714                 goto out;
715
716         map = map_pid_to_cmdline[pid];
717         if (map >= SAVED_CMDLINES)
718                 goto out;
719
720         cmdline = saved_cmdlines[map];
721
722  out:
723         return cmdline;
724 }
725
726 void tracing_record_cmdline(struct task_struct *tsk)
727 {
728         if (atomic_read(&trace_record_cmdline_disabled))
729                 return;
730
731         trace_save_cmdline(tsk);
732 }
733
734 void
735 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
736                              int pc)
737 {
738         struct task_struct *tsk = current;
739
740         entry->preempt_count            = pc & 0xff;
741         entry->pid                      = (tsk) ? tsk->pid : 0;
742         entry->tgid                     = (tsk) ? tsk->tgid : 0;
743         entry->flags =
744 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
745                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
746 #else
747                 TRACE_FLAG_IRQS_NOSUPPORT |
748 #endif
749                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
750                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
751                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
752 }
753
754 void
755 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
756                unsigned long ip, unsigned long parent_ip, unsigned long flags,
757                int pc)
758 {
759         struct ring_buffer_event *event;
760         struct ftrace_entry *entry;
761         unsigned long irq_flags;
762
763         /* If we are reading the ring buffer, don't trace */
764         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
765                 return;
766
767         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
768                                          &irq_flags);
769         if (!event)
770                 return;
771         entry   = ring_buffer_event_data(event);
772         tracing_generic_entry_update(&entry->ent, flags, pc);
773         entry->ent.type                 = TRACE_FN;
774         entry->ip                       = ip;
775         entry->parent_ip                = parent_ip;
776         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
777 }
778
779 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
780 static void __trace_graph_entry(struct trace_array *tr,
781                                 struct trace_array_cpu *data,
782                                 struct ftrace_graph_ent *trace,
783                                 unsigned long flags,
784                                 int pc)
785 {
786         struct ring_buffer_event *event;
787         struct ftrace_graph_ent_entry *entry;
788         unsigned long irq_flags;
789
790         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
791                 return;
792
793         event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
794                                          &irq_flags);
795         if (!event)
796                 return;
797         entry   = ring_buffer_event_data(event);
798         tracing_generic_entry_update(&entry->ent, flags, pc);
799         entry->ent.type                 = TRACE_GRAPH_ENT;
800         entry->graph_ent                        = *trace;
801         ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
802 }
803
804 static void __trace_graph_return(struct trace_array *tr,
805                                 struct trace_array_cpu *data,
806                                 struct ftrace_graph_ret *trace,
807                                 unsigned long flags,
808                                 int pc)
809 {
810         struct ring_buffer_event *event;
811         struct ftrace_graph_ret_entry *entry;
812         unsigned long irq_flags;
813
814         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
815                 return;
816
817         event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
818                                          &irq_flags);
819         if (!event)
820                 return;
821         entry   = ring_buffer_event_data(event);
822         tracing_generic_entry_update(&entry->ent, flags, pc);
823         entry->ent.type                 = TRACE_GRAPH_RET;
824         entry->ret                              = *trace;
825         ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
826 }
827 #endif
828
829 void
830 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
831        unsigned long ip, unsigned long parent_ip, unsigned long flags,
832        int pc)
833 {
834         if (likely(!atomic_read(&data->disabled)))
835                 trace_function(tr, data, ip, parent_ip, flags, pc);
836 }
837
838 static void __ftrace_trace_stack(struct trace_array *tr,
839                                  struct trace_array_cpu *data,
840                                  unsigned long flags,
841                                  int skip, int pc)
842 {
843 #ifdef CONFIG_STACKTRACE
844         struct ring_buffer_event *event;
845         struct stack_entry *entry;
846         struct stack_trace trace;
847         unsigned long irq_flags;
848
849         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
850                                          &irq_flags);
851         if (!event)
852                 return;
853         entry   = ring_buffer_event_data(event);
854         tracing_generic_entry_update(&entry->ent, flags, pc);
855         entry->ent.type         = TRACE_STACK;
856
857         memset(&entry->caller, 0, sizeof(entry->caller));
858
859         trace.nr_entries        = 0;
860         trace.max_entries       = FTRACE_STACK_ENTRIES;
861         trace.skip              = skip;
862         trace.entries           = entry->caller;
863
864         save_stack_trace(&trace);
865         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
866 #endif
867 }
868
869 static void ftrace_trace_stack(struct trace_array *tr,
870                                struct trace_array_cpu *data,
871                                unsigned long flags,
872                                int skip, int pc)
873 {
874         if (!(trace_flags & TRACE_ITER_STACKTRACE))
875                 return;
876
877         __ftrace_trace_stack(tr, data, flags, skip, pc);
878 }
879
880 void __trace_stack(struct trace_array *tr,
881                    struct trace_array_cpu *data,
882                    unsigned long flags,
883                    int skip, int pc)
884 {
885         __ftrace_trace_stack(tr, data, flags, skip, pc);
886 }
887
888 static void ftrace_trace_userstack(struct trace_array *tr,
889                    struct trace_array_cpu *data,
890                    unsigned long flags, int pc)
891 {
892 #ifdef CONFIG_STACKTRACE
893         struct ring_buffer_event *event;
894         struct userstack_entry *entry;
895         struct stack_trace trace;
896         unsigned long irq_flags;
897
898         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
899                 return;
900
901         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
902                                          &irq_flags);
903         if (!event)
904                 return;
905         entry   = ring_buffer_event_data(event);
906         tracing_generic_entry_update(&entry->ent, flags, pc);
907         entry->ent.type         = TRACE_USER_STACK;
908
909         memset(&entry->caller, 0, sizeof(entry->caller));
910
911         trace.nr_entries        = 0;
912         trace.max_entries       = FTRACE_STACK_ENTRIES;
913         trace.skip              = 0;
914         trace.entries           = entry->caller;
915
916         save_stack_trace_user(&trace);
917         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
918 #endif
919 }
920
921 void __trace_userstack(struct trace_array *tr,
922                    struct trace_array_cpu *data,
923                    unsigned long flags)
924 {
925         ftrace_trace_userstack(tr, data, flags, preempt_count());
926 }
927
928 static void
929 ftrace_trace_special(void *__tr, void *__data,
930                      unsigned long arg1, unsigned long arg2, unsigned long arg3,
931                      int pc)
932 {
933         struct ring_buffer_event *event;
934         struct trace_array_cpu *data = __data;
935         struct trace_array *tr = __tr;
936         struct special_entry *entry;
937         unsigned long irq_flags;
938
939         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
940                                          &irq_flags);
941         if (!event)
942                 return;
943         entry   = ring_buffer_event_data(event);
944         tracing_generic_entry_update(&entry->ent, 0, pc);
945         entry->ent.type                 = TRACE_SPECIAL;
946         entry->arg1                     = arg1;
947         entry->arg2                     = arg2;
948         entry->arg3                     = arg3;
949         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
950         ftrace_trace_stack(tr, data, irq_flags, 4, pc);
951         ftrace_trace_userstack(tr, data, irq_flags, pc);
952
953         trace_wake_up();
954 }
955
956 void
957 __trace_special(void *__tr, void *__data,
958                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
959 {
960         ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
961 }
962
963 void
964 tracing_sched_switch_trace(struct trace_array *tr,
965                            struct trace_array_cpu *data,
966                            struct task_struct *prev,
967                            struct task_struct *next,
968                            unsigned long flags, int pc)
969 {
970         struct ring_buffer_event *event;
971         struct ctx_switch_entry *entry;
972         unsigned long irq_flags;
973
974         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
975                                            &irq_flags);
976         if (!event)
977                 return;
978         entry   = ring_buffer_event_data(event);
979         tracing_generic_entry_update(&entry->ent, flags, pc);
980         entry->ent.type                 = TRACE_CTX;
981         entry->prev_pid                 = prev->pid;
982         entry->prev_prio                = prev->prio;
983         entry->prev_state               = prev->state;
984         entry->next_pid                 = next->pid;
985         entry->next_prio                = next->prio;
986         entry->next_state               = next->state;
987         entry->next_cpu = task_cpu(next);
988         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
989         ftrace_trace_stack(tr, data, flags, 5, pc);
990         ftrace_trace_userstack(tr, data, flags, pc);
991 }
992
993 void
994 tracing_sched_wakeup_trace(struct trace_array *tr,
995                            struct trace_array_cpu *data,
996                            struct task_struct *wakee,
997                            struct task_struct *curr,
998                            unsigned long flags, int pc)
999 {
1000         struct ring_buffer_event *event;
1001         struct ctx_switch_entry *entry;
1002         unsigned long irq_flags;
1003
1004         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
1005                                            &irq_flags);
1006         if (!event)
1007                 return;
1008         entry   = ring_buffer_event_data(event);
1009         tracing_generic_entry_update(&entry->ent, flags, pc);
1010         entry->ent.type                 = TRACE_WAKE;
1011         entry->prev_pid                 = curr->pid;
1012         entry->prev_prio                = curr->prio;
1013         entry->prev_state               = curr->state;
1014         entry->next_pid                 = wakee->pid;
1015         entry->next_prio                = wakee->prio;
1016         entry->next_state               = wakee->state;
1017         entry->next_cpu                 = task_cpu(wakee);
1018         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1019         ftrace_trace_stack(tr, data, flags, 6, pc);
1020         ftrace_trace_userstack(tr, data, flags, pc);
1021
1022         trace_wake_up();
1023 }
1024
1025 void
1026 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1027 {
1028         struct trace_array *tr = &global_trace;
1029         struct trace_array_cpu *data;
1030         unsigned long flags;
1031         int cpu;
1032         int pc;
1033
1034         if (tracing_disabled)
1035                 return;
1036
1037         pc = preempt_count();
1038         local_irq_save(flags);
1039         cpu = raw_smp_processor_id();
1040         data = tr->data[cpu];
1041
1042         if (likely(atomic_inc_return(&data->disabled) == 1))
1043                 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1044
1045         atomic_dec(&data->disabled);
1046         local_irq_restore(flags);
1047 }
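/*
 * Illustrative sketch, not part of the original file: ftrace_special()
 * above is a catch-all debugging hook; arbitrary kernel code can log
 * three values into the trace with it.  The wrapper name is
 * hypothetical.
 */
static void example_mark_progress(unsigned long step)
{
        ftrace_special(step, (unsigned long)current->pid, 0);
}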
1048
1049 #ifdef CONFIG_FUNCTION_TRACER
1050 static void
1051 function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1052 {
1053         struct trace_array *tr = &global_trace;
1054         struct trace_array_cpu *data;
1055         unsigned long flags;
1056         long disabled;
1057         int cpu, resched;
1058         int pc;
1059
1060         if (unlikely(!ftrace_function_enabled))
1061                 return;
1062
1063         pc = preempt_count();
1064         resched = ftrace_preempt_disable();
1065         local_save_flags(flags);
1066         cpu = raw_smp_processor_id();
1067         data = tr->data[cpu];
1068         disabled = atomic_inc_return(&data->disabled);
1069
1070         if (likely(disabled == 1))
1071                 trace_function(tr, data, ip, parent_ip, flags, pc);
1072
1073         atomic_dec(&data->disabled);
1074         ftrace_preempt_enable(resched);
1075 }
1076
1077 static void
1078 function_trace_call(unsigned long ip, unsigned long parent_ip)
1079 {
1080         struct trace_array *tr = &global_trace;
1081         struct trace_array_cpu *data;
1082         unsigned long flags;
1083         long disabled;
1084         int cpu;
1085         int pc;
1086
1087         if (unlikely(!ftrace_function_enabled))
1088                 return;
1089
1090         /*
1091          * Need to use raw, since this must be called before the
1092          * recursive protection is performed.
1093          */
1094         local_irq_save(flags);
1095         cpu = raw_smp_processor_id();
1096         data = tr->data[cpu];
1097         disabled = atomic_inc_return(&data->disabled);
1098
1099         if (likely(disabled == 1)) {
1100                 pc = preempt_count();
1101                 trace_function(tr, data, ip, parent_ip, flags, pc);
1102         }
1103
1104         atomic_dec(&data->disabled);
1105         local_irq_restore(flags);
1106 }
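/*
 * Illustrative sketch, not part of the original file: in the spirit of
 * this commit ("add stack trace to function tracer"), a callback shaped
 * like function_trace_call() above that also records a stack trace for
 * every traced function by reusing __trace_stack().
 */
static void
example_function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
{
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
        int cpu;
        int pc;

        if (unlikely(!ftrace_function_enabled))
                return;

        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
        disabled = atomic_inc_return(&data->disabled);

        if (likely(disabled == 1)) {
                pc = preempt_count();
                trace_function(tr, data, ip, parent_ip, flags, pc);
                /* skip this callback and its trampoline in the backtrace */
                __trace_stack(tr, data, flags, 2, pc);
        }

        atomic_dec(&data->disabled);
        local_irq_restore(flags);
}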
1107
1108 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1109 int trace_graph_entry(struct ftrace_graph_ent *trace)
1110 {
1111         struct trace_array *tr = &global_trace;
1112         struct trace_array_cpu *data;
1113         unsigned long flags;
1114         long disabled;
1115         int cpu;
1116         int pc;
1117
1118         if (!ftrace_trace_task(current))
1119                 return 0;
1120
1121         if (!ftrace_graph_addr(trace->func))
1122                 return 0;
1123
1124         local_irq_save(flags);
1125         cpu = raw_smp_processor_id();
1126         data = tr->data[cpu];
1127         disabled = atomic_inc_return(&data->disabled);
1128         if (likely(disabled == 1)) {
1129                 pc = preempt_count();
1130                 __trace_graph_entry(tr, data, trace, flags, pc);
1131         }
1132         /* Only do the atomic if it is not already set */
1133         if (!test_tsk_trace_graph(current))
1134                 set_tsk_trace_graph(current);
1135         atomic_dec(&data->disabled);
1136         local_irq_restore(flags);
1137
1138         return 1;
1139 }
1140
1141 void trace_graph_return(struct ftrace_graph_ret *trace)
1142 {
1143         struct trace_array *tr = &global_trace;
1144         struct trace_array_cpu *data;
1145         unsigned long flags;
1146         long disabled;
1147         int cpu;
1148         int pc;
1149
1150         local_irq_save(flags);
1151         cpu = raw_smp_processor_id();
1152         data = tr->data[cpu];
1153         disabled = atomic_inc_return(&data->disabled);
1154         if (likely(disabled == 1)) {
1155                 pc = preempt_count();
1156                 __trace_graph_return(tr, data, trace, flags, pc);
1157         }
1158         if (!trace->depth)
1159                 clear_tsk_trace_graph(current);
1160         atomic_dec(&data->disabled);
1161         local_irq_restore(flags);
1162 }
1163 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1164
1165 static struct ftrace_ops trace_ops __read_mostly =
1166 {
1167         .func = function_trace_call,
1168 };
1169
1170 void tracing_start_function_trace(void)
1171 {
1172         ftrace_function_enabled = 0;
1173
1174         if (trace_flags & TRACE_ITER_PREEMPTONLY)
1175                 trace_ops.func = function_trace_call_preempt_only;
1176         else
1177                 trace_ops.func = function_trace_call;
1178
1179         register_ftrace_function(&trace_ops);
1180         ftrace_function_enabled = 1;
1181 }
1182
1183 void tracing_stop_function_trace(void)
1184 {
1185         ftrace_function_enabled = 0;
1186         unregister_ftrace_function(&trace_ops);
1187 }
1188 #endif
1189
1190 enum trace_file_type {
1191         TRACE_FILE_LAT_FMT      = 1,
1192         TRACE_FILE_ANNOTATE     = 2,
1193 };
1194
1195 static void trace_iterator_increment(struct trace_iterator *iter)
1196 {
1197         /* Don't allow ftrace to trace into the ring buffers */
1198         ftrace_disable_cpu();
1199
1200         iter->idx++;
1201         if (iter->buffer_iter[iter->cpu])
1202                 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1203
1204         ftrace_enable_cpu();
1205 }
1206
1207 static struct trace_entry *
1208 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1209 {
1210         struct ring_buffer_event *event;
1211         struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1212
1213         /* Don't allow ftrace to trace into the ring buffers */
1214         ftrace_disable_cpu();
1215
1216         if (buf_iter)
1217                 event = ring_buffer_iter_peek(buf_iter, ts);
1218         else
1219                 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1220
1221         ftrace_enable_cpu();
1222
1223         return event ? ring_buffer_event_data(event) : NULL;
1224 }
1225
1226 static struct trace_entry *
1227 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1228 {
1229         struct ring_buffer *buffer = iter->tr->buffer;
1230         struct trace_entry *ent, *next = NULL;
1231         u64 next_ts = 0, ts;
1232         int next_cpu = -1;
1233         int cpu;
1234
1235         for_each_tracing_cpu(cpu) {
1236
1237                 if (ring_buffer_empty_cpu(buffer, cpu))
1238                         continue;
1239
1240                 ent = peek_next_entry(iter, cpu, &ts);
1241
1242                 /*
1243                  * Pick the entry with the smallest timestamp:
1244                  */
1245                 if (ent && (!next || ts < next_ts)) {
1246                         next = ent;
1247                         next_cpu = cpu;
1248                         next_ts = ts;
1249                 }
1250         }
1251
1252         if (ent_cpu)
1253                 *ent_cpu = next_cpu;
1254
1255         if (ent_ts)
1256                 *ent_ts = next_ts;
1257
1258         return next;
1259 }
1260
1261 /* Find the next real entry, without updating the iterator itself */
1262 static struct trace_entry *
1263 find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1264 {
1265         return __find_next_entry(iter, ent_cpu, ent_ts);
1266 }
1267
1268 /* Find the next real entry, and increment the iterator to the next entry */
1269 static void *find_next_entry_inc(struct trace_iterator *iter)
1270 {
1271         iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1272
1273         if (iter->ent)
1274                 trace_iterator_increment(iter);
1275
1276         return iter->ent ? iter : NULL;
1277 }
1278
1279 static void trace_consume(struct trace_iterator *iter)
1280 {
1281         /* Don't allow ftrace to trace into the ring buffers */
1282         ftrace_disable_cpu();
1283         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1284         ftrace_enable_cpu();
1285 }
1286
1287 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1288 {
1289         struct trace_iterator *iter = m->private;
1290         int i = (int)*pos;
1291         void *ent;
1292
1293         (*pos)++;
1294
1295         /* can't go backwards */
1296         if (iter->idx > i)
1297                 return NULL;
1298
1299         if (iter->idx < 0)
1300                 ent = find_next_entry_inc(iter);
1301         else
1302                 ent = iter;
1303
1304         while (ent && iter->idx < i)
1305                 ent = find_next_entry_inc(iter);
1306
1307         iter->pos = *pos;
1308
1309         return ent;
1310 }
1311
1312 static void *s_start(struct seq_file *m, loff_t *pos)
1313 {
1314         struct trace_iterator *iter = m->private;
1315         void *p = NULL;
1316         loff_t l = 0;
1317         int cpu;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         if (!current_trace || current_trace != iter->trace) {
1322                 mutex_unlock(&trace_types_lock);
1323                 return NULL;
1324         }
1325
1326         atomic_inc(&trace_record_cmdline_disabled);
1327
1328         if (*pos != iter->pos) {
1329                 iter->ent = NULL;
1330                 iter->cpu = 0;
1331                 iter->idx = -1;
1332
1333                 ftrace_disable_cpu();
1334
1335                 for_each_tracing_cpu(cpu) {
1336                         ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1337                 }
1338
1339                 ftrace_enable_cpu();
1340
1341                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1342                         ;
1343
1344         } else {
1345                 l = *pos - 1;
1346                 p = s_next(m, p, &l);
1347         }
1348
1349         return p;
1350 }
1351
1352 static void s_stop(struct seq_file *m, void *p)
1353 {
1354         atomic_dec(&trace_record_cmdline_disabled);
1355         mutex_unlock(&trace_types_lock);
1356 }
1357
1358 static void print_lat_help_header(struct seq_file *m)
1359 {
1360         seq_puts(m, "#                  _------=> CPU#            \n");
1361         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1362         seq_puts(m, "#                | / _----=> need-resched    \n");
1363         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1364         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1365         seq_puts(m, "#                |||| /                      \n");
1366         seq_puts(m, "#                |||||     delay             \n");
1367         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1368         seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
1369 }
1370
1371 static void print_func_help_header(struct seq_file *m)
1372 {
1373         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1374         seq_puts(m, "#              | |       |          |         |\n");
1375 }
1376
1377
1378 static void
1379 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1380 {
1381         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1382         struct trace_array *tr = iter->tr;
1383         struct trace_array_cpu *data = tr->data[tr->cpu];
1384         struct tracer *type = current_trace;
1385         unsigned long total;
1386         unsigned long entries;
1387         const char *name = "preemption";
1388
1389         if (type)
1390                 name = type->name;
1391
1392         entries = ring_buffer_entries(iter->tr->buffer);
1393         total = entries +
1394                 ring_buffer_overruns(iter->tr->buffer);
1395
1396         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1397                    name, UTS_RELEASE);
1398         seq_puts(m, "-----------------------------------"
1399                  "---------------------------------\n");
1400         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1401                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1402                    nsecs_to_usecs(data->saved_latency),
1403                    entries,
1404                    total,
1405                    tr->cpu,
1406 #if defined(CONFIG_PREEMPT_NONE)
1407                    "server",
1408 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1409                    "desktop",
1410 #elif defined(CONFIG_PREEMPT)
1411                    "preempt",
1412 #else
1413                    "unknown",
1414 #endif
1415                    /* These are reserved for later use */
1416                    0, 0, 0, 0);
1417 #ifdef CONFIG_SMP
1418         seq_printf(m, " #P:%d)\n", num_online_cpus());
1419 #else
1420         seq_puts(m, ")\n");
1421 #endif
1422         seq_puts(m, "    -----------------\n");
1423         seq_printf(m, "    | task: %.16s-%d "
1424                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1425                    data->comm, data->pid, data->uid, data->nice,
1426                    data->policy, data->rt_priority);
1427         seq_puts(m, "    -----------------\n");
1428
1429         if (data->critical_start) {
1430                 seq_puts(m, " => started at: ");
1431                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1432                 trace_print_seq(m, &iter->seq);
1433                 seq_puts(m, "\n => ended at:   ");
1434                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1435                 trace_print_seq(m, &iter->seq);
1436                 seq_puts(m, "\n");
1437         }
1438
1439         seq_puts(m, "\n");
1440 }
1441
1442 static void
1443 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1444 {
1445         int hardirq, softirq;
1446         char *comm;
1447
1448         comm = trace_find_cmdline(entry->pid);
1449
1450         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1451         trace_seq_printf(s, "%3d", cpu);
1452         trace_seq_printf(s, "%c%c",
1453                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1454                          (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1455                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1456
1457         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1458         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1459         if (hardirq && softirq) {
1460                 trace_seq_putc(s, 'H');
1461         } else {
1462                 if (hardirq) {
1463                         trace_seq_putc(s, 'h');
1464                 } else {
1465                         if (softirq)
1466                                 trace_seq_putc(s, 's');
1467                         else
1468                                 trace_seq_putc(s, '.');
1469                 }
1470         }
1471
1472         if (entry->preempt_count)
1473                 trace_seq_printf(s, "%x", entry->preempt_count);
1474         else
1475                 trace_seq_puts(s, ".");
1476 }
1477
1478 unsigned long preempt_mark_thresh = 100;
1479
1480 static void
1481 lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1482                     unsigned long rel_usecs)
1483 {
1484         trace_seq_printf(s, " %4lldus", abs_usecs);
1485         if (rel_usecs > preempt_mark_thresh)
1486                 trace_seq_puts(s, "!: ");
1487         else if (rel_usecs > 1)
1488                 trace_seq_puts(s, "+: ");
1489         else
1490                 trace_seq_puts(s, " : ");
1491 }
1492
1493 static void test_cpu_buff_start(struct trace_iterator *iter)
1494 {
1495         struct trace_seq *s = &iter->seq;
1496
1497         if (!(trace_flags & TRACE_ITER_ANNOTATE))
1498                 return;
1499
1500         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1501                 return;
1502
1503         if (cpumask_test_cpu(iter->cpu, iter->started))
1504                 return;
1505
1506         cpumask_set_cpu(iter->cpu, iter->started);
1507         trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1508 }
1509
1510 static enum print_line_t
1511 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1512 {
1513         struct trace_seq *s = &iter->seq;
1514         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1515         struct trace_entry *next_entry;
1516         struct trace_event *event;
1517         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1518         struct trace_entry *entry = iter->ent;
1519         unsigned long abs_usecs;
1520         unsigned long rel_usecs;
1521         u64 next_ts;
1522         char *comm;
1523         int ret;
1524
1525         test_cpu_buff_start(iter);
1526
1527         next_entry = find_next_entry(iter, NULL, &next_ts);
1528         if (!next_entry)
1529                 next_ts = iter->ts;
1530         rel_usecs = ns2usecs(next_ts - iter->ts);
1531         abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1532
1533         if (verbose) {
1534                 comm = trace_find_cmdline(entry->pid);
1535                 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1536                                  " %ld.%03ldms (+%ld.%03ldms): ",
1537                                  comm,
1538                                  entry->pid, cpu, entry->flags,
1539                                  entry->preempt_count, trace_idx,
1540                                  ns2usecs(iter->ts),
1541                                  abs_usecs/1000,
1542                                  abs_usecs % 1000, rel_usecs/1000,
1543                                  rel_usecs % 1000);
1544         } else {
1545                 lat_print_generic(s, entry, cpu);
1546                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1547         }
1548
1549         event = ftrace_find_event(entry->type);
1550         if (event && event->latency_trace) {
1551                 ret = event->latency_trace(s, entry, sym_flags);
1552                 if (ret)
1553                         return ret;
1554                 return TRACE_TYPE_HANDLED;
1555         }
1556
1557         trace_seq_printf(s, "Unknown type %d\n", entry->type);
1558         return TRACE_TYPE_HANDLED;
1559 }
1560
1561 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1562 {
1563         struct trace_seq *s = &iter->seq;
1564         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1565         struct trace_entry *entry;
1566         struct trace_event *event;
1567         unsigned long usec_rem;
1568         unsigned long long t;
1569         unsigned long secs;
1570         char *comm;
1571         int ret;
1572
1573         entry = iter->ent;
1574
1575         test_cpu_buff_start(iter);
1576
1577         comm = trace_find_cmdline(iter->ent->pid);
1578
1579         t = ns2usecs(iter->ts);
1580         usec_rem = do_div(t, 1000000ULL);
1581         secs = (unsigned long)t;
1582
1583         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1584         if (!ret)
1585                 return TRACE_TYPE_PARTIAL_LINE;
1586         ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1587         if (!ret)
1588                 return TRACE_TYPE_PARTIAL_LINE;
1589         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1590         if (!ret)
1591                 return TRACE_TYPE_PARTIAL_LINE;
1592
1593         event = ftrace_find_event(entry->type);
1594         if (event && event->trace) {
1595                 ret = event->trace(s, entry, sym_flags);
1596                 if (ret)
1597                         return ret;
1598                 return TRACE_TYPE_HANDLED;
1599         }
1600         ret = trace_seq_printf(s, "Unknown type %d\n", entry->type);
1601         if (!ret)
1602                 return TRACE_TYPE_PARTIAL_LINE;
1603
1604         return TRACE_TYPE_HANDLED;
1605 }
1606
1607 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1608 {
1609         struct trace_seq *s = &iter->seq;
1610         struct trace_entry *entry;
1611         struct trace_event *event;
1612         int ret;
1613
1614         entry = iter->ent;
1615
1616         ret = trace_seq_printf(s, "%d %d %llu ",
1617                 entry->pid, iter->cpu, iter->ts);
1618         if (!ret)
1619                 return TRACE_TYPE_PARTIAL_LINE;
1620
1621         event = ftrace_find_event(entry->type);
1622         if (event && event->raw) {
1623                 ret = event->raw(s, entry, 0);
1624                 if (ret)
1625                         return ret;
1626                 return TRACE_TYPE_HANDLED;
1627         }
1628         ret = trace_seq_printf(s, "%d ?\n", entry->type);
1629         if (!ret)
1630                 return TRACE_TYPE_PARTIAL_LINE;
1631
1632         return TRACE_TYPE_HANDLED;
1633 }
1634
1635 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1636 {
1637         struct trace_seq *s = &iter->seq;
1638         unsigned char newline = '\n';
1639         struct trace_entry *entry;
1640         struct trace_event *event;
1641
1642         entry = iter->ent;
1643
1644         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1645         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1646         SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1647
1648         event = ftrace_find_event(entry->type);
1649         if (event && event->hex)
1650                 event->hex(s, entry, 0);
1651
1652         SEQ_PUT_FIELD_RET(s, newline);
1653
1654         return TRACE_TYPE_HANDLED;
1655 }
1656
1657 static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
1658 {
1659         struct trace_seq *s = &iter->seq;
1660         struct trace_entry *entry = iter->ent;
1661         struct print_entry *field;
1662         int ret;
1663
1664         trace_assign_type(field, entry);
1665
1666         ret = trace_seq_printf(s, "%s", field->buf);
1667         if (!ret)
1668                 return TRACE_TYPE_PARTIAL_LINE;
1669
1670         return TRACE_TYPE_HANDLED;
1671 }
1672
1673 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1674 {
1675         struct trace_seq *s = &iter->seq;
1676         struct trace_entry *entry;
1677         struct trace_event *event;
1678
1679         entry = iter->ent;
1680
1681         SEQ_PUT_FIELD_RET(s, entry->pid);
1682         SEQ_PUT_FIELD_RET(s, entry->cpu);
1683         SEQ_PUT_FIELD_RET(s, iter->ts);
1684
1685         event = ftrace_find_event(entry->type);
1686         if (event && event->binary)
1687                 event->binary(s, entry, 0);
1688
1689         return TRACE_TYPE_HANDLED;
1690 }
1691
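/*
 * Return 1 only when every per-CPU buffer is empty.  A CPU that has
 * a buffer iterator is checked through that iterator; otherwise the
 * ring buffer is queried directly.
 */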
1692 static int trace_empty(struct trace_iterator *iter)
1693 {
1694         int cpu;
1695
1696         for_each_tracing_cpu(cpu) {
1697                 if (iter->buffer_iter[cpu]) {
1698                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1699                                 return 0;
1700                 } else {
1701                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1702                                 return 0;
1703                 }
1704         }
1705
1706         return 1;
1707 }
1708
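/*
 * Format a single entry for output.  The current tracer's own
 * print_line() hook gets first shot; if it returns
 * TRACE_TYPE_UNHANDLED we fall back to the generic printers chosen
 * by the trace_options flags: printk-msg-only, bin, hex, raw, then
 * the latency or default text format.
 */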
1709 static enum print_line_t print_trace_line(struct trace_iterator *iter)
1710 {
1711         enum print_line_t ret;
1712
1713         if (iter->trace && iter->trace->print_line) {
1714                 ret = iter->trace->print_line(iter);
1715                 if (ret != TRACE_TYPE_UNHANDLED)
1716                         return ret;
1717         }
1718
1719         if (iter->ent->type == TRACE_PRINT &&
1720                         trace_flags & TRACE_ITER_PRINTK &&
1721                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1722                 return print_printk_msg_only(iter);
1723
1724         if (trace_flags & TRACE_ITER_BIN)
1725                 return print_bin_fmt(iter);
1726
1727         if (trace_flags & TRACE_ITER_HEX)
1728                 return print_hex_fmt(iter);
1729
1730         if (trace_flags & TRACE_ITER_RAW)
1731                 return print_raw_fmt(iter);
1732
1733         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1734                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1735
1736         return print_trace_fmt(iter);
1737 }
1738
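/*
 * seq_file ->show() handler for the trace files: print the header
 * lines while there is no current entry, otherwise format the entry
 * with print_trace_line() and flush the accumulated trace_seq.
 */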
1739 static int s_show(struct seq_file *m, void *v)
1740 {
1741         struct trace_iterator *iter = v;
1742
1743         if (iter->ent == NULL) {
1744                 if (iter->tr) {
1745                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1746                         seq_puts(m, "#\n");
1747                 }
1748                 if (iter->trace && iter->trace->print_header)
1749                         iter->trace->print_header(m);
1750                 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1751                         /* print nothing if the buffers are empty */
1752                         if (trace_empty(iter))
1753                                 return 0;
1754                         print_trace_header(m, iter);
1755                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1756                                 print_lat_help_header(m);
1757                 } else {
1758                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1759                                 print_func_help_header(m);
1760                 }
1761         } else {
1762                 print_trace_line(iter);
1763                 trace_print_seq(m, &iter->seq);
1764         }
1765
1766         return 0;
1767 }
1768
1769 static struct seq_operations tracer_seq_ops = {
1770         .start          = s_start,
1771         .next           = s_next,
1772         .stop           = s_stop,
1773         .show           = s_show,
1774 };
1775
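/*
 * Set up a trace_iterator with one ring buffer iterator per tracing
 * CPU and attach it to the seq_file.  Tracing is stopped while the
 * file is open; tracing_release() starts it again.
 */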
1776 static struct trace_iterator *
1777 __tracing_open(struct inode *inode, struct file *file, int *ret)
1778 {
1779         struct trace_iterator *iter;
1780         struct seq_file *m;
1781         int cpu;
1782
1783         if (tracing_disabled) {
1784                 *ret = -ENODEV;
1785                 return NULL;
1786         }
1787
1788         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1789         if (!iter) {
1790                 *ret = -ENOMEM;
1791                 goto out;
1792         }
1793
1794         mutex_lock(&trace_types_lock);
1795         if (current_trace && current_trace->print_max)
1796                 iter->tr = &max_tr;
1797         else
1798                 iter->tr = inode->i_private;
1799         iter->trace = current_trace;
1800         iter->pos = -1;
1801
1802         /* Notify the tracer early, before we stop tracing. */
1803         if (iter->trace && iter->trace->open)
1804                 iter->trace->open(iter);
1805
1806         /* Annotate start of buffers if we had overruns */
1807         if (ring_buffer_overruns(iter->tr->buffer))
1808                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1809
1810
1811         for_each_tracing_cpu(cpu) {
1812
1813                 iter->buffer_iter[cpu] =
1814                         ring_buffer_read_start(iter->tr->buffer, cpu);
1815
1816                 if (!iter->buffer_iter[cpu])
1817                         goto fail_buffer;
1818         }
1819
1820         /* TODO stop tracer */
1821         *ret = seq_open(file, &tracer_seq_ops);
1822         if (*ret)
1823                 goto fail_buffer;
1824
1825         m = file->private_data;
1826         m->private = iter;
1827
1828         /* stop the trace while dumping */
1829         tracing_stop();
1830
1831         mutex_unlock(&trace_types_lock);
1832
1833  out:
1834         return iter;
1835
1836  fail_buffer:
1837         for_each_tracing_cpu(cpu) {
1838                 if (iter->buffer_iter[cpu])
1839                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1840         }
1841         mutex_unlock(&trace_types_lock);
1842         kfree(iter);
1843         *ret = -ENOMEM;
1844         return ERR_PTR(-ENOMEM);
1845 }
1846
1847 int tracing_open_generic(struct inode *inode, struct file *filp)
1848 {
1849         if (tracing_disabled)
1850                 return -ENODEV;
1851
1852         filp->private_data = inode->i_private;
1853         return 0;
1854 }
1855
1856 int tracing_release(struct inode *inode, struct file *file)
1857 {
1858         struct seq_file *m = (struct seq_file *)file->private_data;
1859         struct trace_iterator *iter = m->private;
1860         int cpu;
1861
1862         mutex_lock(&trace_types_lock);
1863         for_each_tracing_cpu(cpu) {
1864                 if (iter->buffer_iter[cpu])
1865                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1866         }
1867
1868         if (iter->trace && iter->trace->close)
1869                 iter->trace->close(iter);
1870
1871         /* reenable tracing if it was previously enabled */
1872         tracing_start();
1873         mutex_unlock(&trace_types_lock);
1874
1875         seq_release(inode, file);
1876         kfree(iter);
1877         return 0;
1878 }
1879
1880 static int tracing_open(struct inode *inode, struct file *file)
1881 {
1882         int ret;
1883
1884         __tracing_open(inode, file, &ret);
1885
1886         return ret;
1887 }
1888
1889 static int tracing_lt_open(struct inode *inode, struct file *file)
1890 {
1891         struct trace_iterator *iter;
1892         int ret;
1893
1894         iter = __tracing_open(inode, file, &ret);
1895
1896         if (!ret)
1897                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1898
1899         return ret;
1900 }
1901
1902
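/*
 * seq_file iterators for the available_tracers file: walk the global
 * trace_types list under trace_types_lock and print the registered
 * tracer names separated by spaces.
 */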
1903 static void *
1904 t_next(struct seq_file *m, void *v, loff_t *pos)
1905 {
1906         struct tracer *t = m->private;
1907
1908         (*pos)++;
1909
1910         if (t)
1911                 t = t->next;
1912
1913         m->private = t;
1914
1915         return t;
1916 }
1917
1918 static void *t_start(struct seq_file *m, loff_t *pos)
1919 {
1920         struct tracer *t = m->private;
1921         loff_t l = 0;
1922
1923         mutex_lock(&trace_types_lock);
1924         for (; t && l < *pos; t = t_next(m, t, &l))
1925                 ;
1926
1927         return t;
1928 }
1929
1930 static void t_stop(struct seq_file *m, void *p)
1931 {
1932         mutex_unlock(&trace_types_lock);
1933 }
1934
1935 static int t_show(struct seq_file *m, void *v)
1936 {
1937         struct tracer *t = v;
1938
1939         if (!t)
1940                 return 0;
1941
1942         seq_printf(m, "%s", t->name);
1943         if (t->next)
1944                 seq_putc(m, ' ');
1945         else
1946                 seq_putc(m, '\n');
1947
1948         return 0;
1949 }
1950
1951 static struct seq_operations show_traces_seq_ops = {
1952         .start          = t_start,
1953         .next           = t_next,
1954         .stop           = t_stop,
1955         .show           = t_show,
1956 };
1957
1958 static int show_traces_open(struct inode *inode, struct file *file)
1959 {
1960         int ret;
1961
1962         if (tracing_disabled)
1963                 return -ENODEV;
1964
1965         ret = seq_open(file, &show_traces_seq_ops);
1966         if (!ret) {
1967                 struct seq_file *m = file->private_data;
1968                 m->private = trace_types;
1969         }
1970
1971         return ret;
1972 }
1973
1974 static struct file_operations tracing_fops = {
1975         .open           = tracing_open,
1976         .read           = seq_read,
1977         .llseek         = seq_lseek,
1978         .release        = tracing_release,
1979 };
1980
1981 static struct file_operations tracing_lt_fops = {
1982         .open           = tracing_lt_open,
1983         .read           = seq_read,
1984         .llseek         = seq_lseek,
1985         .release        = tracing_release,
1986 };
1987
1988 static struct file_operations show_traces_fops = {
1989         .open           = show_traces_open,
1990         .read           = seq_read,
1991         .release        = seq_release,
1992 };
1993
1994 /*
1995  * Only trace on a CPU if the bitmask is set:
1996  */
1997 static cpumask_var_t tracing_cpumask;
1998
1999 /*
2000  * The tracer itself will not take this lock, but still we want
2001  * to provide a consistent cpumask to user-space:
2002  */
2003 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2004
2005 /*
2006  * Temporary storage for the character representation of the
2007  * CPU bitmask (and one more byte for the newline):
2008  */
2009 static char mask_str[NR_CPUS + 1];
2010
2011 static ssize_t
2012 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2013                      size_t count, loff_t *ppos)
2014 {
2015         int len;
2016
2017         mutex_lock(&tracing_cpumask_update_lock);
2018
2019         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2020         if (count - len < 2) {
2021                 count = -EINVAL;
2022                 goto out_err;
2023         }
2024         len += sprintf(mask_str + len, "\n");
2025         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2026
2027 out_err:
2028         mutex_unlock(&tracing_cpumask_update_lock);
2029
2030         return count;
2031 }
2032
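/*
 * Writing tracing_cpumask selects which CPUs are traced.  The mask is
 * parsed from user space and, for every bit that flips, the per-CPU
 * 'disabled' count is adjusted under ftrace_max_lock with interrupts
 * off.  For example (assuming debugfs is mounted on /debug):
 *   # echo 3 > /debug/tracing/tracing_cpumask    (trace CPUs 0 and 1)
 */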
2033 static ssize_t
2034 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2035                       size_t count, loff_t *ppos)
2036 {
2037         int err, cpu;
2038         cpumask_var_t tracing_cpumask_new;
2039
2040         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2041                 return -ENOMEM;
2042
2043         mutex_lock(&tracing_cpumask_update_lock);
2044         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2045         if (err)
2046                 goto err_unlock;
2047
2048         local_irq_disable();
2049         __raw_spin_lock(&ftrace_max_lock);
2050         for_each_tracing_cpu(cpu) {
2051                 /*
2052                  * Increase/decrease the disabled counter if we are
2053                  * about to flip a bit in the cpumask:
2054                  */
2055                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2056                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2057                         atomic_inc(&global_trace.data[cpu]->disabled);
2058                 }
2059                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2060                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2061                         atomic_dec(&global_trace.data[cpu]->disabled);
2062                 }
2063         }
2064         __raw_spin_unlock(&ftrace_max_lock);
2065         local_irq_enable();
2066
2067         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2068
2069         mutex_unlock(&tracing_cpumask_update_lock);
2070         free_cpumask_var(tracing_cpumask_new);
2071
2072         return count;
2073
2074 err_unlock:
2075         mutex_unlock(&tracing_cpumask_update_lock);
2076         free_cpumask_var(tracing_cpumask_new);
2077
2078         return err;
2079 }
2080
2081 static struct file_operations tracing_cpumask_fops = {
2082         .open           = tracing_open_generic,
2083         .read           = tracing_cpumask_read,
2084         .write          = tracing_cpumask_write,
2085 };
2086
2087 static ssize_t
2088 tracing_trace_options_read(struct file *filp, char __user *ubuf,
2089                        size_t cnt, loff_t *ppos)
2090 {
2091         int i;
2092         char *buf;
2093         int r = 0;
2094         int len = 0;
2095         u32 tracer_flags = current_trace->flags->val;
2096         struct tracer_opt *trace_opts = current_trace->flags->opts;
2097
2098
2099         /* calculate max size */
2100         for (i = 0; trace_options[i]; i++) {
2101                 len += strlen(trace_options[i]);
2102                 len += 3; /* "no" and space */
2103         }
2104
2105         /*
2106          * Increase the size with the names of the options specific
2107          * to the current tracer.
2108          */
2109         for (i = 0; trace_opts[i].name; i++) {
2110                 len += strlen(trace_opts[i].name);
2111                 len += 3; /* "no" and space */
2112         }
2113
2114         /* +2 for \n and \0 */
2115         buf = kmalloc(len + 2, GFP_KERNEL);
2116         if (!buf)
2117                 return -ENOMEM;
2118
2119         for (i = 0; trace_options[i]; i++) {
2120                 if (trace_flags & (1 << i))
2121                         r += sprintf(buf + r, "%s ", trace_options[i]);
2122                 else
2123                         r += sprintf(buf + r, "no%s ", trace_options[i]);
2124         }
2125
2126         for (i = 0; trace_opts[i].name; i++) {
2127                 if (tracer_flags & trace_opts[i].bit)
2128                         r += sprintf(buf + r, "%s ",
2129                                 trace_opts[i].name);
2130                 else
2131                         r += sprintf(buf + r, "no%s ",
2132                                 trace_opts[i].name);
2133         }
2134
2135         r += sprintf(buf + r, "\n");
2136         WARN_ON(r >= len + 2);
2137
2138         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2139
2140         kfree(buf);
2141
2142         return r;
2143 }
2144
2145 /* Try to assign a tracer-specific option */
2146 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2147 {
2148         struct tracer_flags *trace_flags = trace->flags;
2149         struct tracer_opt *opts = NULL;
2150         int ret = 0, i = 0;
2151         int len;
2152
2153         for (i = 0; trace_flags->opts[i].name; i++) {
2154                 opts = &trace_flags->opts[i];
2155                 len = strlen(opts->name);
2156
2157                 if (strncmp(cmp, opts->name, len) == 0) {
2158                         ret = trace->set_flag(trace_flags->val,
2159                                 opts->bit, !neg);
2160                         break;
2161                 }
2162         }
2163         /* Not found */
2164         if (!trace_flags->opts[i].name)
2165                 return -EINVAL;
2166
2167         /* Refused to handle */
2168         if (ret)
2169                 return ret;
2170
2171         if (neg)
2172                 trace_flags->val &= ~opts->bit;
2173         else
2174                 trace_flags->val |= opts->bit;
2175
2176         return 0;
2177 }
2178
2179 static ssize_t
2180 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2181                         size_t cnt, loff_t *ppos)
2182 {
2183         char buf[64];
2184         char *cmp = buf;
2185         int neg = 0;
2186         int ret;
2187         int i;
2188
2189         if (cnt >= sizeof(buf))
2190                 return -EINVAL;
2191
2192         if (copy_from_user(&buf, ubuf, cnt))
2193                 return -EFAULT;
2194
2195         buf[cnt] = 0;
2196
2197         if (strncmp(buf, "no", 2) == 0) {
2198                 neg = 1;
2199                 cmp += 2;
2200         }
2201
2202         for (i = 0; trace_options[i]; i++) {
2203                 int len = strlen(trace_options[i]);
2204
2205                 if (strncmp(cmp, trace_options[i], len) == 0) {
2206                         if (neg)
2207                                 trace_flags &= ~(1 << i);
2208                         else
2209                                 trace_flags |= (1 << i);
2210                         break;
2211                 }
2212         }
2213
2214         /* If no generic option matched, try the tracer-specific options */
2215         if (!trace_options[i]) {
2216                 ret = set_tracer_option(current_trace, cmp, neg);
2217                 if (ret)
2218                         return ret;
2219         }
2220
2221         filp->f_pos += cnt;
2222
2223         return cnt;
2224 }
2225
2226 static struct file_operations tracing_iter_fops = {
2227         .open           = tracing_open_generic,
2228         .read           = tracing_trace_options_read,
2229         .write          = tracing_trace_options_write,
2230 };
2231
2232 static const char readme_msg[] =
2233         "tracing mini-HOWTO:\n\n"
2234         "# mkdir /debug\n"
2235         "# mount -t debugfs nodev /debug\n\n"
2236         "# cat /debug/tracing/available_tracers\n"
2237         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2238         "# cat /debug/tracing/current_tracer\n"
2239         "none\n"
2240         "# echo sched_switch > /debug/tracing/current_tracer\n"
2241         "# cat /debug/tracing/current_tracer\n"
2242         "sched_switch\n"
2243         "# cat /debug/tracing/trace_options\n"
2244         "noprint-parent nosym-offset nosym-addr noverbose\n"
2245         "# echo print-parent > /debug/tracing/trace_options\n"
2246         "# echo 1 > /debug/tracing/tracing_enabled\n"
2247         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2248         "# echo 0 > /debug/tracing/tracing_enabled\n"
2249 ;
2250
2251 static ssize_t
2252 tracing_readme_read(struct file *filp, char __user *ubuf,
2253                        size_t cnt, loff_t *ppos)
2254 {
2255         return simple_read_from_buffer(ubuf, cnt, ppos,
2256                                         readme_msg, strlen(readme_msg));
2257 }
2258
2259 static struct file_operations tracing_readme_fops = {
2260         .open           = tracing_open_generic,
2261         .read           = tracing_readme_read,
2262 };
2263
2264 static ssize_t
2265 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2266                   size_t cnt, loff_t *ppos)
2267 {
2268         char buf[64];
2269         int r;
2270
2271         r = sprintf(buf, "%u\n", tracer_enabled);
2272         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2273 }
2274
2275 static ssize_t
2276 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2277                    size_t cnt, loff_t *ppos)
2278 {
2279         struct trace_array *tr = filp->private_data;
2280         char buf[64];
2281         long val;
2282         int ret;
2283
2284         if (cnt >= sizeof(buf))
2285                 return -EINVAL;
2286
2287         if (copy_from_user(&buf, ubuf, cnt))
2288                 return -EFAULT;
2289
2290         buf[cnt] = 0;
2291
2292         ret = strict_strtoul(buf, 10, &val);
2293         if (ret < 0)
2294                 return ret;
2295
2296         val = !!val;
2297
2298         mutex_lock(&trace_types_lock);
2299         if (tracer_enabled ^ val) {
2300                 if (val) {
2301                         tracer_enabled = 1;
2302                         if (current_trace->start)
2303                                 current_trace->start(tr);
2304                         tracing_start();
2305                 } else {
2306                         tracer_enabled = 0;
2307                         tracing_stop();
2308                         if (current_trace->stop)
2309                                 current_trace->stop(tr);
2310                 }
2311         }
2312         mutex_unlock(&trace_types_lock);
2313
2314         filp->f_pos += cnt;
2315
2316         return cnt;
2317 }
2318
2319 static ssize_t
2320 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2321                        size_t cnt, loff_t *ppos)
2322 {
2323         char buf[max_tracer_type_len+2];
2324         int r;
2325
2326         mutex_lock(&trace_types_lock);
2327         if (current_trace)
2328                 r = sprintf(buf, "%s\n", current_trace->name);
2329         else
2330                 r = sprintf(buf, "\n");
2331         mutex_unlock(&trace_types_lock);
2332
2333         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2334 }
2335
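/*
 * Switch the current tracer by name: look it up in trace_types,
 * reset the old tracer and call the new one's init(), with branch
 * tracing disabled across the switch.
 */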
2336 static int tracing_set_tracer(char *buf)
2337 {
2338         struct trace_array *tr = &global_trace;
2339         struct tracer *t;
2340         int ret = 0;
2341
2342         mutex_lock(&trace_types_lock);
2343         for (t = trace_types; t; t = t->next) {
2344                 if (strcmp(t->name, buf) == 0)
2345                         break;
2346         }
2347         if (!t) {
2348                 ret = -EINVAL;
2349                 goto out;
2350         }
2351         if (t == current_trace)
2352                 goto out;
2353
2354         trace_branch_disable();
2355         if (current_trace && current_trace->reset)
2356                 current_trace->reset(tr);
2357
2358         current_trace = t;
2359         if (t->init) {
2360                 ret = t->init(tr);
2361                 if (ret)
2362                         goto out;
2363         }
2364
2365         trace_branch_enable(tr);
2366  out:
2367         mutex_unlock(&trace_types_lock);
2368
2369         return ret;
2370 }
2371
2372 static ssize_t
2373 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2374                         size_t cnt, loff_t *ppos)
2375 {
2376         char buf[max_tracer_type_len+1];
2377         int i;
2378         size_t ret;
2379         int err;
2380
2381         ret = cnt;
2382
2383         if (cnt > max_tracer_type_len)
2384                 cnt = max_tracer_type_len;
2385
2386         if (copy_from_user(&buf, ubuf, cnt))
2387                 return -EFAULT;
2388
2389         buf[cnt] = 0;
2390
2391         /* strip trailing whitespace. */
2392         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2393                 buf[i] = 0;
2394
2395         err = tracing_set_tracer(buf);
2396         if (err)
2397                 return err;
2398
2399         filp->f_pos += ret;
2400
2401         return ret;
2402 }
2403
2404 static ssize_t
2405 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2406                      size_t cnt, loff_t *ppos)
2407 {
2408         unsigned long *ptr = filp->private_data;
2409         char buf[64];
2410         int r;
2411
2412         r = snprintf(buf, sizeof(buf), "%ld\n",
2413                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2414         if (r > sizeof(buf))
2415                 r = sizeof(buf);
2416         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2417 }
2418
2419 static ssize_t
2420 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2421                       size_t cnt, loff_t *ppos)
2422 {
2423         long *ptr = filp->private_data;
2424         char buf[64];
2425         long val;
2426         int ret;
2427
2428         if (cnt >= sizeof(buf))
2429                 return -EINVAL;
2430
2431         if (copy_from_user(&buf, ubuf, cnt))
2432                 return -EFAULT;
2433
2434         buf[cnt] = 0;
2435
2436         ret = strict_strtoul(buf, 10, &val);
2437         if (ret < 0)
2438                 return ret;
2439
2440         *ptr = val * 1000;
2441
2442         return cnt;
2443 }
2444
2445 static atomic_t tracing_reader;
2446
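/*
 * trace_pipe is a consuming reader: entries handed to user space are
 * removed from the ring buffer.  Only one reader may have the pipe
 * open at a time, enforced with the tracing_reader count above.
 */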
2447 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2448 {
2449         struct trace_iterator *iter;
2450
2451         if (tracing_disabled)
2452                 return -ENODEV;
2453
2454         /* We only allow one reader of the pipe at a time */
2455         if (atomic_inc_return(&tracing_reader) != 1) {
2456                 atomic_dec(&tracing_reader);
2457                 return -EBUSY;
2458         }
2459
2460         /* create a buffer to store the information to pass to userspace */
2461         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2462         if (!iter)
2463                 return -ENOMEM;
2464
2465         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2466                 kfree(iter);
2467                 return -ENOMEM;
2468         }
2469
2470         mutex_lock(&trace_types_lock);
2471
2472         /* trace pipe does not show start of buffer */
2473         cpumask_setall(iter->started);
2474
2475         iter->tr = &global_trace;
2476         iter->trace = current_trace;
2477         filp->private_data = iter;
2478
2479         if (iter->trace->pipe_open)
2480                 iter->trace->pipe_open(iter);
2481         mutex_unlock(&trace_types_lock);
2482
2483         return 0;
2484 }
2485
2486 static int tracing_release_pipe(struct inode *inode, struct file *file)
2487 {
2488         struct trace_iterator *iter = file->private_data;
2489
2490         free_cpumask_var(iter->started);
2491         kfree(iter);
2492         atomic_dec(&tracing_reader);
2493
2494         return 0;
2495 }
2496
2497 static unsigned int
2498 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2499 {
2500         struct trace_iterator *iter = filp->private_data;
2501
2502         if (trace_flags & TRACE_ITER_BLOCK) {
2503                 /*
2504                  * Always select as readable when in blocking mode
2505                  */
2506                 return POLLIN | POLLRDNORM;
2507         } else {
2508                 if (!trace_empty(iter))
2509                         return POLLIN | POLLRDNORM;
2510                 poll_wait(filp, &trace_wait, poll_table);
2511                 if (!trace_empty(iter))
2512                         return POLLIN | POLLRDNORM;
2513
2514                 return 0;
2515         }
2516 }
2517
2518 /*
2519  * Consumer reader.
2520  */
2521 static ssize_t
2522 tracing_read_pipe(struct file *filp, char __user *ubuf,
2523                   size_t cnt, loff_t *ppos)
2524 {
2525         struct trace_iterator *iter = filp->private_data;
2526         ssize_t sret;
2527
2528         /* return any leftover data */
2529         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2530         if (sret != -EBUSY)
2531                 return sret;
2532
2533         trace_seq_reset(&iter->seq);
2534
2535         mutex_lock(&trace_types_lock);
2536         if (iter->trace->read) {
2537                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2538                 if (sret)
2539                         goto out;
2540         }
2541
2542 waitagain:
2543         sret = 0;
2544         while (trace_empty(iter)) {
2545
2546                 if ((filp->f_flags & O_NONBLOCK)) {
2547                         sret = -EAGAIN;
2548                         goto out;
2549                 }
2550
2551                 /*
2552                  * This is a makeshift waitqueue. The reasons we don't use
2553                  * an actual wait queue are:
2554                  *  1) we only ever have one waiter
2555                  *  2) the tracer traces all functions, and we don't want
2556                  *     the overhead of calling wake_up and friends
2557                  *     (and of tracing them too).
2558                  * Anyway, this is a really primitive wakeup.
2559                  */
2560                 set_current_state(TASK_INTERRUPTIBLE);
2561                 iter->tr->waiter = current;
2562
2563                 mutex_unlock(&trace_types_lock);
2564
2565                 /* sleep for 100 msecs, and try again. */
2566                 schedule_timeout(HZ/10);
2567
2568                 mutex_lock(&trace_types_lock);
2569
2570                 iter->tr->waiter = NULL;
2571
2572                 if (signal_pending(current)) {
2573                         sret = -EINTR;
2574                         goto out;
2575                 }
2576
2577                 if (iter->trace != current_trace)
2578                         goto out;
2579
2580                 /*
2581                  * We block until we have read something and tracing is
2582                  * disabled. We still block if tracing is disabled but we have never
2583                  * read anything. This allows a user to cat this file, and
2584                  * then enable tracing. But after we have read something,
2585                  * we give an EOF when tracing is again disabled.
2586                  *
2587                  * iter->pos will be 0 if we haven't read anything.
2588                  */
2589                 if (!tracer_enabled && iter->pos)
2590                         break;
2591
2592                 continue;
2593         }
2594
2595         /* stop when tracing is finished */
2596         if (trace_empty(iter))
2597                 goto out;
2598
2599         if (cnt >= PAGE_SIZE)
2600                 cnt = PAGE_SIZE - 1;
2601
2602         /* reset all but tr, trace, and overruns */
2603         memset(&iter->seq, 0,
2604                sizeof(struct trace_iterator) -
2605                offsetof(struct trace_iterator, seq));
2606         iter->pos = -1;
2607
2608         while (find_next_entry_inc(iter) != NULL) {
2609                 enum print_line_t ret;
2610                 int len = iter->seq.len;
2611
2612                 ret = print_trace_line(iter);
2613                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2614                         /* don't print partial lines */
2615                         iter->seq.len = len;
2616                         break;
2617                 }
2618
2619                 trace_consume(iter);
2620
2621                 if (iter->seq.len >= cnt)
2622                         break;
2623         }
2624
2625         /* Now copy what we have to the user */
2626         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2627         if (iter->seq.readpos >= iter->seq.len)
2628                 trace_seq_reset(&iter->seq);
2629
2630         /*
2631          * If there was nothing to send to the user, despite consuming
2632          * trace entries, go back and wait for more entries.
2633          */
2634         if (sret == -EBUSY)
2635                 goto waitagain;
2636
2637 out:
2638         mutex_unlock(&trace_types_lock);
2639
2640         return sret;
2641 }
2642
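/*
 * buffer_size_kb: read or resize the ring buffer, in kilobytes.  On
 * a resize both the main and the max buffers are resized so that
 * they stay the same size.  For example (assuming debugfs is mounted
 * on /debug):
 *   # echo 2048 > /debug/tracing/buffer_size_kb
 */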
2643 static ssize_t
2644 tracing_entries_read(struct file *filp, char __user *ubuf,
2645                      size_t cnt, loff_t *ppos)
2646 {
2647         struct trace_array *tr = filp->private_data;
2648         char buf[64];
2649         int r;
2650
2651         r = sprintf(buf, "%lu\n", tr->entries >> 10);
2652         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2653 }
2654
2655 static ssize_t
2656 tracing_entries_write(struct file *filp, const char __user *ubuf,
2657                       size_t cnt, loff_t *ppos)
2658 {
2659         unsigned long val;
2660         char buf[64];
2661         int ret, cpu;
2662
2663         if (cnt >= sizeof(buf))
2664                 return -EINVAL;
2665
2666         if (copy_from_user(&buf, ubuf, cnt))
2667                 return -EFAULT;
2668
2669         buf[cnt] = 0;
2670
2671         ret = strict_strtoul(buf, 10, &val);
2672         if (ret < 0)
2673                 return ret;
2674
2675         /* must have at least 1 entry */
2676         if (!val)
2677                 return -EINVAL;
2678
2679         mutex_lock(&trace_types_lock);
2680
2681         tracing_stop();
2682
2683         /* disable all cpu buffers */
2684         for_each_tracing_cpu(cpu) {
2685                 if (global_trace.data[cpu])
2686                         atomic_inc(&global_trace.data[cpu]->disabled);
2687                 if (max_tr.data[cpu])
2688                         atomic_inc(&max_tr.data[cpu]->disabled);
2689         }
2690
2691         /* value is in KB */
2692         val <<= 10;
2693
2694         if (val != global_trace.entries) {
2695                 ret = ring_buffer_resize(global_trace.buffer, val);
2696                 if (ret < 0) {
2697                         cnt = ret;
2698                         goto out;
2699                 }
2700
2701                 ret = ring_buffer_resize(max_tr.buffer, val);
2702                 if (ret < 0) {
2703                         int r;
2704                         cnt = ret;
2705                         r = ring_buffer_resize(global_trace.buffer,
2706                                                global_trace.entries);
2707                         if (r < 0) {
2708                                 /* AARGH! We are left with a max
2709                                  * buffer of a different size! */
2710                                 WARN_ON(1);
2711                                 tracing_disabled = 1;
2712                         }
2713                         goto out;
2714                 }
2715
2716                 global_trace.entries = val;
2717         }
2718
2719         filp->f_pos += cnt;
2720
2721         /* If check pages failed, return ENOMEM */
2722         if (tracing_disabled)
2723                 cnt = -ENOMEM;
2724  out:
2725         for_each_tracing_cpu(cpu) {
2726                 if (global_trace.data[cpu])
2727                         atomic_dec(&global_trace.data[cpu]->disabled);
2728                 if (max_tr.data[cpu])
2729                         atomic_dec(&max_tr.data[cpu]->disabled);
2730         }
2731
2732         tracing_start();
2733         max_tr.entries = global_trace.entries;
2734         mutex_unlock(&trace_types_lock);
2735
2736         return cnt;
2737 }
2738
2739 static int mark_printk(const char *fmt, ...)
2740 {
2741         int ret;
2742         va_list args;
2743         va_start(args, fmt);
2744         ret = trace_vprintk(0, -1, fmt, args);
2745         va_end(args);
2746         return ret;
2747 }
2748
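/*
 * trace_marker: a write from user space is injected into the trace
 * as a TRACE_PRINT entry via mark_printk(), cut at the first
 * newline.  For example (assuming debugfs is mounted on /debug):
 *   # echo "hello world" > /debug/tracing/trace_marker
 */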
2749 static ssize_t
2750 tracing_mark_write(struct file *filp, const char __user *ubuf,
2751                                         size_t cnt, loff_t *fpos)
2752 {
2753         char *buf;
2754         char *end;
2755
2756         if (tracing_disabled)
2757                 return -EINVAL;
2758
2759         if (cnt > TRACE_BUF_SIZE)
2760                 cnt = TRACE_BUF_SIZE;
2761
2762         buf = kmalloc(cnt + 1, GFP_KERNEL);
2763         if (buf == NULL)
2764                 return -ENOMEM;
2765
2766         if (copy_from_user(buf, ubuf, cnt)) {
2767                 kfree(buf);
2768                 return -EFAULT;
2769         }
2770
2771         /* Cut off at the first NUL or newline. */
2772         buf[cnt] = '\0';
2773         end = strchr(buf, '\n');
2774         if (end)
2775                 *end = '\0';
2776
2777         cnt = mark_printk("%s\n", buf);
2778         kfree(buf);
2779         *fpos += cnt;
2780
2781         return cnt;
2782 }
2783
2784 static struct file_operations tracing_max_lat_fops = {
2785         .open           = tracing_open_generic,
2786         .read           = tracing_max_lat_read,
2787         .write          = tracing_max_lat_write,
2788 };
2789
2790 static struct file_operations tracing_ctrl_fops = {
2791         .open           = tracing_open_generic,
2792         .read           = tracing_ctrl_read,
2793         .write          = tracing_ctrl_write,
2794 };
2795
2796 static struct file_operations set_tracer_fops = {
2797         .open           = tracing_open_generic,
2798         .read           = tracing_set_trace_read,
2799         .write          = tracing_set_trace_write,
2800 };
2801
2802 static struct file_operations tracing_pipe_fops = {
2803         .open           = tracing_open_pipe,
2804         .poll           = tracing_poll_pipe,
2805         .read           = tracing_read_pipe,
2806         .release        = tracing_release_pipe,
2807 };
2808
2809 static struct file_operations tracing_entries_fops = {
2810         .open           = tracing_open_generic,
2811         .read           = tracing_entries_read,
2812         .write          = tracing_entries_write,
2813 };
2814
2815 static struct file_operations tracing_mark_fops = {
2816         .open           = tracing_open_generic,
2817         .write          = tracing_mark_write,
2818 };
2819
2820 #ifdef CONFIG_DYNAMIC_FTRACE
2821
2822 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
2823 {
2824         return 0;
2825 }
2826
2827 static ssize_t
2828 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2829                   size_t cnt, loff_t *ppos)
2830 {
2831         static char ftrace_dyn_info_buffer[1024];
2832         static DEFINE_MUTEX(dyn_info_mutex);
2833         unsigned long *p = filp->private_data;
2834         char *buf = ftrace_dyn_info_buffer;
2835         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2836         int r;
2837
2838         mutex_lock(&dyn_info_mutex);
2839         r = sprintf(buf, "%ld ", *p);
2840
2841         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
2842         buf[r++] = '\n';
2843
2844         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2845
2846         mutex_unlock(&dyn_info_mutex);
2847
2848         return r;
2849 }
2850
2851 static struct file_operations tracing_dyn_info_fops = {
2852         .open           = tracing_open_generic,
2853         .read           = tracing_read_dyn_info,
2854 };
2855 #endif
2856
2857 static struct dentry *d_tracer;
2858
2859 struct dentry *tracing_init_dentry(void)
2860 {
2861         static int once;
2862
2863         if (d_tracer)
2864                 return d_tracer;
2865
2866         d_tracer = debugfs_create_dir("tracing", NULL);
2867
2868         if (!d_tracer && !once) {
2869                 once = 1;
2870                 pr_warning("Could not create debugfs directory 'tracing'\n");
2871                 return NULL;
2872         }
2873
2874         return d_tracer;
2875 }
2876
2877 #ifdef CONFIG_FTRACE_SELFTEST
2878 /* Let selftest have access to static functions in this file */
2879 #include "trace_selftest.c"
2880 #endif
2881
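/*
 * Create the debugfs control files (tracing_enabled, trace_options,
 * current_tracer, trace, trace_pipe, buffer_size_kb, trace_marker,
 * ...) in the "tracing" directory returned by tracing_init_dentry().
 */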
2882 static __init int tracer_init_debugfs(void)
2883 {
2884         struct dentry *d_tracer;
2885         struct dentry *entry;
2886
2887         d_tracer = tracing_init_dentry();
2888
2889         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2890                                     &global_trace, &tracing_ctrl_fops);
2891         if (!entry)
2892                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2893
2894         entry = debugfs_create_file("trace_options", 0644, d_tracer,
2895                                     NULL, &tracing_iter_fops);
2896         if (!entry)
2897                 pr_warning("Could not create debugfs 'trace_options' entry\n");
2898
2899         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2900                                     NULL, &tracing_cpumask_fops);
2901         if (!entry)
2902                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2903
2904         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2905                                     &global_trace, &tracing_lt_fops);
2906         if (!entry)
2907                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2908
2909         entry = debugfs_create_file("trace", 0444, d_tracer,
2910                                     &global_trace, &tracing_fops);
2911         if (!entry)
2912                 pr_warning("Could not create debugfs 'trace' entry\n");
2913
2914         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2915                                     &global_trace, &show_traces_fops);
2916         if (!entry)
2917                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2918
2919         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2920                                     &global_trace, &set_tracer_fops);
2921         if (!entry)
2922                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2923
2924         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2925                                     &tracing_max_latency,
2926                                     &tracing_max_lat_fops);
2927         if (!entry)
2928                 pr_warning("Could not create debugfs "
2929                            "'tracing_max_latency' entry\n");
2930
2931         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2932                                     &tracing_thresh, &tracing_max_lat_fops);
2933         if (!entry)
2934                 pr_warning("Could not create debugfs "
2935                            "'tracing_thresh' entry\n");
2936         entry = debugfs_create_file("README", 0644, d_tracer,
2937                                     NULL, &tracing_readme_fops);
2938         if (!entry)
2939                 pr_warning("Could not create debugfs 'README' entry\n");
2940
2941         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2942                                     NULL, &tracing_pipe_fops);
2943         if (!entry)
2944                 pr_warning("Could not create debugfs "
2945                            "'trace_pipe' entry\n");
2946
2947         entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2948                                     &global_trace, &tracing_entries_fops);
2949         if (!entry)
2950                 pr_warning("Could not create debugfs "
2951                            "'buffer_size_kb' entry\n");
2952
2953         entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2954                                     NULL, &tracing_mark_fops);
2955         if (!entry)
2956                 pr_warning("Could not create debugfs "
2957                            "'trace_marker' entry\n");
2958
2959 #ifdef CONFIG_DYNAMIC_FTRACE
2960         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2961                                     &ftrace_update_tot_cnt,
2962                                     &tracing_dyn_info_fops);
2963         if (!entry)
2964                 pr_warning("Could not create debugfs "
2965                            "'dyn_ftrace_total_info' entry\n");
2966 #endif
2967 #ifdef CONFIG_SYSPROF_TRACER
2968         init_tracer_sysprof_debugfs(d_tracer);
2969 #endif
2970         return 0;
2971 }
2972
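/*
 * Record a printk-style message as a TRACE_PRINT entry: the message
 * is formatted into a static buffer under trace_buf_lock and then
 * copied into a ring buffer event.  Graph tracing is paused while
 * the buffer is filled so the formatting is not itself graph-traced.
 */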
2973 int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
2974 {
2975         static DEFINE_SPINLOCK(trace_buf_lock);
2976         static char trace_buf[TRACE_BUF_SIZE];
2977
2978         struct ring_buffer_event *event;
2979         struct trace_array *tr = &global_trace;
2980         struct trace_array_cpu *data;
2981         int cpu, len = 0, size, pc;
2982         struct print_entry *entry;
2983         unsigned long irq_flags;
2984
2985         if (tracing_disabled || tracing_selftest_running)
2986                 return 0;
2987
2988         pc = preempt_count();
2989         preempt_disable_notrace();
2990         cpu = raw_smp_processor_id();
2991         data = tr->data[cpu];
2992
2993         if (unlikely(atomic_read(&data->disabled)))
2994                 goto out;
2995
2996         pause_graph_tracing();
2997         spin_lock_irqsave(&trace_buf_lock, irq_flags);
2998         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2999
3000         len = min(len, TRACE_BUF_SIZE-1);
3001         trace_buf[len] = 0;
3002
3003         size = sizeof(*entry) + len + 1;
3004         event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3005         if (!event)
3006                 goto out_unlock;
3007         entry = ring_buffer_event_data(event);
3008         tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3009         entry->ent.type                 = TRACE_PRINT;
3010         entry->ip                       = ip;
3011         entry->depth                    = depth;
3012
3013         memcpy(&entry->buf, trace_buf, len);
3014         entry->buf[len] = 0;
3015         ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3016
3017  out_unlock:
3018         spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3019         unpause_graph_tracing();
3020  out:
3021         preempt_enable_notrace();
3022
3023         return len;
3024 }
3025 EXPORT_SYMBOL_GPL(trace_vprintk);
3026
3027 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3028 {
3029         int ret;
3030         va_list ap;
3031
3032         if (!(trace_flags & TRACE_ITER_PRINTK))
3033                 return 0;
3034
3035         va_start(ap, fmt);
3036         ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3037         va_end(ap);
3038         return ret;
3039 }
3040 EXPORT_SYMBOL_GPL(__ftrace_printk);
3041
3042 static int trace_panic_handler(struct notifier_block *this,
3043                                unsigned long event, void *unused)
3044 {
3045         if (ftrace_dump_on_oops)
3046                 ftrace_dump();
3047         return NOTIFY_OK;
3048 }
3049
3050 static struct notifier_block trace_panic_notifier = {
3051         .notifier_call  = trace_panic_handler,
3052         .next           = NULL,
3053         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
3054 };
3055
3056 static int trace_die_handler(struct notifier_block *self,
3057                              unsigned long val,
3058                              void *data)
3059 {
3060         switch (val) {
3061         case DIE_OOPS:
3062                 if (ftrace_dump_on_oops)
3063                         ftrace_dump();
3064                 break;
3065         default:
3066                 break;
3067         }
3068         return NOTIFY_OK;
3069 }
3070
3071 static struct notifier_block trace_die_notifier = {
3072         .notifier_call = trace_die_handler,
3073         .priority = 200
3074 };
3075
3076 /*
3077  * printk is limited to a maximum of 1024 characters; we really don't
3078  * need it that big. Nothing should be printing 1000 characters anyway.
3079  */
3080 #define TRACE_MAX_PRINT         1000
3081
3082 /*
3083  * Define here KERN_TRACE so that we have one place to modify
3084  * it if we decide to change what log level the ftrace dump
3085  * should be at.
3086  */
3087 #define KERN_TRACE              KERN_EMERG
3088
3089 static void
3090 trace_printk_seq(struct trace_seq *s)
3091 {
3092         /* Probably should print a warning here. */
3093         if (s->len >= 1000)
3094                 s->len = 1000;
3095
3096         /* should be NUL-terminated, but we are paranoid. */
3097         s->buffer[s->len] = 0;
3098
3099         printk(KERN_TRACE "%s", s->buffer);
3100
3101         trace_seq_reset(s);
3102 }
3103
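/*
 * Dump the entire ftrace buffer to the console at KERN_TRACE
 * (KERN_EMERG) level.  This is a one-shot, one-way operation:
 * tracing is turned off and ftrace is killed, so there is no
 * coming back.
 */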
3104 void ftrace_dump(void)
3105 {
3106         static DEFINE_SPINLOCK(ftrace_dump_lock);
3107         /* use static because iter can be a bit big for the stack */
3108         static struct trace_iterator iter;
3109         static int dump_ran;
3110         unsigned long flags;
3111         int cnt = 0, cpu;
3112
3113         /* only one dump */
3114         spin_lock_irqsave(&ftrace_dump_lock, flags);
3115         if (dump_ran)
3116                 goto out;
3117
3118         dump_ran = 1;
3119
3120         /* No turning back! */
3121         tracing_off();
3122         ftrace_kill();
3123
3124         for_each_tracing_cpu(cpu) {
3125                 atomic_inc(&global_trace.data[cpu]->disabled);
3126         }
3127
3128         /* don't look at user memory in panic mode */
3129         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3130
3131         printk(KERN_TRACE "Dumping ftrace buffer:\n");
3132
3133         iter.tr = &global_trace;
3134         iter.trace = current_trace;
3135
3136         /*
3137          * We need to stop all tracing on all CPUS to read
3138          * the next buffer. This is a bit expensive, but is
3139          * not done often. We read everything we can,
3140          * and then release the locks again.
3141          */
3142
3143         while (!trace_empty(&iter)) {
3144
3145                 if (!cnt)
3146                         printk(KERN_TRACE "---------------------------------\n");
3147
3148                 cnt++;
3149
3150                 /* reset all but tr, trace, and overruns */
3151                 memset(&iter.seq, 0,
3152                        sizeof(struct trace_iterator) -
3153                        offsetof(struct trace_iterator, seq));
3154                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3155                 iter.pos = -1;
3156
3157                 if (find_next_entry_inc(&iter) != NULL) {
3158                         print_trace_line(&iter);
3159                         trace_consume(&iter);
3160                 }
3161
3162                 trace_printk_seq(&iter.seq);
3163         }
3164
3165         if (!cnt)
3166                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
3167         else
3168                 printk(KERN_TRACE "---------------------------------\n");
3169
3170  out:
3171         spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3172 }
3173
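/*
 * Early init: allocate the tracing cpumasks and the global (and, with
 * CONFIG_TRACER_MAX_TRACE, the max) ring buffers, set up the per-CPU
 * data pointers, register the nop/boot tracers and hook the panic
 * and die notifiers that trigger ftrace_dump().
 */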
3174 __init static int tracer_alloc_buffers(void)
3175 {
3176         struct trace_array_cpu *data;
3177         int i;
3178         int ret = -ENOMEM;
3179
3180         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
3181                 goto out;
3182
3183         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
3184                 goto out_free_buffer_mask;
3185
3186         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
3187         cpumask_copy(tracing_cpumask, cpu_all_mask);
3188
3189         /* TODO: make the number of buffers hot pluggable with CPUS */
3190         global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3191                                                    TRACE_BUFFER_FLAGS);
3192         if (!global_trace.buffer) {
3193                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3194                 WARN_ON(1);
3195                 goto out_free_cpumask;
3196         }
3197         global_trace.entries = ring_buffer_size(global_trace.buffer);
3198
3199
3200 #ifdef CONFIG_TRACER_MAX_TRACE
3201         max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3202                                              TRACE_BUFFER_FLAGS);
3203         if (!max_tr.buffer) {
3204                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3205                 WARN_ON(1);
3206                 ring_buffer_free(global_trace.buffer);
3207                 goto out_free_cpumask;
3208         }
3209         max_tr.entries = ring_buffer_size(max_tr.buffer);
3210         WARN_ON(max_tr.entries != global_trace.entries);
3211 #endif
3212
3213         /* Allocate the first page for all buffers */
3214         for_each_tracing_cpu(i) {
3215                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3216                 max_tr.data[i] = &per_cpu(max_data, i);
3217         }
3218
3219         trace_init_cmdlines();
3220
3221         register_tracer(&nop_trace);
3222 #ifdef CONFIG_BOOT_TRACER
3223         register_tracer(&boot_tracer);
3224         current_trace = &boot_tracer;
3225         current_trace->init(&global_trace);
3226 #else
3227         current_trace = &nop_trace;
3228 #endif
3229         /* All seems OK, enable tracing */
3230         tracing_disabled = 0;
3231
3232         atomic_notifier_chain_register(&panic_notifier_list,
3233                                        &trace_panic_notifier);
3234
3235         register_die_notifier(&trace_die_notifier);
3236         return 0;
3237
3238 out_free_cpumask:
3239         free_cpumask_var(tracing_cpumask);
3240 out_free_buffer_mask:
3241         free_cpumask_var(tracing_buffer_mask);
3242 out:
3243         return ret;
3244 }
3245 early_initcall(tracer_alloc_buffers);
3246 fs_initcall(tracer_init_debugfs);