X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Ftrace%2Ftrace.c;h=6adf660fc8163c83cafb2fde2bea12fcf80a7bd5;hb=968ea6d80e395cf11a51143cfa1b9a14ada676df;hp=b38a4bb40548811ad4d4aa7b0b0c5cc5d229cc96;hpb=2c4f035f6c3e8fda661eb6105aa51ef07aa71607;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b38a4bb..6adf660 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,10 +30,12 @@ #include #include #include +#include #include #include #include +#include #include "trace.h" @@ -42,12 +44,87 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* + * We need to change this state when a selftest is running. + * A selftest will lurk into the ring-buffer to count the + * entries inserted during the selftest although some concurrent + * insertions into the ring-buffer such as ftrace_printk could occurred + * at the same time, giving false positive or negative results. + */ +static bool __read_mostly tracing_selftest_running; + +/* For tracers that don't implement custom flags */ +static struct tracer_opt dummy_tracer_opt[] = { + { } +}; + +static struct tracer_flags dummy_tracer_flags = { + .val = 0, + .opts = dummy_tracer_opt +}; + +static int dummy_set_flag(u32 old_flags, u32 bit, int set) +{ + return 0; +} + +/* + * Kill all tracing for good (never come back). + * It is initialized to 1 but will turn to zero if the initialization + * of the tracer is successful. But that is the only place that sets + * this back to zero. + */ +int tracing_disabled = 1; + +static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); + +static inline void ftrace_disable_cpu(void) +{ + preempt_disable(); + local_inc(&__get_cpu_var(ftrace_cpu_disabled)); +} + +static inline void ftrace_enable_cpu(void) +{ + local_dec(&__get_cpu_var(ftrace_cpu_disabled)); + preempt_enable(); +} + static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -static int tracing_disabled = 1; +/* + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops + * + * If there is an oops (or kernel panic) and the ftrace_dump_on_oops + * is set, then ftrace_dump is called. This will output the contents + * of the ftrace buffers to the console. This is very useful for + * capturing traces that lead to crashes and outputing it to a + * serial console. + * + * It is default off, but you can enable it with either specifying + * "ftrace_dump_on_oops" in the kernel command line, or setting + * /proc/sys/kernel/ftrace_dump_on_oops to true. + */ +int ftrace_dump_on_oops; + +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + +static int __init set_ftrace_dump_on_oops(char *str) +{ + ftrace_dump_on_oops = 1; + return 1; +} +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); long ns2usecs(cycle_t nsec) @@ -97,6 +174,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data); /* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; +/** + * tracing_is_enabled - return tracer_enabled status + * + * This function is used by other tracers to know the status + * of the tracer_enabled flag. Tracers may use this function + * to know if it should enable their features when starting + * up. See irqsoff tracer for an example (start_irqsoff_tracer). + */ +int tracing_is_enabled(void) +{ + return tracer_enabled; +} + /* function tracing enabled */ int ftrace_function_enabled; @@ -138,8 +228,9 @@ static DEFINE_MUTEX(trace_types_lock); /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); -/* trace_flags holds iter_ctrl options */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +/* trace_flags holds trace_options default values */ +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | + TRACE_ITER_ANNOTATE; /** * trace_wake_up - wake up tasks waiting for trace input @@ -178,13 +269,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -/* - * TRACE_ITER_SYM_MASK masks the options in trace_flags that - * control the output of kernel symbols. - */ -#define TRACE_ITER_SYM_MASK \ - (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) - /* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", @@ -198,6 +282,11 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", + "ftrace_preempt", + "branch", + "annotate", + "userstacktrace", + "sym-userobj", NULL }; @@ -321,34 +410,51 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) return len; } -#define HEX_CHARS 17 -static const char hex2asc[] = "0123456789abcdef"; +#define MAX_MEMHEX_BYTES 8 +#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) static int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) { unsigned char hex[HEX_CHARS]; unsigned char *data = mem; - unsigned char byte; int i, j; - BUG_ON(len >= HEX_CHARS); - #ifdef __BIG_ENDIAN for (i = 0, j = 0; i < len; i++) { #else for (i = len-1, j = 0; i >= 0; i--) { #endif - byte = data[i]; - - hex[j++] = hex2asc[byte & 0x0f]; - hex[j++] = hex2asc[byte >> 4]; + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); } hex[j++] = ' '; return trace_seq_putmem(s, hex, j); } +static int +trace_seq_path(struct trace_seq *s, struct path *path) +{ + unsigned char *p; + + if (s->len >= (PAGE_SIZE - 1)) + return 0; + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = '?'; + return 1; + } + + return 0; +} + static void trace_seq_reset(struct trace_seq *s) { @@ -406,7 +512,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tr->buffer = max_tr.buffer; max_tr.buffer = buf; + ftrace_disable_cpu(); ring_buffer_reset(tr->buffer); + ftrace_enable_cpu(); __update_max_tr(tr, tsk, cpu); __raw_spin_unlock(&ftrace_max_lock); @@ -428,9 +536,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); + ftrace_disable_cpu(); + ring_buffer_reset(max_tr.buffer); ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); + ftrace_enable_cpu(); + WARN_ON_ONCE(ret); __update_max_tr(tr, tsk, cpu); @@ -454,7 +566,17 @@ int register_tracer(struct tracer *type) return -1; } + /* + * When this gets called we hold the BKL which means that + * preemption is disabled. Various trace selftests however + * need to disable and enable preemption for successful tests. + * So we drop the BKL here and grab it after the tests again. + */ + unlock_kernel(); mutex_lock(&trace_types_lock); + + tracing_selftest_running = true; + for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -465,12 +587,20 @@ int register_tracer(struct tracer *type) } } + if (!type->set_flag) + type->set_flag = &dummy_set_flag; + if (!type->flags) + type->flags = &dummy_tracer_flags; + else + if (!type->flags->opts) + type->flags->opts = dummy_tracer_opt; + #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; - int saved_ctrl = tr->ctrl; int i; + /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -478,25 +608,23 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - for_each_tracing_cpu(i) { + for_each_tracing_cpu(i) tracing_reset(tr, i); - } + current_trace = type; - tr->ctrl = 0; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); /* the test is responsible for resetting too */ current_trace = saved_tracer; - tr->ctrl = saved_ctrl; if (ret) { printk(KERN_CONT "FAILED!\n"); goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - for_each_tracing_cpu(i) { + for_each_tracing_cpu(i) tracing_reset(tr, i); - } + printk(KERN_CONT "PASSED\n"); } #endif @@ -508,7 +636,9 @@ int register_tracer(struct tracer *type) max_tracer_type_len = len; out: + tracing_selftest_running = false; mutex_unlock(&trace_types_lock); + lock_kernel(); return ret; } @@ -543,7 +673,9 @@ void unregister_tracer(struct tracer *type) void tracing_reset(struct trace_array *tr, int cpu) { + ftrace_disable_cpu(); ring_buffer_reset_cpu(tr->buffer, cpu); + ftrace_enable_cpu(); } #define SAVED_CMDLINES 128 @@ -563,6 +695,91 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +static int trace_stop_count; +static DEFINE_SPINLOCK(tracing_start_lock); + +/** + * ftrace_off_permanent - disable all ftrace code permanently + * + * This should only be called when a serious anomally has + * been detected. This will turn off the function tracing, + * ring buffers, and other tracing utilites. It takes no + * locks and can be called from any context. + */ +void ftrace_off_permanent(void) +{ + tracing_disabled = 1; + ftrace_stop(); + tracing_off_permanent(); +} + +/** + * tracing_start - quick start of the tracer + * + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. + */ +void tracing_start(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + if (tracing_disabled) + return; + + spin_lock_irqsave(&tracing_start_lock, flags); + if (--trace_stop_count) + goto out; + + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ + WARN_ON_ONCE(1); + trace_stop_count = 0; + goto out; + } + + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + ftrace_start(); + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + ftrace_stop(); + spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) @@ -600,7 +817,7 @@ static void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static char *trace_find_cmdline(int pid) +char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; @@ -630,17 +847,20 @@ void tracing_record_cmdline(struct task_struct *tsk) } void -tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) +tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, + int pc) { struct task_struct *tsk = current; - unsigned long pc; - - pc = preempt_count(); entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | +#else + TRACE_FLAG_IRQS_NOSUPPORT | +#endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); @@ -648,37 +868,94 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) void trace_function(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags, + int pc) { struct ring_buffer_event *event; struct ftrace_entry *entry; unsigned long irq_flags; + /* If we are reading the ring buffer, don't trace */ + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_graph_entry(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} + +static void __trace_graph_return(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ret *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ret_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} +#endif + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags, + int pc) { if (likely(!atomic_read(&data->disabled))) - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, pc); } -void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, - int skip) +static void ftrace_trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc) { +#ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -692,7 +969,7 @@ void __trace_stack(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_STACK; memset(&entry->caller, 0, sizeof(entry->caller)); @@ -704,11 +981,61 @@ void __trace_stack(struct trace_array *tr, save_stack_trace(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif } -void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip) +{ + ftrace_trace_stack(tr, data, flags, skip, preempt_count()); +} + +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, int pc) +{ +#ifdef CONFIG_STACKTRACE + struct ring_buffer_event *event; + struct userstack_entry *entry; + struct stack_trace trace; + unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_USER_STACK; + + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = 0; + trace.entries = entry->caller; + + save_stack_trace_user(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif +} + +void __trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags) +{ + ftrace_trace_userstack(tr, data, flags, preempt_count()); +} + +static void +ftrace_trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + int pc) { struct ring_buffer_event *event; struct trace_array_cpu *data = __data; @@ -721,23 +1048,31 @@ __trace_special(void *__tr, void *__data, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0); + tracing_generic_entry_update(&entry->ent, 0, pc); entry->ent.type = TRACE_SPECIAL; entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __trace_stack(tr, data, irq_flags, 4); + ftrace_trace_stack(tr, data, irq_flags, 4, pc); + ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); +} + +void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, - unsigned long flags) + unsigned long flags, int pc) { struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -748,7 +1083,7 @@ tracing_sched_switch_trace(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_CTX; entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; @@ -758,7 +1093,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __trace_stack(tr, data, flags, 5); + ftrace_trace_stack(tr, data, flags, 5, pc); + ftrace_trace_userstack(tr, data, flags, pc); } void @@ -766,7 +1102,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *curr, - unsigned long flags) + unsigned long flags, int pc) { struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -777,7 +1113,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_WAKE; entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; @@ -787,7 +1123,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __trace_stack(tr, data, flags, 6); + ftrace_trace_stack(tr, data, flags, 6, pc); + ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -798,25 +1135,52 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) struct trace_array *tr = &global_trace; struct trace_array_cpu *data; unsigned long flags; - long disabled; int cpu; + int pc; - if (tracing_disabled || !tr->ctrl) + if (tracing_disabled) return; + pc = preempt_count(); local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; + + if (likely(atomic_inc_return(&data->disabled) == 1)) + ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + +#ifdef CONFIG_FUNCTION_TRACER +static void +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu, resched; + int pc; + + if (unlikely(!ftrace_function_enabled)) + return; + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + local_save_flags(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - __trace_special(tr, data, arg1, arg2, arg3); + trace_function(tr, data, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - local_irq_restore(flags); + ftrace_preempt_enable(resched); } -#ifdef CONFIG_FTRACE static void function_trace_call(unsigned long ip, unsigned long parent_ip) { @@ -825,24 +1189,85 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) unsigned long flags; long disabled; int cpu; + int pc; if (unlikely(!ftrace_function_enabled)) return; - if (skip_trace(ip)) - return; + /* + * Need to use raw, since this must be called before the + * recursive protection is performed. + */ + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + trace_function(tr, data, ip, parent_ip, flags, pc); + } + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +int trace_graph_entry(struct ftrace_graph_ent *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + if (!ftrace_trace_task(current)) + return 0; + + if (!ftrace_graph_addr(trace->func)) + return 0; local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_entry(tr, data, trace, flags, pc); + } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); + atomic_dec(&data->disabled); + local_irq_restore(flags); - if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags); + return 1; +} + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, data, trace, flags, pc); + } + if (!trace->depth) + clear_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -852,9 +1277,14 @@ static struct ftrace_ops trace_ops __read_mostly = void tracing_start_function_trace(void) { ftrace_function_enabled = 0; + + if (trace_flags & TRACE_ITER_PREEMPTONLY) + trace_ops.func = function_trace_call_preempt_only; + else + trace_ops.func = function_trace_call; + register_ftrace_function(&trace_ops); - if (tracer_enabled) - ftrace_function_enabled = 1; + ftrace_function_enabled = 1; } void tracing_stop_function_trace(void) @@ -866,12 +1296,19 @@ void tracing_stop_function_trace(void) enum trace_file_type { TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, }; static void trace_iterator_increment(struct trace_iterator *iter, int cpu) { + /* Don't allow ftrace to trace into the ring buffers */ + ftrace_disable_cpu(); + iter->idx++; - ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); + if (iter->buffer_iter[iter->cpu]) + ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); + + ftrace_enable_cpu(); } static struct trace_entry * @@ -880,9 +1317,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) struct ring_buffer_event *event; struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; - event = ring_buffer_iter_peek(buf_iter, ts); + /* Don't allow ftrace to trace into the ring buffers */ + ftrace_disable_cpu(); + + if (buf_iter) + event = ring_buffer_iter_peek(buf_iter, ts); + else + event = ring_buffer_peek(iter->tr->buffer, cpu, ts); + + ftrace_enable_cpu(); + return event ? ring_buffer_event_data(event) : NULL; } + static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { @@ -938,7 +1385,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter) static void trace_consume(struct trace_iterator *iter) { + /* Don't allow ftrace to trace into the ring buffers */ + ftrace_disable_cpu(); ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); + ftrace_enable_cpu(); } static void *s_next(struct seq_file *m, void *v, loff_t *pos) @@ -982,19 +1432,19 @@ static void *s_start(struct seq_file *m, loff_t *pos) atomic_inc(&trace_record_cmdline_disabled); - /* let the tracer grab locks here if needed */ - if (current_trace->start) - current_trace->start(iter); - if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; iter->idx = -1; + ftrace_disable_cpu(); + for_each_tracing_cpu(cpu) { ring_buffer_iter_reset(iter->buffer_iter[cpu]); } + ftrace_enable_cpu(); + for (p = iter; p && l < *pos; p = s_next(m, p, &l)) ; @@ -1008,28 +1458,24 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { - struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); - - /* let the tracer release locks here if needed */ - if (current_trace && current_trace == iter->trace && iter->trace->stop) - iter->trace->stop(iter); - mutex_unlock(&trace_types_lock); } -#define KRETPROBE_MSG "[unknown/kretprobe'd]" - #ifdef CONFIG_KRETPROBES -static inline int kretprobed(unsigned long addr) +static inline const char *kretprobed(const char *name) { - return addr == (unsigned long)kretprobe_trampoline; + static const char tramp_name[] = "kretprobe_trampoline"; + int size = sizeof(tramp_name); + + if (strncmp(tramp_name, name, size) == 0) + return "[unknown/kretprobe'd]"; + return name; } #else -static inline int kretprobed(unsigned long addr) +static inline const char *kretprobed(const char *name) { - return 0; + return name; } #endif /* CONFIG_KRETPROBES */ @@ -1038,10 +1484,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; + const char *name; kallsyms_lookup(address, NULL, NULL, NULL, str); - return trace_seq_printf(s, fmt, str); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); #endif return 1; } @@ -1052,9 +1501,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; + const char *name; sprint_symbol(str, address); - return trace_seq_printf(s, fmt, str); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); #endif return 1; } @@ -1065,7 +1517,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static int +int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -1086,6 +1538,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags) +{ + struct file *file = NULL; + unsigned long vmstart = 0; + int ret = 1; + + if (mm) { + const struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ip); + if (vma) { + file = vma->vm_file; + vmstart = vma->vm_start; + } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); + } + up_read(&mm->mmap_sem); + } + if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; +} + +static int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + struct mm_struct *mm = NULL; + int ret = 1; + unsigned int i; + + if (trace_flags & TRACE_ITER_SYM_USEROBJ) { + struct task_struct *task; + /* + * we do the lookup on the thread group leader, + * since individual threads might have already quit! + */ + rcu_read_lock(); + task = find_task_by_vpid(entry->ent.tgid); + if (task) + mm = get_task_mm(task); + rcu_read_unlock(); + } + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i && ret) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + if (ret) + ret = trace_seq_puts(s, "??"); + continue; + } + if (!ret) + break; + if (ret) + ret = seq_print_user_ip(s, mm, ip, sym_flags); + } + + if (mm) + mmput(mm); + return ret; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1181,7 +1705,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); trace_seq_printf(s, "%3d", cpu); trace_seq_printf(s, "%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; @@ -1238,16 +1763,42 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) return; } - do { - cont = (struct trace_field_cont *)ent; - if (ok) - ok = (trace_seq_printf(s, "%s", cont->buf) > 0); - ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); - ent = peek_next_entry(iter, iter->cpu, NULL); - } while (ent && ent->type == TRACE_CONT); + do { + cont = (struct trace_field_cont *)ent; + if (ok) + ok = (trace_seq_printf(s, "%s", cont->buf) > 0); + + ftrace_disable_cpu(); + + if (iter->buffer_iter[iter->cpu]) + ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); + else + ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); + + ftrace_enable_cpu(); + + ent = peek_next_entry(iter, iter->cpu, NULL); + } while (ent && ent->type == TRACE_CONT); + + if (!ok) + trace_seq_putc(s, '\n'); +} + +static void test_cpu_buff_start(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + + if (!(trace_flags & TRACE_ITER_ANNOTATE)) + return; + + if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) + return; + + if (cpu_isset(iter->cpu, iter->started)) + return; - if (!ok) - trace_seq_putc(s, '\n'); + cpu_set(iter->cpu, iter->started); + trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } static enum print_line_t @@ -1269,6 +1820,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + next_entry = find_next_entry(iter, NULL, &next_ts); if (!next_entry) next_ts = iter->ts; @@ -1292,21 +1845,21 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) } switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_puts(s, " ("); - if (kretprobed(field->parent_ip)) - trace_seq_puts(s, KRETPROBE_MSG); - else - seq_print_ip_sym(s, field->parent_ip, sym_flags); + seq_print_ip_sym(s, field->parent_ip, sym_flags); trace_seq_puts(s, ")\n"); break; } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); T = field->next_state < sizeof(state_to_char) ? state_to_char[field->next_state] : 'X'; @@ -1326,7 +1879,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; } case TRACE_SPECIAL: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); trace_seq_printf(s, "# %ld %ld %ld\n", field->arg1, @@ -1335,7 +1890,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; } case TRACE_STACK: { - struct stack_entry *field = (struct stack_entry *)entry; + struct stack_entry *field; + + trace_assign_type(field, entry); for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { if (i) @@ -1346,7 +1903,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; } case TRACE_PRINT: { - struct print_entry *field = (struct print_entry *)entry; + struct print_entry *field; + + trace_assign_type(field, entry); seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_printf(s, ": %s", field->buf); @@ -1354,6 +1913,27 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } + case TRACE_BRANCH: { + struct trace_branch *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? " ok " : " MISS ", + field->func, + field->file, + field->line); + break; + } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + seq_print_userip_objs(field, s, sym_flags); + trace_seq_putc(s, '\n'); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1378,6 +1958,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + comm = trace_find_cmdline(iter->ent->pid); t = ns2usecs(iter->ts); @@ -1396,7 +1978,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); ret = seq_print_ip_sym(s, field->ip, sym_flags); if (!ret) @@ -1406,12 +1990,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) ret = trace_seq_printf(s, " <-"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - if (kretprobed(field->parent_ip)) - ret = trace_seq_puts(s, KRETPROBE_MSG); - else - ret = seq_print_ip_sym(s, - field->parent_ip, - sym_flags); + ret = seq_print_ip_sym(s, + field->parent_ip, + sym_flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -1422,8 +2003,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); S = field->prev_state < sizeof(state_to_char) ? state_to_char[field->prev_state] : 'X'; @@ -1443,7 +2025,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); ret = trace_seq_printf(s, "# %ld %ld %ld\n", field->arg1, @@ -1454,7 +2038,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_STACK: { - struct stack_entry *field = (struct stack_entry *)entry; + struct stack_entry *field; + + trace_assign_type(field, entry); for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { if (i) { @@ -1473,7 +2059,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_PRINT: { - struct print_entry *field = (struct print_entry *)entry; + struct print_entry *field; + + trace_assign_type(field, entry); seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_printf(s, ": %s", field->buf); @@ -1481,6 +2069,37 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } + case TRACE_GRAPH_RET: { + return print_graph_function(iter); + } + case TRACE_GRAPH_ENT: { + return print_graph_function(iter); + } + case TRACE_BRANCH: { + struct trace_branch *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? " ok " : " MISS ", + field->func, + field->file, + field->line); + break; + } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + ret = seq_print_userip_objs(field, s, sym_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_putc(s, '\n'); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + break; + } } return TRACE_TYPE_HANDLED; } @@ -1504,7 +2123,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); ret = trace_seq_printf(s, "%x %x\n", field->ip, @@ -1515,8 +2136,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); S = field->prev_state < sizeof(state_to_char) ? state_to_char[field->prev_state] : 'X'; @@ -1537,8 +2159,11 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); ret = trace_seq_printf(s, "# %ld %ld %ld\n", field->arg1, @@ -1549,7 +2174,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_PRINT: { - struct print_entry *field = (struct print_entry *)entry; + struct print_entry *field; + + trace_assign_type(field, entry); trace_seq_printf(s, "# %lx %s", field->ip, field->buf); if (entry->flags & TRACE_FLAG_CONT) @@ -1568,6 +2195,7 @@ do { \ #define SEQ_PUT_HEX_FIELD_RET(s, x) \ do { \ + BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ return 0; \ } while (0) @@ -1590,7 +2218,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_HEX_FIELD_RET(s, field->ip); SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); @@ -1598,8 +2228,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); S = field->prev_state < sizeof(state_to_char) ? state_to_char[field->prev_state] : 'X'; @@ -1617,8 +2248,11 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_HEX_FIELD_RET(s, field->arg1); SEQ_PUT_HEX_FIELD_RET(s, field->arg2); @@ -1642,20 +2276,23 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, iter->cpu); + SEQ_PUT_FIELD_RET(s, entry->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_FIELD_RET(s, field->ip); SEQ_PUT_FIELD_RET(s, field->parent_ip); break; } case TRACE_CTX: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_FIELD_RET(s, field->prev_pid); SEQ_PUT_FIELD_RET(s, field->prev_prio); @@ -1666,8 +2303,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_FIELD_RET(s, field->arg1); SEQ_PUT_FIELD_RET(s, field->arg2); @@ -1683,10 +2323,16 @@ static int trace_empty(struct trace_iterator *iter) int cpu; for_each_tracing_cpu(cpu) { - if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) - return 0; + if (iter->buffer_iter[cpu]) { + if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) + return 0; + } else { + if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) + return 0; + } } - return TRACE_TYPE_HANDLED; + + return 1; } static enum print_line_t print_trace_line(struct trace_iterator *iter) @@ -1723,7 +2369,9 @@ static int s_show(struct seq_file *m, void *v) seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); } - if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (iter->trace && iter->trace->print_header) + iter->trace->print_header(m); + else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return 0; @@ -1775,9 +2423,20 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Notify the tracer early; before we stop tracing. */ + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + + /* Annotate start of buffers if we had overruns */ + if (ring_buffer_overruns(iter->tr->buffer)) + iter->iter_flags |= TRACE_FILE_ANNOTATE; + + for_each_tracing_cpu(cpu) { + iter->buffer_iter[cpu] = ring_buffer_read_start(iter->tr->buffer, cpu); + if (!iter->buffer_iter[cpu]) goto fail_buffer; } @@ -1791,13 +2450,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) m->private = iter; /* stop the trace while dumping */ - if (iter->tr->ctrl) { - tracer_enabled = 0; - ftrace_function_enabled = 0; - } - - if (iter->trace && iter->trace->open) - iter->trace->open(iter); + tracing_stop(); mutex_unlock(&trace_types_lock); @@ -1810,6 +2463,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) ring_buffer_read_finish(iter->buffer_iter[cpu]); } mutex_unlock(&trace_types_lock); + kfree(iter); return ERR_PTR(-ENOMEM); } @@ -1839,14 +2493,7 @@ int tracing_release(struct inode *inode, struct file *file) iter->trace->close(iter); /* reenable tracing if it was previously enabled */ - if (iter->tr->ctrl) { - tracer_enabled = 1; - /* - * It is safe to enable function tracing even if it - * isn't used - */ - ftrace_function_enabled = 1; - } + tracing_start(); mutex_unlock(&trace_types_lock); seq_release(inode, file); @@ -1999,7 +2646,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2020,11 +2667,11 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); if (err) goto err_unlock; - raw_local_irq_disable(); + local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* @@ -2041,7 +2688,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, } } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_enable(); + local_irq_enable(); tracing_cpumask = tracing_cpumask_new; @@ -2062,13 +2709,16 @@ static struct file_operations tracing_cpumask_fops = { }; static ssize_t -tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, +tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + int i; char *buf; int r = 0; int len = 0; - int i; + u32 tracer_flags = current_trace->flags->val; + struct tracer_opt *trace_opts = current_trace->flags->opts; + /* calulate max size */ for (i = 0; trace_options[i]; i++) { @@ -2076,6 +2726,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, len += 3; /* "no" and space */ } + /* + * Increase the size with names of options specific + * of the current tracer. + */ + for (i = 0; trace_opts[i].name; i++) { + len += strlen(trace_opts[i].name); + len += 3; /* "no" and space */ + } + /* +2 for \n and \0 */ buf = kmalloc(len + 2, GFP_KERNEL); if (!buf) @@ -2088,6 +2747,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "no%s ", trace_options[i]); } + for (i = 0; trace_opts[i].name; i++) { + if (tracer_flags & trace_opts[i].bit) + r += sprintf(buf + r, "%s ", + trace_opts[i].name); + else + r += sprintf(buf + r, "no%s ", + trace_opts[i].name); + } + r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); @@ -2098,13 +2766,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, return r; } +/* Try to assign a tracer specific option */ +static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +{ + struct tracer_flags *trace_flags = trace->flags; + struct tracer_opt *opts = NULL; + int ret = 0, i = 0; + int len; + + for (i = 0; trace_flags->opts[i].name; i++) { + opts = &trace_flags->opts[i]; + len = strlen(opts->name); + + if (strncmp(cmp, opts->name, len) == 0) { + ret = trace->set_flag(trace_flags->val, + opts->bit, !neg); + break; + } + } + /* Not found */ + if (!trace_flags->opts[i].name) + return -EINVAL; + + /* Refused to handle */ + if (ret) + return ret; + + if (neg) + trace_flags->val &= ~opts->bit; + else + trace_flags->val |= opts->bit; + + return 0; +} + static ssize_t -tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, +tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; char *cmp = buf; int neg = 0; + int ret; int i; if (cnt >= sizeof(buf)) @@ -2131,11 +2834,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, break; } } - /* - * If no option could be set, return an error: - */ - if (!trace_options[i]) - return -EINVAL; + + /* If no option could be set, test the specific tracer options */ + if (!trace_options[i]) { + ret = set_tracer_option(current_trace, cmp, neg); + if (ret) + return ret; + } filp->f_pos += cnt; @@ -2144,8 +2849,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, static struct file_operations tracing_iter_fops = { .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .read = tracing_trace_options_read, + .write = tracing_trace_options_write, }; static const char readme_msg[] = @@ -2159,9 +2864,9 @@ static const char readme_msg[] = "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/iter_ctrl\n" + "# cat /debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" "echo 0 > /debug/tracing/tracing_enabled\n" @@ -2184,11 +2889,10 @@ static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_array *tr = filp->private_data; char buf[64]; int r; - r = sprintf(buf, "%ld\n", tr->ctrl); + r = sprintf(buf, "%u\n", tracer_enabled); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2216,16 +2920,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, val = !!val; mutex_lock(&trace_types_lock); - if (tr->ctrl ^ val) { - if (val) + if (tracer_enabled ^ val) { + if (val) { tracer_enabled = 1; - else + if (current_trace->start) + current_trace->start(tr); + tracing_start(); + } else { tracer_enabled = 0; - - tr->ctrl = val; - - if (current_trace && current_trace->ctrl_update) - current_trace->ctrl_update(tr); + tracing_stop(); + if (current_trace->stop) + current_trace->stop(tr); + } } mutex_unlock(&trace_types_lock); @@ -2251,14 +2957,52 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +static int tracing_set_tracer(char *buf) +{ + struct trace_array *tr = &global_trace; + struct tracer *t; + int ret = 0; + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(t->name, buf) == 0) + break; + } + if (!t) { + ret = -EINVAL; + goto out; + } + if (t == current_trace) + goto out; + + trace_branch_disable(); + if (current_trace && current_trace->reset) + current_trace->reset(tr); + + current_trace = t; + if (t->init) { + ret = t->init(tr); + if (ret) + goto out; + } + + trace_branch_enable(tr); + out: + mutex_unlock(&trace_types_lock); + + return ret; +} + static ssize_t tracing_set_trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_array *tr = &global_trace; - struct tracer *t; char buf[max_tracer_type_len+1]; int i; + size_t ret; + int err; + + ret = cnt; if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; @@ -2272,27 +3016,13 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - mutex_lock(&trace_types_lock); - for (t = trace_types; t; t = t->next) { - if (strcmp(t->name, buf) == 0) - break; - } - if (!t || t == current_trace) - goto out; - - if (current_trace && current_trace->reset) - current_trace->reset(tr); - - current_trace = t; - if (t->init) - t->init(tr); - - out: - mutex_unlock(&trace_types_lock); + err = tracing_set_tracer(buf); + if (err) + return err; - filp->f_pos += cnt; + filp->f_pos += ret; - return cnt; + return ret; } static ssize_t @@ -2341,7 +3071,6 @@ static atomic_t tracing_reader; static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_iterator *iter; - int cpu; if (tracing_disabled) return -ENODEV; @@ -2358,42 +3087,25 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; mutex_lock(&trace_types_lock); + + /* trace pipe does not show start of buffer */ + cpus_setall(iter->started); + iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; - for_each_tracing_cpu(cpu) { - iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); - if (!iter->buffer_iter[cpu]) - goto fail_buffer; - } - if (iter->trace->pipe_open) iter->trace->pipe_open(iter); mutex_unlock(&trace_types_lock); return 0; - - fail_buffer: - for_each_tracing_cpu(cpu) { - if (iter->buffer_iter[cpu]) - ring_buffer_read_finish(iter->buffer_iter[cpu]); - } - mutex_unlock(&trace_types_lock); - - return -ENOMEM; } static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; - int cpu; - for_each_tracing_cpu(cpu) { - if (iter->buffer_iter[cpu]) - ring_buffer_read_finish(iter->buffer_iter[cpu]); - } kfree(iter); atomic_dec(&tracing_reader); @@ -2429,17 +3141,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; - unsigned long flags; -#ifdef CONFIG_FTRACE - int ftrace_save; -#endif ssize_t sret; /* return any leftover data */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (sret != -EBUSY) return sret; - sret = 0; trace_seq_reset(&iter->seq); @@ -2450,6 +3157,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, goto out; } +waitagain: + sret = 0; while (trace_empty(iter)) { if ((filp->f_flags & O_NONBLOCK)) { @@ -2514,21 +3223,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, offsetof(struct trace_iterator, seq)); iter->pos = -1; - /* - * We need to stop all tracing on all CPUS to read the - * the next buffer. This is a bit expensive, but is - * not done often. We fill all what we can read, - * and then release the locks again. - */ - - local_irq_disable(); -#ifdef CONFIG_FTRACE - ftrace_save = ftrace_enabled; - ftrace_enabled = 0; -#endif - smp_wmb(); - ring_buffer_lock(iter->tr->buffer, &flags); - while (find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int len = iter->seq.len; @@ -2546,18 +3240,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, break; } - ring_buffer_unlock(iter->tr->buffer, flags); -#ifdef CONFIG_FTRACE - ftrace_enabled = ftrace_save; -#endif - local_irq_enable(); - /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (iter->seq.readpos >= iter->seq.len) trace_seq_reset(&iter->seq); + + /* + * If there was nothing to send to user, inspite of consuming trace + * entries, go back to wait for more entries. + */ if (sret == -EBUSY) - sret = 0; + goto waitagain; out: mutex_unlock(&trace_types_lock); @@ -2573,7 +3266,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = sprintf(buf, "%lu\n", tr->entries); + r = sprintf(buf, "%lu\n", tr->entries >> 10); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2583,8 +3276,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int ret; - struct trace_array *tr = filp->private_data; + int ret, cpu; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2604,13 +3296,19 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); - if (tr->ctrl) { - cnt = -EBUSY; - pr_info("ftrace: please disable tracing" - " before modifying buffer size\n"); - goto out; + tracing_stop(); + + /* disable all cpu buffers */ + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_inc(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_inc(&max_tr.data[cpu]->disabled); } + /* value is in KB */ + val <<= 10; + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { @@ -2642,6 +3340,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) cnt = -ENOMEM; out: + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_dec(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_dec(&max_tr.data[cpu]->disabled); + } + + tracing_start(); max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); @@ -2653,7 +3359,7 @@ static int mark_printk(const char *fmt, ...) int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); + ret = trace_vprintk(0, -1, fmt, args); va_end(args); return ret; } @@ -2664,9 +3370,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, { char *buf; char *end; - struct trace_array *tr = &global_trace; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -2732,22 +3437,38 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE +int __weak ftrace_arch_read_dyn_info(char *buf, int size) +{ + return 0; +} + static ssize_t -tracing_read_long(struct file *filp, char __user *ubuf, +tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + static char ftrace_dyn_info_buffer[1024]; + static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; - char buf[64]; + char *buf = ftrace_dyn_info_buffer; + int size = ARRAY_SIZE(ftrace_dyn_info_buffer); int r; - r = sprintf(buf, "%ld\n", *p); + mutex_lock(&dyn_info_mutex); + r = sprintf(buf, "%ld ", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); + buf[r++] = '\n'; + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + mutex_unlock(&dyn_info_mutex); + + return r; } -static struct file_operations tracing_read_long_fops = { +static struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, - .read = tracing_read_long, + .read = tracing_read_dyn_info, }; #endif @@ -2788,10 +3509,10 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); - entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + entry = debugfs_create_file("trace_options", 0644, d_tracer, NULL, &tracing_iter_fops); if (!entry) - pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + pr_warning("Could not create debugfs 'trace_options' entry\n"); entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); @@ -2841,11 +3562,11 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); - entry = debugfs_create_file("trace_entries", 0644, d_tracer, + entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); if (!entry) pr_warning("Could not create debugfs " - "'trace_entries' entry\n"); + "'buffer_size_kb' entry\n"); entry = debugfs_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); @@ -2856,7 +3577,7 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, - &tracing_read_long_fops); + &tracing_dyn_info_fops); if (!entry) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); @@ -2867,7 +3588,7 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; @@ -2875,23 +3596,23 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; + int cpu, len = 0, size, pc; struct print_entry *entry; - unsigned long flags, irq_flags; - long disabled; - int cpu, len = 0, size; + unsigned long irq_flags; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled || tracing_selftest_running) return 0; - local_irq_save(flags); + pc = preempt_count(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - if (unlikely(disabled != 1)) + if (unlikely(atomic_read(&data->disabled))) goto out; - spin_lock(&trace_buf_lock); + pause_graph_tracing(); + spin_lock_irqsave(&trace_buf_lock, irq_flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -2902,20 +3623,20 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, irq_flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; + entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - spin_unlock(&trace_buf_lock); - + spin_unlock_irqrestore(&trace_buf_lock, irq_flags); + unpause_graph_tracing(); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + preempt_enable_notrace(); return len; } @@ -2930,7 +3651,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, fmt, ap); + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } @@ -2939,7 +3660,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); return NOTIFY_OK; } @@ -2955,7 +3677,8 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); break; default: break; @@ -2996,7 +3719,6 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } - void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); @@ -3004,8 +3726,8 @@ void ftrace_dump(void) static struct trace_iterator iter; static cpumask_t mask; static int dump_ran; - unsigned long flags, irq_flags; - int cnt = 0; + unsigned long flags; + int cnt = 0, cpu; /* only one dump */ spin_lock_irqsave(&ftrace_dump_lock, flags); @@ -3015,7 +3737,14 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ - ftrace_kill_atomic(); + ftrace_kill(); + + for_each_tracing_cpu(cpu) { + atomic_inc(&global_trace.data[cpu]->disabled); + } + + /* don't look at user memory in panic mode */ + trace_flags &= ~TRACE_ITER_SYM_USEROBJ; printk(KERN_TRACE "Dumping ftrace buffer:\n"); @@ -3031,8 +3760,6 @@ void ftrace_dump(void) cpus_clear(mask); - ring_buffer_lock(iter.tr->buffer, &irq_flags); - while (!trace_empty(&iter)) { if (!cnt) @@ -3060,8 +3787,6 @@ void ftrace_dump(void) else printk(KERN_TRACE "---------------------------------\n"); - ring_buffer_unlock(iter.tr->buffer, irq_flags); - out: spin_unlock_irqrestore(&ftrace_dump_lock, flags); } @@ -3114,7 +3839,6 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ - global_trace.ctrl = tracer_enabled; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list,