Merge branch 'linus' into tracing/core
[safe/jmp/linux-2.6] / kernel / trace / trace.h
index e603a29..ba25793 100644 (file)
@@ -9,7 +9,11 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
+#include <trace/power.h>
+
+#include <linux/trace_seq.h>
+#include <linux/ftrace_event.h>
 
 enum trace_type {
        __TRACE_FIRST_TYPE = 0,
@@ -19,6 +23,7 @@ enum trace_type {
        TRACE_WAKE,
        TRACE_STACK,
        TRACE_PRINT,
+       TRACE_BPRINT,
        TRACE_SPECIAL,
        TRACE_MMIO_RW,
        TRACE_MMIO_MAP,
@@ -29,6 +34,8 @@ enum trace_type {
        TRACE_GRAPH_ENT,
        TRACE_USER_STACK,
        TRACE_HW_BRANCHES,
+       TRACE_SYSCALL_ENTER,
+       TRACE_SYSCALL_EXIT,
        TRACE_KMEM_ALLOC,
        TRACE_KMEM_FREE,
        TRACE_POWER,
@@ -38,21 +45,6 @@ enum trace_type {
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-       unsigned char           type;
-       unsigned char           cpu;
-       unsigned char           flags;
-       unsigned char           preempt_count;
-       int                     pid;
-       int                     tgid;
-};
-
-/*
  * Function trace entry - function address and parent function address:
  */
 struct ftrace_entry {
@@ -63,13 +55,13 @@ struct ftrace_entry {
 
 /* Function call entry */
 struct ftrace_graph_ent_entry {
-       struct trace_entry                      ent;
+       struct trace_entry              ent;
        struct ftrace_graph_ent         graph_ent;
 };
 
 /* Function return entry */
 struct ftrace_graph_ret_entry {
-       struct trace_entry                      ent;
+       struct trace_entry              ent;
        struct ftrace_graph_ret         ret;
 };
 extern struct tracer boot_tracer;
@@ -115,12 +107,18 @@ struct userstack_entry {
 };
 
 /*
- * ftrace_printk entry:
+ * trace_printk entry:
  */
+struct bprint_entry {  /* entry type paired with TRACE_BPRINT in the IF_ASSIGN() list */
+       struct trace_entry      ent;    /* embedded common entry header */
+       unsigned long           ip;     /* NOTE(review): presumably the caller's instruction pointer - confirm */
+       const char              *fmt;   /* only the pointer is stored here, not a copy of the string */
+       u32                     buf[];  /* flexible array of 32-bit words; NOTE(review): presumably the packed arguments - confirm */
+};
+
 struct print_entry {
        struct trace_entry      ent;
        unsigned long           ip;
-       int                     depth;
        char                    buf[];
 };
 
@@ -173,6 +171,12 @@ struct trace_power {
        struct power_trace      state_data;
 };
 
+enum kmemtrace_type_id {
+       KMEMTRACE_TYPE_KMALLOC = 0,     /* kmalloc() or kfree(). */
+       KMEMTRACE_TYPE_CACHE,           /* kmem_cache_*(). */
+       KMEMTRACE_TYPE_PAGES,           /* __get_free_pages() and friends. */
+};
+
 struct kmemtrace_alloc_entry {
        struct trace_entry      ent;
        enum kmemtrace_type_id type_id;
@@ -191,11 +195,24 @@ struct kmemtrace_free_entry {
        const void *ptr;
 };
 
+struct syscall_trace_enter {   /* entry type paired with TRACE_SYSCALL_ENTER in the IF_ASSIGN() list */
+       struct trace_entry      ent;    /* embedded common entry header */
+       int                     nr;     /* NOTE(review): presumably the syscall number - confirm */
+       unsigned long           args[]; /* flexible array; its length is not recorded in this struct */
+};
+
+struct syscall_trace_exit {    /* entry type paired with TRACE_SYSCALL_EXIT in the IF_ASSIGN() list */
+       struct trace_entry      ent;    /* embedded common entry header */
+       int                     nr;     /* NOTE(review): presumably the syscall number - confirm */
+       unsigned long           ret;    /* NOTE(review): presumably the syscall return value - confirm */
+};
+
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
  *  IRQS_OFF           - interrupts were disabled
- *  IRQS_NOSUPPORT     - arch does not support irqs_disabled_flags
+ *  IRQS_NOSUPPORT     - arch does not support irqs_disabled_flags
  *  NEED_RESCHED               - reschedule is requested
  *  HARDIRQ            - inside an interrupt handler
  *  SOFTIRQ            - inside a softirq handler
@@ -217,6 +234,7 @@ enum trace_flag_type {
  */
 struct trace_array_cpu {
        atomic_t                disabled;
+       void                    *buffer_page;   /* ring buffer spare */
 
        /* these fields get copied into max-trace: */
        unsigned long           trace_idx;
@@ -234,8 +252,6 @@ struct trace_array_cpu {
        char                    comm[TASK_COMM_LEN];
 };
 
-struct trace_iterator;
-
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -284,6 +300,7 @@ extern void __ftrace_bad_type(void);
                IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
                IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
                IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
+               IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
                IF_ASSIGN(var, ent, struct special_entry, 0);           \
                IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
                          TRACE_MMIO_RW);                               \
@@ -297,30 +314,26 @@ extern void __ftrace_bad_type(void);
                IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,      \
                          TRACE_GRAPH_RET);             \
                IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
-               IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+               IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
                IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,       \
                          TRACE_KMEM_ALLOC);    \
                IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
                          TRACE_KMEM_FREE);     \
+               IF_ASSIGN(var, ent, struct syscall_trace_enter,         \
+                         TRACE_SYSCALL_ENTER);                         \
+               IF_ASSIGN(var, ent, struct syscall_trace_exit,          \
+                         TRACE_SYSCALL_EXIT);                          \
                __ftrace_bad_type();                                    \
        } while (0)
 
-/* Return values for print_line callback */
-enum print_line_t {
-       TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
-       TRACE_TYPE_HANDLED      = 1,
-       TRACE_TYPE_UNHANDLED    = 2     /* Relay to other output functions */
-};
-
-
 /*
  * An option specific to a tracer. This is a boolean value.
  * The bit is the bit index that sets its value on the
  * flags value in struct tracer_flags.
  */
 struct tracer_opt {
-       const char      *name; /* Will appear on the trace_options file */
-       u32             bit; /* Mask assigned in val field in tracer_flags */
+       const char      *name; /* Will appear on the trace_options file */
+       u32             bit; /* Mask assigned in val field in tracer_flags */
 };
 
 /*
@@ -329,29 +342,51 @@ struct tracer_opt {
  */
 struct tracer_flags {
        u32                     val;
-       struct tracer_opt       *opts;
+       struct tracer_opt       *opts;
 };
 
 /* Makes it easier to define a tracer opt */
 #define TRACER_OPT(s, b)       .name = #s, .bit = b
 
 
-/*
- * A specific tracer, represented by methods that operate on a trace array:
+/**
+ * struct tracer - a specific tracer and its callbacks to interact with debugfs
+ * @name: the name chosen to select it on the available_tracers file
+ * @init: called when one switches to this tracer (echo name > current_tracer)
+ * @reset: called when one switches to another tracer
+ * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
+ * @stop: called when tracing is paused (echo 0 > tracing_enabled)
+ * @open: called when the trace file is opened
+ * @pipe_open: called when the trace_pipe file is opened
+ * @wait_pipe: override how the user waits for traces on trace_pipe
+ * @close: called when the trace file is released
+ * @read: override the default read callback on trace_pipe
+ * @splice_read: override the default splice_read callback on trace_pipe
+ * @selftest: selftest to run on boot (see trace_selftest.c)
+ * @print_headers: override the first lines that describe your columns
+ * @print_line: callback that prints a trace
+ * @set_flag: signals one of your private flags changed (trace_options file)
+ * @flags: your private flags
  */
 struct tracer {
        const char              *name;
-       /* Your tracer should raise a warning if init fails */
        int                     (*init)(struct trace_array *tr);
        void                    (*reset)(struct trace_array *tr);
        void                    (*start)(struct trace_array *tr);
        void                    (*stop)(struct trace_array *tr);
        void                    (*open)(struct trace_iterator *iter);
        void                    (*pipe_open)(struct trace_iterator *iter);
+       void                    (*wait_pipe)(struct trace_iterator *iter);
        void                    (*close)(struct trace_iterator *iter);
        ssize_t                 (*read)(struct trace_iterator *iter,
                                        struct file *filp, char __user *ubuf,
                                        size_t cnt, loff_t *ppos);
+       ssize_t                 (*splice_read)(struct trace_iterator *iter,
+                                              struct file *filp,
+                                              loff_t *ppos,
+                                              struct pipe_inode_info *pipe,
+                                              size_t len,
+                                              unsigned int flags);
 #ifdef CONFIG_FTRACE_STARTUP_TEST
        int                     (*selftest)(struct tracer *trace,
                                            struct trace_array *tr);
@@ -362,67 +397,65 @@ struct tracer {
        int                     (*set_flag)(u32 old_flags, u32 bit, int set);
        struct tracer           *next;
        int                     print_max;
-       struct tracer_flags     *flags;
+       struct tracer_flags     *flags;
        struct tracer_stat      *stats;
 };
 
-struct trace_seq {
-       unsigned char           buffer[PAGE_SIZE];
-       unsigned int            len;
-       unsigned int            readpos;
-};
 
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-       struct trace_array      *tr;
-       struct tracer           *trace;
-       void                    *private;
-       struct ring_buffer_iter *buffer_iter[NR_CPUS];
-
-       /* The below is zeroed out in pipe_read */
-       struct trace_seq        seq;
-       struct trace_entry      *ent;
-       int                     cpu;
-       u64                     ts;
-
-       unsigned long           iter_flags;
-       loff_t                  pos;
-       long                    idx;
-
-       cpumask_var_t           started;
-};
+#define TRACE_PIPE_ALL_CPU     -1      /* NOTE(review): negative constant is unparenthesized; consider (-1) */
 
+int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
+void tracing_reset_current(int cpu);
+void tracing_reset_current_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *trace_create_file(const char *name,
+                                mode_t mode,
+                                struct dentry *parent,
+                                void *data,
+                                const struct file_operations *fops);
+
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct ring_buffer_event;
+
+struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
+                                                   int type,
+                                                   unsigned long len,
+                                                   unsigned long flags,
+                                                   int pc);
+void trace_buffer_unlock_commit(struct trace_array *tr,
+                               struct ring_buffer_event *event,
+                               unsigned long flags, int pc);
+
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
                                                struct trace_array_cpu *data);
+
+struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
+                                         int *ent_cpu, u64 *ent_ts);
+
 void tracing_generic_entry_update(struct trace_entry *entry,
                                  unsigned long flags,
                                  int pc);
 
+void default_wait_pipe(struct trace_iterator *iter);
+void poll_wait_pipe(struct trace_iterator *iter);
+
 void ftrace(struct trace_array *tr,
                            struct trace_array_cpu *data,
                            unsigned long ip,
                            unsigned long parent_ip,
                            unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
-                               struct trace_array_cpu *data,
                                struct task_struct *prev,
                                struct task_struct *next,
                                unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
-                               struct trace_array_cpu *data,
                                struct task_struct *wakee,
                                struct task_struct *cur,
                                unsigned long flags, int pc);
@@ -432,7 +465,6 @@ void trace_special(struct trace_array *tr,
                   unsigned long arg2,
                   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
-                   struct trace_array_cpu *data,
                    unsigned long ip,
                    unsigned long parent_ip,
                    unsigned long flags, int pc);
@@ -458,7 +490,6 @@ void update_max_tr_single(struct trace_array *tr,
                          struct task_struct *tsk, int cpu);
 
 void __trace_stack(struct trace_array *tr,
-                  struct trace_array_cpu *data,
                   unsigned long flags,
                   int skip, int pc);
 
@@ -478,7 +509,7 @@ struct tracer_switch_ops {
 };
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
 
-extern char *trace_find_cmdline(int pid);
+extern void trace_find_cmdline(int pid, char comm[]);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
@@ -489,6 +520,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 extern int trace_selftest_startup_function(struct tracer *trace,
                                           struct trace_array *tr);
+extern int trace_selftest_startup_function_graph(struct tracer *trace,
+                                                struct trace_array *tr);
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
                                          struct trace_array *tr);
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -508,15 +541,19 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
 extern int
-trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+extern int
+trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+extern enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
@@ -548,7 +585,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
        return 1;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
-
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function(struct trace_iterator *iter)
@@ -591,7 +627,12 @@ enum trace_iterator_flags {
        TRACE_ITER_ANNOTATE             = 0x2000,
        TRACE_ITER_USERSTACKTRACE       = 0x4000,
        TRACE_ITER_SYM_USEROBJ          = 0x8000,
-       TRACE_ITER_PRINTK_MSGONLY       = 0x10000
+       TRACE_ITER_PRINTK_MSGONLY       = 0x10000,
+       TRACE_ITER_CONTEXT_INFO         = 0x20000, /* Print pid/cpu/time */
+       TRACE_ITER_LATENCY_FMT          = 0x40000,
+       TRACE_ITER_GLOBAL_CLK           = 0x80000,
+       TRACE_ITER_SLEEP_TIME           = 0x100000,
+       TRACE_ITER_GRAPH_TIME           = 0x200000,
 };
 
 /*
@@ -612,12 +653,12 @@ extern struct tracer nop_trace;
  * preempt_enable (after a disable), a schedule might take place
  * causing an infinite recursion.
  *
- * To prevent this, we read the need_recshed flag before
+ * To prevent this, we read the need_resched flag before
  * disabling preemption. When we want to enable preemption we
  * check the flag, if it is set, then we call preempt_enable_no_resched.
  * Otherwise, we call preempt_enable.
  *
- * The rational for doing the above is that if need resched is set
+ * The rationale for doing the above is that if need_resched is set
  * and we have yet to reschedule, we are either in an atomic location
  * (where we do not need to check for scheduling) or we are inside
  * the scheduler and do not want to resched.
@@ -638,7 +679,7 @@ static inline int ftrace_preempt_disable(void)
  *
  * This is a scheduler safe way to enable preemption and not miss
  * any preemption checks. The disable call saved the state of preemption.
- * If resched is set, then we were either inside an atomic or
+ * If resched is set, then we are either inside an atomic or
  * are inside the scheduler (we would have already scheduled
  * otherwise). In this case, we do not want to call normal
  * preempt_enable, but preempt_enable_no_resched instead.
@@ -675,4 +716,128 @@ static inline void trace_branch_disable(void)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
+/* set ring buffers to default size if not already done */
+int tracing_update_buffers(void);
+
+/* trace event type bit fields, not numeric */
+enum {
+       TRACE_EVENT_TYPE_PRINTF         = 1,    /* bit 0 */
+       TRACE_EVENT_TYPE_RAW            = 2,    /* bit 1 */
+};
+
+struct ftrace_event_field {    /* NOTE(review): presumably describes one field of an event record - confirm */
+       struct list_head        link;   /* list linkage; the owning list head is not visible in this header */
+       char                    *name;
+       char                    *type;  /* NOTE(review): presumably the field's C type as text - confirm */
+       int                     offset; /* NOTE(review): presumably a byte offset into the record - confirm */
+       int                     size;   /* NOTE(review): presumably the field size in bytes - confirm */
+       int                     is_signed;      /* int used as a flag */
+};
+
+struct event_filter {
+       int                     n_preds;        /* NOTE(review): presumably the number of entries in preds - confirm */
+       struct filter_pred      **preds;        /* pointers to struct filter_pred (defined further down this header) */
+       char                    *filter_string; /* NOTE(review): presumably the filter in its textual form - confirm */
+};
+
+struct event_subsystem {
+       struct list_head        list;   /* list linkage; the owning list head is not visible in this header */
+       const char              *name;
+       struct dentry           *entry; /* NOTE(review): presumably the subsystem's debugfs directory - confirm */
+       void                    *filter;        /* untyped; NOTE(review): presumably a struct event_filter * - confirm */
+};
+
+struct filter_pred;
+
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,        /* same signature as the filter_pred_##x functions */
+                                int val1, int val2);                           /* generated by the DEFINE_*_PRED macros */
+
+struct filter_pred {
+       filter_pred_fn_t fn;    /* predicate callback */
+       u64 val;        /* operand; the DEFINE_*_PRED functions cast it to the type of the field they test */
+       char str_val[MAX_FILTER_STR_VAL];       /* NOTE(review): presumably the operand for string predicates - confirm */
+       int str_len;
+       char *field_name;
+       int offset;     /* added to the event pointer to locate the field under test */
+       int not;        /* XORed into the result by the DEFINE_EQUALITY_PRED functions */
+       int op;         /* compared against OP_LT/OP_LE/OP_GT/OP_GE by the DEFINE_COMPARISON_PRED functions */
+       int pop_n;
+};
+
+extern void print_event_filter(struct ftrace_event_call *call,
+                              struct trace_seq *s);
+extern int apply_event_filter(struct ftrace_event_call *call,
+                             char *filter_string);
+extern int apply_subsystem_event_filter(struct event_subsystem *system,
+                                       char *filter_string);
+extern void print_subsystem_event_filter(struct event_subsystem *system,
+                                        struct trace_seq *s);
+
+static inline int      /* returns 1 if @event was discarded, 0 if it was kept */
+filter_check_discard(struct ftrace_event_call *call, void *rec,
+                    struct ring_buffer *buffer,
+                    struct ring_buffer_event *event)
+{      /* filter_match_preds() is only consulted when call->filter_active is set */
+       if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+               ring_buffer_discard_commit(buffer, event);      /* @rec failed the filter: drop @event from @buffer */
+               return 1;
+       }
+
+       return 0;       /* no active filter, or @rec matched */
+}
+
+#define DEFINE_COMPARISON_PRED(type) /* emits filter_pred_<type>() */  \
+static int filter_pred_##type(struct filter_pred *pred, void *event,   \
+                             int val1, int val2)                       \
+{ /* val1 and val2 are not used */                                     \
+       type *addr = (type *)(event + pred->offset);                    \
+       type val = (type)pred->val;                                     \
+       int match = 0;                                                  \
+                                                                       \
+       switch (pred->op) {                                             \
+       case OP_LT:                                                     \
+               match = (*addr < val);                                  \
+               break;                                                  \
+       case OP_LE:                                                     \
+               match = (*addr <= val);                                 \
+               break;                                                  \
+       case OP_GT:                                                     \
+               match = (*addr > val);                                  \
+               break;                                                  \
+       case OP_GE:                                                     \
+               match = (*addr >= val);                                 \
+               break;                                                  \
+       default: /* any other op: match stays 0 */                      \
+               break;                                                  \
+       }                                                               \
+                                                                       \
+       return match; /* pred->not is not applied here */               \
+}
+
+#define DEFINE_EQUALITY_PRED(size) /* emits filter_pred_<size>() */    \
+static int filter_pred_##size(struct filter_pred *pred, void *event,   \
+                             int val1, int val2)                       \
+{ /* val1 and val2 are not used */                                     \
+       u##size *addr = (u##size *)(event + pred->offset);              \
+       u##size val = (u##size)pred->val;                               \
+       int match;                                                      \
+                                                                       \
+       match = (val == *addr) ^ pred->not; /* pred->not is XORed in */ \
+                                                                       \
+       return match;                                                   \
+}
+
+extern struct mutex event_mutex;
+extern struct list_head ftrace_events;
+
+extern const char *__start___trace_bprintk_fmt[];
+extern const char *__stop___trace_bprintk_fmt[];
+
+#undef TRACE_EVENT_FORMAT      /* redefined so the include at the end of this group only emits declarations */
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+       extern struct ftrace_event_call event_##call;   /* one extern event_<call> per use of the macro */
+#undef TRACE_EVENT_FORMAT_NOFILTER     /* redefined on the next line with an empty expansion */
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
+#include "trace_event_types.h" /* expanded with the two macro definitions above in effect */
+
 #endif /* _LINUX_KERNEL_TRACE_H */