Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6
[safe/jmp/linux-2.6] / kernel / trace / trace_output.c
index 83a8abb..ab13d70 100644 (file)
 /* must be a power of 2 */
 #define EVENT_HASHSIZE 128
 
-static DEFINE_MUTEX(trace_event_mutex);
+DECLARE_RWSEM(trace_event_mutex);
+
+DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
+
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
-void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
        int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+       int ret;
 
-       s->buffer[len] = 0;
-       seq_puts(m, s->buffer);
+       ret = seq_write(m, s->buffer, len);
 
-       trace_seq_init(s);
+       /*
+        * Only reset this buffer if we successfully wrote to the
+        * seq_file buffer.
+        */
+       if (!ret)
+               trace_seq_init(s);
+
+       return ret;
 }
 
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -66,6 +77,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
  * @s: trace sequence descriptor
  * @fmt: printf format string
  *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
  * The tracer may use either sequence operations or its own
  * copy to user routines. To simplify formating of a trace
  * trace_seq_printf is used to store strings into a special
@@ -79,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
        va_list ap;
        int ret;
 
-       if (!len)
+       if (s->full || !len)
                return 0;
 
        va_start(ap, fmt);
@@ -87,28 +101,66 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
        va_end(ap);
 
        /* If we can't write it all, don't bother writing anything */
-       if (ret >= len)
+       if (ret >= len) {
+               s->full = 1;
                return 0;
+       }
 
        s->len += ret;
 
-       return len;
+       return 1;
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ * @args: va_list of arguments for @fmt
+ *
+ * va_list counterpart of trace_seq_printf: formats into the free space
+ * of @s. Output that does not fit is not committed and @s is marked
+ * full. Returns 0 if @s is full or the trace oversizes the buffer's
+ * free space, 1 otherwise.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+       int len = (PAGE_SIZE - 1) - s->len;
+       int ret;
+
+       if (s->full || !len)
+               return 0;
+
+       ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+       /* If we can't write it all, don't bother writing anything */
+       if (ret >= len) {
+               s->full = 1;
+               return 0;
+       }
+
+       s->len += ret;
+
+       return 1;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
+
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
        int len = (PAGE_SIZE - 1) - s->len;
        int ret;
 
-       if (!len)
+       if (s->full || !len)
                return 0;
 
        ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
 
        /* If we can't write it all, don't bother writing anything */
-       if (ret >= len)
+       if (ret >= len) {
+               s->full = 1;
                return 0;
+       }
 
        s->len += ret;
 
@@ -129,9 +181,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 {
        int len = strlen(str);
 
-       if (len > ((PAGE_SIZE - 1) - s->len))
+       if (s->full)
                return 0;
 
+       if (len > ((PAGE_SIZE - 1) - s->len)) {
+               s->full = 1;
+               return 0;
+       }
+
        memcpy(s->buffer + s->len, str, len);
        s->len += len;
 
@@ -140,18 +197,29 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 
 int trace_seq_putc(struct trace_seq *s, unsigned char c)
 {
-       if (s->len >= (PAGE_SIZE - 1))
+       if (s->full)
                return 0;
 
+       if (s->len >= (PAGE_SIZE - 1)) {
+               s->full = 1;
+               return 0;
+       }
+
        s->buffer[s->len++] = c;
 
        return 1;
 }
+EXPORT_SYMBOL(trace_seq_putc);
 
 int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
 {
-       if (len > ((PAGE_SIZE - 1) - s->len))
+       if (s->full)
+               return 0;
+
+       if (len > ((PAGE_SIZE - 1) - s->len)) {
+               s->full = 1;
                return 0;
+       }
 
        memcpy(s->buffer + s->len, mem, len);
        s->len += len;
@@ -165,6 +233,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
        const unsigned char *data = mem;
        int i, j;
 
+       if (s->full)
+               return 0;
+
 #ifdef __BIG_ENDIAN
        for (i = 0, j = 0; i < len; i++) {
 #else
@@ -182,9 +253,14 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
 {
        void *ret;
 
-       if (len > ((PAGE_SIZE - 1) - s->len))
+       if (s->full)
                return NULL;
 
+       if (len > ((PAGE_SIZE - 1) - s->len)) {
+               s->full = 1;
+               return NULL;
+       }
+
        ret = s->buffer + s->len;
        s->len += len;
 
@@ -195,8 +271,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 {
        unsigned char *p;
 
-       if (s->len >= (PAGE_SIZE - 1))
+       if (s->full)
                return 0;
+
+       if (s->len >= (PAGE_SIZE - 1)) {
+               s->full = 1;
+               return 0;
+       }
+
        p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
        if (!IS_ERR(p)) {
                p = mangle_path(s->buffer + s->len, p, "\n");
@@ -209,9 +291,86 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
                return 1;
        }
 
+       s->full = 1;
        return 0;
 }
 
+/*
+ * Append to @p the names from @flag_array whose mask bits are all set in
+ * @flags, separated by @delim (if non-NULL); leftover bits are printed as
+ * hex. The text is NUL-terminated; a pointer to its start is returned.
+ */
+const char *
+ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+                      unsigned long flags,
+                      const struct trace_print_flags *flag_array)
+{
+       unsigned long mask;
+       const char *ret = p->buffer + p->len;
+       int i, first = 1;
+
+       for (i = 0;  flag_array[i].name && flags; i++) {
+               mask = flag_array[i].mask;
+               if ((flags & mask) != mask)
+                       continue;
+               flags &= ~mask;
+               /* delimit within this call's output only */
+               if (!first && delim)
+                       trace_seq_puts(p, delim);
+               first = 0;
+               trace_seq_puts(p, flag_array[i].name);
+       }
+       /* check for left over flags */
+       if (flags) {
+               if (!first && delim)
+                       trace_seq_puts(p, delim);
+               trace_seq_printf(p, "0x%lx", flags);
+       }
+       trace_seq_putc(p, 0);
+       return ret;
+}
+EXPORT_SYMBOL(ftrace_print_flags_seq);
+
+/*
+ * Append the name from @symbol_array whose mask equals @val, or
+ * "0x<val>" in hex when none matches, then a NUL. Returns the text start.
+ */
+const char *
+ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+                        const struct trace_print_flags *symbol_array)
+{
+       int i;
+       const char *ret = p->buffer + p->len;
+
+       for (i = 0;  symbol_array[i].name; i++) {
+               if (val != symbol_array[i].mask)
+                       continue;
+               trace_seq_puts(p, symbol_array[i].name);
+               break;
+       }
+       /* hex fallback if this call appended nothing, even if @p had data */
+       if (ret == (const char *)(p->buffer + p->len))
+               trace_seq_printf(p, "0x%lx", val);
+       trace_seq_putc(p, 0);
+       return ret;
+}
+EXPORT_SYMBOL(ftrace_print_symbols_seq);
+
+/* Append @buf as space-separated hex bytes plus a NUL; returns text start */
+const char *
+ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
+{
+       const char *start = p->buffer + p->len;
+       int n;
+
+       for (n = 0; n < buf_len; n++)
+               trace_seq_printf(p, "%s%2.2x", n ? " " : "", buf[n]);
+       trace_seq_putc(p, 0);
+
+       return start;
+}
+EXPORT_SYMBOL(ftrace_print_hex_seq);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
@@ -274,6 +433,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
        unsigned long vmstart = 0;
        int ret = 1;
 
+       if (s->full)
+               return 0;
+
        if (mm) {
                const struct vm_area_struct *vma;
 
@@ -311,7 +473,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
                 * since individual threads might have already quit!
                 */
                rcu_read_lock();
-               task = find_task_by_vpid(entry->ent.tgid);
+               task = find_task_by_vpid(entry->tgid);
                if (task)
                        mm = get_task_mm(task);
                rcu_read_unlock();
@@ -322,17 +484,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 
                if (ip == ULONG_MAX || !ret)
                        break;
-               if (i && ret)
-                       ret = trace_seq_puts(s, " <- ");
+               if (ret)
+                       ret = trace_seq_puts(s, " => ");
                if (!ip) {
                        if (ret)
                                ret = trace_seq_puts(s, "??");
+                       if (ret)
+                               ret = trace_seq_puts(s, "\n");
                        continue;
                }
                if (!ret)
                        break;
                if (ret)
                        ret = seq_print_user_ip(s, mm, ip, sym_flags);
+               ret = trace_seq_puts(s, "\n");
        }
 
        if (mm)
@@ -361,18 +526,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
        return ret;
 }
 
-static int
-lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
+/**
+ * trace_print_lat_fmt - print the irq, preempt and lockdep fields
+ * @s: trace seq struct to write to
+ * @entry: The trace entry field from the ring buffer
+ *
+ * Prints the generic fields of irqs off, in hard or softirq, preempt
+ * count and lock depth.
+ */
+int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
 {
        int hardirq, softirq;
-       char comm[TASK_COMM_LEN];
+       int ret;
 
-       trace_find_cmdline(entry->pid, comm);
        hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
        softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
 
-       if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
-                             comm, entry->pid, cpu,
+       if (!trace_seq_printf(s, "%c%c%c",
                              (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
                                (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
                                  'X' : '.',
@@ -383,8 +553,31 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
                return 0;
 
        if (entry->preempt_count)
-               return trace_seq_printf(s, "%x", entry->preempt_count);
-       return trace_seq_puts(s, ".");
+               ret = trace_seq_printf(s, "%x", entry->preempt_count);
+       else
+               ret = trace_seq_putc(s, '.');
+
+       if (!ret)
+               return 0;
+
+       if (entry->lock_depth < 0)
+               return trace_seq_putc(s, '.');
+
+       return trace_seq_printf(s, "%d", entry->lock_depth);
+}
+
+/* Emit "comm-pid cpu", then the irq/preempt/lock-depth fields; 0 on failure */
+static int
+lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
+{
+       char comm[TASK_COMM_LEN];
+       int ok;
+
+       trace_find_cmdline(entry->pid, comm);
+       ok = trace_seq_printf(s, "%8.8s-%-5d %3d", comm, entry->pid, cpu);
+       if (ok)
+               ok = trace_print_lat_fmt(s, entry);
+       return ok;
+}
 
 static unsigned long preempt_mark_thresh = 100;
@@ -466,6 +659,7 @@ static int task_state_char(unsigned long state)
  * @type: the type of event to look for
  *
  * Returns an event of type @type otherwise NULL
+ * Called with trace_event_read_lock() held.
  */
 struct trace_event *ftrace_find_event(int type)
 {
@@ -475,7 +669,7 @@ struct trace_event *ftrace_find_event(int type)
 
        key = type & (EVENT_HASHSIZE - 1);
 
-       hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+       hlist_for_each_entry(event, n, &event_hash[key], node) {
                if (event->type == type)
                        return event;
        }
@@ -483,6 +677,46 @@ struct trace_event *ftrace_find_event(int type)
        return NULL;
 }
 
+static LIST_HEAD(ftrace_event_list);
+
+/*
+ * Return the first free type number in ftrace_event_list (0 if none up
+ * to FTRACE_MAX_EVENT); *@list is set to the node to insert before.
+ */
+static int trace_search_list(struct list_head **list)
+{
+       struct trace_event *ev;
+       int next = __TRACE_LAST_TYPE + 1;
+
+       if (list_empty(&ftrace_event_list)) {
+               *list = &ftrace_event_list;
+               return next;
+       }
+
+       /* Advance past every consecutively numbered entry. */
+       list_for_each_entry(ev, &ftrace_event_list, list) {
+               if (ev->type != next)
+                       break;
+               next++;
+       }
+       /* Did we use up every type up to FTRACE_MAX_EVENT? */
+       if (next > FTRACE_MAX_EVENT)
+               return 0;
+       /* If the loop ran to completion, &ev->list is the list head. */
+       *list = &ev->list;
+       return next;
+}
+
+void trace_event_read_lock(void)
+{
+       down_read(&trace_event_mutex);  /* take the rwsem for reading */
+}
+
+void trace_event_read_unlock(void)
+{
+       up_read(&trace_event_mutex);    /* release the read hold on the rwsem */
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -503,22 +737,42 @@ int register_ftrace_event(struct trace_event *event)
        unsigned key;
        int ret = 0;
 
-       mutex_lock(&trace_event_mutex);
+       down_write(&trace_event_mutex);
 
-       if (!event) {
-               ret = next_event_type++;
+       if (WARN_ON(!event))
                goto out;
-       }
 
-       if (!event->type)
-               event->type = next_event_type++;
-       else if (event->type > __TRACE_LAST_TYPE) {
+       INIT_LIST_HEAD(&event->list);
+
+       if (!event->type) {
+               struct list_head *list = NULL;
+
+               if (next_event_type > FTRACE_MAX_EVENT) {
+
+                       event->type = trace_search_list(&list);
+                       if (!event->type)
+                               goto out;
+
+               } else {
+                       
+                       event->type = next_event_type++;
+                       list = &ftrace_event_list;
+               }
+
+               if (WARN_ON(ftrace_find_event(event->type)))
+                       goto out;
+
+               list_add_tail(&event->list, list);
+
+       } else if (event->type > __TRACE_LAST_TYPE) {
                printk(KERN_WARNING "Need to add type to trace.h\n");
                WARN_ON(1);
-       }
-
-       if (ftrace_find_event(event->type))
                goto out;
+       } else {
+               /* Is this event already used */
+               if (ftrace_find_event(event->type))
+                       goto out;
+       }
 
        if (event->trace == NULL)
                event->trace = trace_nop_print;
@@ -531,25 +785,35 @@ int register_ftrace_event(struct trace_event *event)
 
        key = event->type & (EVENT_HASHSIZE - 1);
 
-       hlist_add_head_rcu(&event->node, &event_hash[key]);
+       hlist_add_head(&event->node, &event_hash[key]);
 
        ret = event->type;
  out:
-       mutex_unlock(&trace_event_mutex);
+       up_write(&trace_event_mutex);
 
        return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
 
+/*
+ * Unlink @event. Used by module code with trace_event_mutex held for write.
+ */
+int __unregister_ftrace_event(struct trace_event *event)
+{
+       hlist_del(&event->node);        /* drop from its event_hash bucket */
+       list_del(&event->list);         /* on ftrace_event_list or self-linked */
+       return 0;
+}
+
 /**
  * unregister_ftrace_event - remove a no longer used event
  * @event: the event to remove
  */
 int unregister_ftrace_event(struct trace_event *event)
 {
-       mutex_lock(&trace_event_mutex);
-       hlist_del(&event->node);
-       mutex_unlock(&trace_event_mutex);
+       down_write(&trace_event_mutex);
+       __unregister_ftrace_event(event);
+       up_write(&trace_event_mutex);
 
        return 0;
 }
@@ -687,7 +951,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
        trace_assign_type(field, iter->ent);
 
        if (!S)
-               task_state_char(field->prev_state);
+               S = task_state_char(field->prev_state);
        T = task_state_char(field->next_state);
        if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
                              field->prev_pid,
@@ -722,7 +986,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
        trace_assign_type(field, iter->ent);
 
        if (!S)
-               task_state_char(field->prev_state);
+               S = task_state_char(field->prev_state);
        T = task_state_char(field->next_state);
 
        SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
@@ -846,14 +1110,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
        trace_assign_type(field, iter->ent);
 
+       if (!trace_seq_puts(s, "<stack trace>\n"))
+               goto partial;
        for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-               if (i) {
-                       if (!trace_seq_puts(s, " <= "))
-                               goto partial;
+               if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
+                       break;
+               if (!trace_seq_puts(s, " => "))
+                       goto partial;
 
-                       if (!seq_print_ip_sym(s, field->caller[i], flags))
-                               goto partial;
-               }
+               if (!seq_print_ip_sym(s, field->caller[i], flags))
+                       goto partial;
                if (!trace_seq_puts(s, "\n"))
                        goto partial;
        }
@@ -881,10 +1147,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 
        trace_assign_type(field, iter->ent);
 
-       if (!seq_print_userip_objs(field, s, flags))
+       if (!trace_seq_puts(s, "<user stack trace>\n"))
                goto partial;
 
-       if (!trace_seq_putc(s, '\n'))
+       if (!seq_print_userip_objs(field, s, flags))
                goto partial;
 
        return TRACE_TYPE_HANDLED;