perf tools: Add trace event debugfs IO handler
[safe/jmp/linux-2.6] / tools / perf / builtin-annotate.c
index 6a08da4..96d421f 100644 (file)
 #include "util/util.h"
 
 #include "util/color.h"
-#include "util/list.h"
+#include <linux/list.h>
 #include "util/cache.h"
-#include "util/rbtree.h"
+#include <linux/rbtree.h>
 #include "util/symbol.h"
 #include "util/string.h"
 
 #include "perf.h"
+#include "util/debug.h"
 
 #include "util/parse-options.h"
 #include "util/parse-events.h"
-
-#define SHOW_KERNEL    1
-#define SHOW_USER      2
-#define SHOW_HV                4
+#include "util/thread.h"
 
 static char            const *input_name = "perf.data";
-static char            *vmlinux = "vmlinux";
 
 static char            default_sort_order[] = "comm,symbol";
 static char            *sort_order = default_sort_order;
@@ -34,381 +31,23 @@ static char                *sort_order = default_sort_order;
 static int             input;
 static int             show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
-static int             dump_trace = 0;
-#define dprintf(x...)  do { if (dump_trace) printf(x); } while (0)
-
-static int             verbose;
+static int             full_paths;
 
 static int             print_line;
 
 static unsigned long   page_size;
 static unsigned long   mmap_window = 32;
 
-struct ip_event {
-       struct perf_event_header header;
-       __u64 ip;
-       __u32 pid, tid;
-};
-
-struct mmap_event {
-       struct perf_event_header header;
-       __u32 pid, tid;
-       __u64 start;
-       __u64 len;
-       __u64 pgoff;
-       char filename[PATH_MAX];
-};
-
-struct comm_event {
-       struct perf_event_header header;
-       __u32 pid, tid;
-       char comm[16];
-};
-
-struct fork_event {
-       struct perf_event_header header;
-       __u32 pid, ppid;
-};
-
-struct period_event {
-       struct perf_event_header header;
-       __u64 time;
-       __u64 id;
-       __u64 sample_period;
-};
-
-typedef union event_union {
-       struct perf_event_header        header;
-       struct ip_event                 ip;
-       struct mmap_event               mmap;
-       struct comm_event               comm;
-       struct fork_event               fork;
-       struct period_event             period;
-} event_t;
+static struct rb_root  threads;
+static struct thread   *last_match;
 
 
 struct sym_ext {
+       struct rb_node  node;
        double          percent;
        char            *path;
 };
 
-static LIST_HEAD(dsos);
-static struct dso *kernel_dso;
-static struct dso *vdso;
-
-
-static void dsos__add(struct dso *dso)
-{
-       list_add_tail(&dso->node, &dsos);
-}
-
-static struct dso *dsos__find(const char *name)
-{
-       struct dso *pos;
-
-       list_for_each_entry(pos, &dsos, node)
-               if (strcmp(pos->name, name) == 0)
-                       return pos;
-       return NULL;
-}
-
-static struct dso *dsos__findnew(const char *name)
-{
-       struct dso *dso = dsos__find(name);
-       int nr;
-
-       if (dso)
-               return dso;
-
-       dso = dso__new(name, 0);
-       if (!dso)
-               goto out_delete_dso;
-
-       nr = dso__load(dso, NULL, verbose);
-       if (nr < 0) {
-               if (verbose)
-                       fprintf(stderr, "Failed to open: %s\n", name);
-               goto out_delete_dso;
-       }
-       if (!nr && verbose) {
-               fprintf(stderr,
-               "No symbols found in: %s, maybe install a debug package?\n",
-                               name);
-       }
-
-       dsos__add(dso);
-
-       return dso;
-
-out_delete_dso:
-       dso__delete(dso);
-       return NULL;
-}
-
-static void dsos__fprintf(FILE *fp)
-{
-       struct dso *pos;
-
-       list_for_each_entry(pos, &dsos, node)
-               dso__fprintf(pos, fp);
-}
-
-static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
-{
-       return dso__find_symbol(kernel_dso, ip);
-}
-
-static int load_kernel(void)
-{
-       int err;
-
-       kernel_dso = dso__new("[kernel]", 0);
-       if (!kernel_dso)
-               return -1;
-
-       err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
-       if (err) {
-               dso__delete(kernel_dso);
-               kernel_dso = NULL;
-       } else
-               dsos__add(kernel_dso);
-
-       vdso = dso__new("[vdso]", 0);
-       if (!vdso)
-               return -1;
-
-       vdso->find_symbol = vdso__find_symbol;
-
-       dsos__add(vdso);
-
-       return err;
-}
-
-struct map {
-       struct list_head node;
-       __u64    start;
-       __u64    end;
-       __u64    pgoff;
-       __u64    (*map_ip)(struct map *, __u64);
-       struct dso       *dso;
-};
-
-static __u64 map__map_ip(struct map *map, __u64 ip)
-{
-       return ip - map->start + map->pgoff;
-}
-
-static __u64 vdso__map_ip(struct map *map, __u64 ip)
-{
-       return ip;
-}
-
-static struct map *map__new(struct mmap_event *event)
-{
-       struct map *self = malloc(sizeof(*self));
-
-       if (self != NULL) {
-               const char *filename = event->filename;
-
-               self->start = event->start;
-               self->end   = event->start + event->len;
-               self->pgoff = event->pgoff;
-
-               self->dso = dsos__findnew(filename);
-               if (self->dso == NULL)
-                       goto out_delete;
-
-               if (self->dso == vdso)
-                       self->map_ip = vdso__map_ip;
-               else
-                       self->map_ip = map__map_ip;
-       }
-       return self;
-out_delete:
-       free(self);
-       return NULL;
-}
-
-static struct map *map__clone(struct map *self)
-{
-       struct map *map = malloc(sizeof(*self));
-
-       if (!map)
-               return NULL;
-
-       memcpy(map, self, sizeof(*self));
-
-       return map;
-}
-
-static int map__overlap(struct map *l, struct map *r)
-{
-       if (l->start > r->start) {
-               struct map *t = l;
-               l = r;
-               r = t;
-       }
-
-       if (l->end > r->start)
-               return 1;
-
-       return 0;
-}
-
-static size_t map__fprintf(struct map *self, FILE *fp)
-{
-       return fprintf(fp, " %Lx-%Lx %Lx %s\n",
-                      self->start, self->end, self->pgoff, self->dso->name);
-}
-
-
-struct thread {
-       struct rb_node   rb_node;
-       struct list_head maps;
-       pid_t            pid;
-       char             *comm;
-};
-
-static struct thread *thread__new(pid_t pid)
-{
-       struct thread *self = malloc(sizeof(*self));
-
-       if (self != NULL) {
-               self->pid = pid;
-               self->comm = malloc(32);
-               if (self->comm)
-                       snprintf(self->comm, 32, ":%d", self->pid);
-               INIT_LIST_HEAD(&self->maps);
-       }
-
-       return self;
-}
-
-static int thread__set_comm(struct thread *self, const char *comm)
-{
-       if (self->comm)
-               free(self->comm);
-       self->comm = strdup(comm);
-       return self->comm ? 0 : -ENOMEM;
-}
-
-static size_t thread__fprintf(struct thread *self, FILE *fp)
-{
-       struct map *pos;
-       size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
-
-       list_for_each_entry(pos, &self->maps, node)
-               ret += map__fprintf(pos, fp);
-
-       return ret;
-}
-
-
-static struct rb_root threads;
-static struct thread *last_match;
-
-static struct thread *threads__findnew(pid_t pid)
-{
-       struct rb_node **p = &threads.rb_node;
-       struct rb_node *parent = NULL;
-       struct thread *th;
-
-       /*
-        * Font-end cache - PID lookups come in blocks,
-        * so most of the time we dont have to look up
-        * the full rbtree:
-        */
-       if (last_match && last_match->pid == pid)
-               return last_match;
-
-       while (*p != NULL) {
-               parent = *p;
-               th = rb_entry(parent, struct thread, rb_node);
-
-               if (th->pid == pid) {
-                       last_match = th;
-                       return th;
-               }
-
-               if (pid < th->pid)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       th = thread__new(pid);
-       if (th != NULL) {
-               rb_link_node(&th->rb_node, parent, p);
-               rb_insert_color(&th->rb_node, &threads);
-               last_match = th;
-       }
-
-       return th;
-}
-
-static void thread__insert_map(struct thread *self, struct map *map)
-{
-       struct map *pos, *tmp;
-
-       list_for_each_entry_safe(pos, tmp, &self->maps, node) {
-               if (map__overlap(pos, map)) {
-                       list_del_init(&pos->node);
-                       /* XXX leaks dsos */
-                       free(pos);
-               }
-       }
-
-       list_add_tail(&map->node, &self->maps);
-}
-
-static int thread__fork(struct thread *self, struct thread *parent)
-{
-       struct map *map;
-
-       if (self->comm)
-               free(self->comm);
-       self->comm = strdup(parent->comm);
-       if (!self->comm)
-               return -ENOMEM;
-
-       list_for_each_entry(map, &parent->maps, node) {
-               struct map *new = map__clone(map);
-               if (!new)
-                       return -ENOMEM;
-               thread__insert_map(self, new);
-       }
-
-       return 0;
-}
-
-static struct map *thread__find_map(struct thread *self, __u64 ip)
-{
-       struct map *pos;
-
-       if (self == NULL)
-               return NULL;
-
-       list_for_each_entry(pos, &self->maps, node)
-               if (ip >= pos->start && ip <= pos->end)
-                       return pos;
-
-       return NULL;
-}
-
-static size_t threads__fprintf(FILE *fp)
-{
-       size_t ret = 0;
-       struct rb_node *nd;
-
-       for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
-               struct thread *pos = rb_entry(nd, struct thread, rb_node);
-
-               ret += thread__fprintf(pos, fp);
-       }
-
-       return ret;
-}
-
 /*
  * histogram, sorted on item, collects counts
  */
@@ -422,7 +61,7 @@ struct hist_entry {
        struct map       *map;
        struct dso       *dso;
        struct symbol    *sym;
-       __u64    ip;
+       u64      ip;
        char             level;
 
        uint32_t         count;
@@ -435,7 +74,7 @@ struct hist_entry {
 struct sort_entry {
        struct list_head list;
 
-       char *header;
+       const char *header;
 
        int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
        int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
@@ -527,7 +166,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self)
        if (self->dso)
                return fprintf(fp, "%-25s", self->dso->name);
 
-       return fprintf(fp, "%016llx         ", (__u64)self->ip);
+       return fprintf(fp, "%016llx         ", (u64)self->ip);
 }
 
 static struct sort_entry sort_dso = {
@@ -541,7 +180,7 @@ static struct sort_entry sort_dso = {
 static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-       __u64 ip_l, ip_r;
+       u64 ip_l, ip_r;
 
        if (left->sym == right->sym)
                return 0;
@@ -558,13 +197,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
        size_t ret = 0;
 
        if (verbose)
-               ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
+               ret += fprintf(fp, "%#018llx  ", (u64)self->ip);
 
        if (self->sym) {
                ret += fprintf(fp, "[%c] %s",
                        self->dso == kernel_dso ? 'k' : '.', self->sym->name);
        } else {
-               ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+               ret += fprintf(fp, "%#016llx", (u64)self->ip);
        }
 
        return ret;
@@ -579,7 +218,7 @@ static struct sort_entry sort_sym = {
 static int sort__need_collapse = 0;
 
 struct sort_dimension {
-       char                    *name;
+       const char              *name;
        struct sort_entry       *entry;
        int                     taken;
 };
@@ -595,7 +234,7 @@ static LIST_HEAD(hist_entry__sort_list);
 
 static int sort_dimension__add(char *tok)
 {
-       int i;
+       unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
                struct sort_dimension *sd = &sort_dimensions[i];
@@ -655,7 +294,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 /*
  * collect histogram counts
  */
-static void hist_hit(struct hist_entry *he, __u64 ip)
+static void hist_hit(struct hist_entry *he, u64 ip)
 {
        unsigned int sym_size, offset;
        struct symbol *sym = he->sym;
@@ -684,7 +323,7 @@ static void hist_hit(struct hist_entry *he, __u64 ip)
 
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-               struct symbol *sym, __u64 ip, char level)
+               struct symbol *sym, u64 ip, char level)
 {
        struct rb_node **p = &hist.rb_node;
        struct rb_node *parent = NULL;
@@ -834,7 +473,7 @@ static void output__resort(void)
 
 static void register_idle_thread(void)
 {
-       struct thread *thread = threads__findnew(0);
+       struct thread *thread = threads__findnew(0, &threads, &last_match);
 
        if (thread == NULL ||
                        thread__set_comm(thread, "[idle]")) {
@@ -850,23 +489,25 @@ static unsigned long total = 0,
                     total_unknown = 0;
 
 static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
        char level;
        int show = 0;
        struct dso *dso = NULL;
-       struct thread *thread = threads__findnew(event->ip.pid);
-       __u64 ip = event->ip.ip;
+       struct thread *thread;
+       u64 ip = event->ip.ip;
        struct map *map = NULL;
 
-       dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+
+       dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->header.misc,
                event->ip.pid,
                (void *)(long)ip);
 
-       dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
        if (thread == NULL) {
                fprintf(stderr, "problem processing %d event, skipping it.\n",
@@ -880,7 +521,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 
                dso = kernel_dso;
 
-               dprintf(" ...... dso: %s\n", dso->name);
+               dump_printf(" ...... dso: %s\n", dso->name);
 
        } else if (event->header.misc & PERF_EVENT_MISC_USER) {
 
@@ -901,12 +542,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
                        if ((long long)ip < 0)
                                dso = kernel_dso;
                }
-               dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+               dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
 
        } else {
                show = SHOW_HV;
                level = 'H';
-               dprintf(" ...... dso: [hypervisor]\n");
+               dump_printf(" ...... dso: [hypervisor]\n");
        }
 
        if (show & show_mask) {
@@ -929,10 +570,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread = threads__findnew(event->mmap.pid);
-       struct map *map = map__new(&event->mmap);
+       struct thread *thread;
+       struct map *map = map__new(&event->mmap, NULL, 0);
+
+       thread = threads__findnew(event->mmap.pid, &threads, &last_match);
 
-       dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+       dump_printf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->mmap.pid,
@@ -942,7 +585,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
                event->mmap.filename);
 
        if (thread == NULL || map == NULL) {
-               dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+               dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n");
                return 0;
        }
 
@@ -955,16 +598,17 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread = threads__findnew(event->comm.pid);
+       struct thread *thread;
 
-       dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+       thread = threads__findnew(event->comm.pid, &threads, &last_match);
+       dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->comm.comm, event->comm.pid);
 
        if (thread == NULL ||
            thread__set_comm(thread, event->comm.comm)) {
-               dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+               dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
                return -1;
        }
        total_comm++;
@@ -975,16 +619,18 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread = threads__findnew(event->fork.pid);
-       struct thread *parent = threads__findnew(event->fork.ppid);
+       struct thread *thread;
+       struct thread *parent;
 
-       dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+       thread = threads__findnew(event->fork.pid, &threads, &last_match);
+       parent = threads__findnew(event->fork.ppid, &threads, &last_match);
+       dump_printf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->fork.pid, event->fork.ppid);
 
        if (!thread || !parent || thread__fork(thread, parent)) {
-               dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+               dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n");
                return -1;
        }
        total_fork++;
@@ -993,25 +639,12 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 }
 
 static int
-process_period_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
-               (void *)(offset + head),
-               (void *)(long)(event->header.size),
-               event->period.time,
-               event->period.id,
-               event->period.sample_period);
-
-       return 0;
-}
-
-static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
-               return process_overflow_event(event, offset, head);
-
        switch (event->header.type) {
+       case PERF_EVENT_SAMPLE:
+               return process_sample_event(event, offset, head);
+
        case PERF_EVENT_MMAP:
                return process_mmap_event(event, offset, head);
 
@@ -1020,9 +653,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 
        case PERF_EVENT_FORK:
                return process_fork_event(event, offset, head);
-
-       case PERF_EVENT_PERIOD:
-               return process_period_event(event, offset, head);
        /*
         * We dont process them right now but they are fine:
         */
@@ -1039,14 +669,14 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 }
 
 static int
-parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
+parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
 {
        char *line = NULL, *tmp, *tmp2;
        static const char *prev_line;
        static const char *prev_color;
        unsigned int offset;
        size_t line_len;
-       __u64 line_ip;
+       s64 line_ip;
        int ret;
        char *c;
 
@@ -1086,30 +716,20 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
                const char *path = NULL;
                unsigned int hits = 0;
                double percent = 0.0;
-               char *color = PERF_COLOR_NORMAL;
+               const char *color;
                struct sym_ext *sym_ext = sym->priv;
 
                offset = line_ip - start;
                if (offset < len)
                        hits = sym->hist[offset];
 
-               if (sym_ext) {
+               if (offset < len && sym_ext) {
                        path = sym_ext[offset].path;
                        percent = sym_ext[offset].percent;
                } else if (sym->hist_sum)
                        percent = 100.0 * hits / sym->hist_sum;
 
-               /*
-                * We color high-overhead entries in red, mid-overhead
-                * entries in green - and keep the low overhead places
-                * normal:
-                */
-               if (percent >= 5.0)
-                       color = PERF_COLOR_RED;
-               else {
-                       if (percent > 0.5)
-                               color = PERF_COLOR_GREEN;
-               }
+               color = get_percent_color(percent);
 
                /*
                 * Also color the filename and line if needed, with
@@ -1138,6 +758,28 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
        return 0;
 }
 
+static struct rb_root root_sym_ext;
+
+/* Link @sym_ext into root_sym_ext; larger percentages are placed leftward. */
+static void insert_source_line(struct sym_ext *sym_ext)
+{
+       struct rb_node **link = &root_sym_ext.rb_node;
+       struct rb_node *parent = NULL;
+
+       /* Walk down to the empty slot where the new node belongs. */
+       while (*link != NULL) {
+               struct sym_ext *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct sym_ext, node);
+               link = (sym_ext->percent > cur->percent) ?
+                               &parent->rb_left : &parent->rb_right;
+       }
+
+       rb_link_node(&sym_ext->node, parent, link);
+       rb_insert_color(&sym_ext->node, &root_sym_ext);
+}
+
 static void free_source_line(struct symbol *sym, int len)
 {
        struct sym_ext *sym_ext = sym->priv;
@@ -1151,10 +793,12 @@ static void free_source_line(struct symbol *sym, int len)
        free(sym_ext);
 
        sym->priv = NULL;
+       root_sym_ext = RB_ROOT;
 }
 
 /* Get the filename:line for the colored entries */
-static void get_source_line(struct symbol *sym, __u64 start, int len)
+static void
+get_source_line(struct symbol *sym, u64 start, int len, const char *filename)
 {
        int i;
        char cmd[PATH_MAX * 2];
@@ -1172,7 +816,7 @@ static void get_source_line(struct symbol *sym, __u64 start, int len)
        for (i = 0; i < len; i++) {
                char *path = NULL;
                size_t line_len;
-               __u64 offset;
+               u64 offset;
                FILE *fp;
 
                sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum;
@@ -1180,7 +824,7 @@ static void get_source_line(struct symbol *sym, __u64 start, int len)
                        continue;
 
                offset = start + i;
-               sprintf(cmd, "addr2line -e %s %016llx", vmlinux, offset);
+               sprintf(cmd, "addr2line -e %s %016llx", filename, offset);
                fp = popen(cmd, "r");
                if (!fp)
                        continue;
@@ -1188,47 +832,86 @@ static void get_source_line(struct symbol *sym, __u64 start, int len)
                if (getline(&path, &line_len, fp) < 0 || !line_len)
                        goto next;
 
-               sym_ext[i].path = malloc(sizeof(char) * line_len);
+               sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
                if (!sym_ext[i].path)
                        goto next;
 
                strcpy(sym_ext[i].path, path);
+               insert_source_line(&sym_ext[i]);
 
        next:
                pclose(fp);
        }
 }
 
+/*
+ * Print the per-source-line summary collected in root_sym_ext for
+ * @filename, visiting the tree from rb_first() on and coloring each entry.
+ */
+static void print_summary(const char *filename)
+{
+       struct rb_node *nd;
+
+       printf("\nSorted summary for file %s\n", filename);
+       printf("----------------------------------------------\n\n");
+
+       /* Empty tree: print the MIN_GREEN notice and stop. */
+       if (RB_EMPTY_ROOT(&root_sym_ext)) {
+               printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+               return;
+       }
+
+       for (nd = rb_first(&root_sym_ext); nd; nd = rb_next(nd)) {
+               struct sym_ext *ext;
+               const char *color;
+
+               ext = rb_entry(nd, struct sym_ext, node);
+               color = get_percent_color(ext->percent);
+
+               color_fprintf(stdout, color, " %7.2f %s",
+                             ext->percent, ext->path);
+       }
+}
+
 static void annotate_sym(struct dso *dso, struct symbol *sym)
 {
-       char *filename = dso->name;
-       __u64 start, end, len;
+       const char *filename = dso->name, *d_filename;
+       u64 start, end, len;
        char command[PATH_MAX*2];
        FILE *file;
 
        if (!filename)
                return;
-       if (dso == kernel_dso)
-               filename = vmlinux;
-
-       printf("\n------------------------------------------------\n");
-       printf(" Percent |      Source code & Disassembly of %s\n", filename);
-       printf("------------------------------------------------\n");
-
-       if (verbose >= 2)
-               printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+       if (sym->module)
+               filename = sym->module->path;
+       else if (dso == kernel_dso)
+               filename = vmlinux_name;
 
        start = sym->obj_start;
        if (!start)
                start = sym->start;
+       if (full_paths)
+               d_filename = filename;
+       else
+               d_filename = basename(filename);
 
        end = start + sym->end - sym->start + 1;
        len = sym->end - sym->start;
 
-       if (print_line)
-               get_source_line(sym, start, len);
+       if (print_line) {
+               get_source_line(sym, start, len, filename);
+               print_summary(filename);
+       }
 
-       sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename);
+       printf("\n\n------------------------------------------------\n");
+       printf(" Percent |      Source code & Disassembly of %s\n", d_filename);
+       printf("------------------------------------------------\n");
+
+       if (verbose >= 2)
+               printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+
+       sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
+                       (u64)start, (u64)end, filename, filename);
 
        if (verbose >= 3)
                printf("doing: %s\n", command);
@@ -1243,7 +926,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
        }
 
        pclose(file);
-       free_source_line(sym, len);
+       if (print_line)
+               free_source_line(sym, len);
 }
 
 static void find_annotations(void)
@@ -1273,7 +957,7 @@ static int __cmd_annotate(void)
        int ret, rc = EXIT_FAILURE;
        unsigned long offset = 0;
        unsigned long head = 0;
-       struct stat stat;
+       struct stat input_stat;
        event_t *event;
        uint32_t size;
        char *buf;
@@ -1286,13 +970,13 @@ static int __cmd_annotate(void)
                exit(-1);
        }
 
-       ret = fstat(input, &stat);
+       ret = fstat(input, &input_stat);
        if (ret < 0) {
                perror("failed to stat file");
                exit(-1);
        }
 
-       if (!stat.st_size) {
+       if (!input_stat.st_size) {
                fprintf(stderr, "zero-sized file, nothing to do!\n");
                exit(0);
        }
@@ -1319,10 +1003,10 @@ more:
 
        if (head + event->header.size >= page_size * mmap_window) {
                unsigned long shift = page_size * (head / page_size);
-               int ret;
+               int munmap_ret;
 
-               ret = munmap(buf, page_size * mmap_window);
-               assert(ret == 0);
+               munmap_ret = munmap(buf, page_size * mmap_window);
+               assert(munmap_ret == 0);
 
                offset += shift;
                head -= shift;
@@ -1331,14 +1015,14 @@ more:
 
        size = event->header.size;
 
-       dprintf("%p [%p]: event: %d\n",
+       dump_printf("%p [%p]: event: %d\n",
                        (void *)(offset + head),
                        (void *)(long)event->header.size,
                        event->header.type);
 
        if (!size || process_event(event, offset, head) < 0) {
 
-               dprintf("%p [%p]: skipping unknown header type: %d\n",
+               dump_printf("%p [%p]: skipping unknown header type: %d\n",
                        (void *)(offset + head),
                        (void *)(long)(event->header.size),
                        event->header.type);
@@ -1358,23 +1042,23 @@ more:
 
        head += size;
 
-       if (offset + head < stat.st_size)
+       if (offset + head < (unsigned long)input_stat.st_size)
                goto more;
 
        rc = EXIT_SUCCESS;
        close(input);
 
-       dprintf("      IP events: %10ld\n", total);
-       dprintf("    mmap events: %10ld\n", total_mmap);
-       dprintf("    comm events: %10ld\n", total_comm);
-       dprintf("    fork events: %10ld\n", total_fork);
-       dprintf(" unknown events: %10ld\n", total_unknown);
+       dump_printf("      IP events: %10ld\n", total);
+       dump_printf("    mmap events: %10ld\n", total_mmap);
+       dump_printf("    comm events: %10ld\n", total_comm);
+       dump_printf("    fork events: %10ld\n", total_fork);
+       dump_printf(" unknown events: %10ld\n", total_unknown);
 
        if (dump_trace)
                return 0;
 
        if (verbose >= 3)
-               threads__fprintf(stdout);
+               threads__fprintf(stdout, &threads);
 
        if (verbose >= 2)
                dsos__fprintf(stdout);
@@ -1401,9 +1085,13 @@ static const struct option options[] = {
                    "be more verbose (show symbol address, etc)"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
-       OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+       OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
+       OPT_BOOLEAN('m', "modules", &modules,
+                   "load module symbols - WARNING: use only with -k and LIVE kernel"),
        OPT_BOOLEAN('l', "print-line", &print_line,
                    "print matching source lines (may be slow)"),
+       OPT_BOOLEAN('P', "full-paths", &full_paths,
+                   "Don't shorten the displayed pathnames"),
        OPT_END()
 };
 
@@ -1422,7 +1110,7 @@ static void setup_sorting(void)
        free(str);
 }
 
-int cmd_annotate(int argc, const char **argv, const char *prefix)
+int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 {
        symbol__init();