4 * Builtin report command: Analyze the perf.data input file,
5 * look up and read DSOs and symbol information and display
6 * a histogram of results, along various sorting keys.
10 #include "util/util.h"
12 #include "util/color.h"
13 #include "util/list.h"
14 #include "util/cache.h"
15 #include "util/rbtree.h"
16 #include "util/symbol.h"
17 #include "util/string.h"
21 #include "util/parse-options.h"
22 #include "util/parse-events.h"
/* Path of the profile data to read; overridable with -i/--input. */
28 static char const *input_name = "perf.data";
/* vmlinux image passed to dso__load_kernel(); set with -k/--vmlinux. */
29 static char *vmlinux = NULL;
/* Sort keys used when -s/--sort is not given. */
31 static char default_sort_order[] = "comm,dso";
32 static char *sort_order = default_sort_order;
/* Sample classes to report; tested against each sample in process_overflow_event(). */
35 static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
/* Set by -D/--dump-raw-trace; dprintf() prints only while it is non-zero. */
37 static int dump_trace = 0;
38 #define dprintf(x...) do { if (dump_trace) printf(x); } while (0)
/* Set by -P/--full-paths. */
41 static int full_paths;
/*
 * page_size is filled from getpagesize() in cmd_report(); the input file is
 * mmap'ed in windows of page_size * mmap_window bytes.
 */
43 static unsigned long page_size;
44 static unsigned long mmap_window = 32;
/*
 * Record layouts. Each record struct carries a struct perf_event_header
 * member named `header`; the union overlays that header with the per-type
 * records, so code can read event->header first and then the matching
 * member (mmap, comm, fork).
 * NOTE(review): the typedef name is presumably event_t, as used by the
 * process_*_event() functions - confirm.
 */
47 struct perf_event_header header;
53 struct perf_event_header header;
/* Pathname of the mapped file, read by map__new() as event->filename. */
58 char filename[PATH_MAX];
62 struct perf_event_header header;
68 struct perf_event_header header;
72 typedef union event_union {
73 struct perf_event_header header;
75 struct mmap_event mmap;
76 struct comm_event comm;
77 struct fork_event fork;
/* Global list of every dso known so far (linked through dso->node). */
80 static LIST_HEAD(dsos);
/* The "[kernel]" dso created by load_kernel(). */
81 static struct dso *kernel_dso;
/* Append @dso to the tail of the global `dsos` list. */
83 static void dsos__add(struct dso *dso)
85 list_add_tail(&dso->node, &dsos);
/*
 * Walk the global `dsos` list looking for an entry whose name is exactly
 * @name (strcmp() == 0).
 * NOTE(review): presumably returns the match, or NULL when none - confirm.
 */
88 static struct dso *dsos__find(const char *name)
92 list_for_each_entry(pos, &dsos, node)
93 if (strcmp(pos->name, name) == 0)
/*
 * Look @name up with dsos__find(); when a new dso is needed it is created
 * with dso__new() and its symbols are loaded with dso__load().
 *
 * Diagnostics go to stderr: "Failed to open" (presumably when dso__load()
 * reports an error - confirm), and, when `verbose` is set and dso__load()
 * returned 0, a hint that a debug package may be missing.
 */
98 static struct dso *dsos__findnew(const char *name)
100 struct dso *dso = dsos__find(name);
106 dso = dso__new(name, 0);
110 nr = dso__load(dso, NULL, verbose);
113 fprintf(stderr, "Failed to open: %s\n", name);
/* nr == 0: dso__load() succeeded but loaded nothing. */
116 if (!nr && verbose) {
118 "No symbols found in: %s, maybe install a debug package?\n",
/* Print every dso on the global `dsos` list to @fp via dso__fprintf(). */
131 static void dsos__fprintf(FILE *fp)
135 list_for_each_entry(pos, &dsos, node)
136 dso__fprintf(pos, fp);
/*
 * Create the "[kernel]" dso, load its symbols with dso__load_kernel() using
 * the `vmlinux` path, and register it on the global dso list.
 *
 * __cmd_report() treats a negative return value as failure.
 * NOTE(review): the dso__delete() call is presumably the load-failure
 * path - confirm.
 */
139 static int load_kernel(void)
143 kernel_dso = dso__new("[kernel]", 0);
147 err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
149 dso__delete(kernel_dso);
152 dsos__add(kernel_dso);
/*
 * Current working directory: __cwd is filled by getcwd() in __cmd_report(),
 * and `cwd` points at that buffer.
 */
157 static char __cwd[PATH_MAX];
158 static char *cwd = __cwd;
/*
 * Count how many leading characters @pathname shares with `cwd`, stopping
 * at cwdlen. map__new() uses the result as an offset into the pathname.
 *
 * NOTE(review): the bound `n < cwdlen` is tested after cwd[n] has already
 * been read; testing it first would be the safer order.
 */
161 static int strcommon(const char *pathname)
165 while (pathname[n] == cwd[n] && n < cwdlen)
/*
 * List linkage. NOTE(review): presumably the member of struct map through
 * which maps are chained on thread->maps (see thread__insert_map()) - confirm.
 */
172 struct list_head node;
/*
 * Build a map from an mmap record: start, end (= start + len, i.e. one past
 * the last mapped byte) and pgoff are copied from @event, and the backing
 * dso is resolved by pathname with dsos__findnew().
 *
 * The pathname may first be rewritten as "." followed by the part that comes
 * after the prefix it shares with the current directory.
 * NOTE(review): the condition guarding that rewrite is presumably tied to
 * the full_paths option - confirm.
 */
179 static struct map *map__new(struct mmap_event *event)
181 struct map *self = malloc(sizeof(*self));
184 const char *filename = event->filename;
185 char newfilename[PATH_MAX];
/* n = length of the prefix shared with cwd */
188 int n = strcommon(filename);
191 snprintf(newfilename, sizeof(newfilename),
192 ".%s", filename + n);
193 filename = newfilename;
197 self->start = event->start;
198 self->end = event->start + event->len;
199 self->pgoff = event->pgoff;
201 self->dso = dsos__findnew(filename);
202 if (self->dso == NULL)
/*
 * Allocate a new map and copy @self into it byte-for-byte with memcpy().
 * The dso pointer is therefore shared with the original, not duplicated.
 */
211 static struct map *map__clone(struct map *self)
213 struct map *map = malloc(sizeof(*self));
218 memcpy(map, self, sizeof(*self));
/*
 * Tell whether the address ranges of @l and @r overlap; used by
 * thread__insert_map() to evict stale maps.
 * NOTE(review): the start comparison presumably orders the pair so that the
 * final test (l->end > r->start) is made with l starting first - confirm.
 */
223 static int map__overlap(struct map *l, struct map *r)
225 if (l->start > r->start) {
231 if (l->end > r->start)
/*
 * NOTE(review): presumably members of struct thread - rb_node links the
 * thread into the `threads` tree and `maps` heads its list of maps (both
 * are used that way by threads__findnew() and thread__insert_map()).
 */
238 struct rb_node rb_node;
239 struct list_head maps;
/*
 * Allocate a thread for @pid. Until a comm record arrives the command name
 * is a 32-byte placeholder of the form ":<pid>"; the map list starts empty.
 */
244 static struct thread *thread__new(pid_t pid)
246 struct thread *self = malloc(sizeof(*self));
250 self->comm = malloc(32);
252 snprintf(self->comm, 32, ":%d", self->pid);
253 INIT_LIST_HEAD(&self->maps);
/*
 * Set the thread's command name to a strdup() copy of @comm.
 * Returns 0 on success, -ENOMEM when the copy cannot be allocated.
 */
259 static int thread__set_comm(struct thread *self, const char *comm)
263 self->comm = strdup(comm);
264 return self->comm ? 0 : -ENOMEM;
/* Red-black tree of all known threads. */
267 static struct rb_root threads;
/* One-entry lookup cache consulted first by threads__findnew(). */
268 static struct thread *last_match;
/*
 * Return the thread for @pid, creating it with thread__new() and inserting
 * it into the `threads` rbtree when the pid has not been seen yet.
 * last_match is checked before the tree is searched.
 */
270 static struct thread *threads__findnew(pid_t pid)
272 struct rb_node **p = &threads.rb_node;
273 struct rb_node *parent = NULL;
277 * Font-end cache - PID lookups come in blocks,
278 * so most of the time we dont have to look up
281 if (last_match && last_match->pid == pid)
286 th = rb_entry(parent, struct thread, rb_node);
288 if (th->pid == pid) {
299 th = thread__new(pid);
/* p/parent now describe the empty slot where the search ended */
301 rb_link_node(&th->rb_node, parent, p);
302 rb_insert_color(&th->rb_node, &threads);
/*
 * Add @map to the thread's map list, first unlinking every existing map
 * whose range overlaps it. The _safe iterator is needed because entries are
 * removed while the list is being walked.
 */
309 static void thread__insert_map(struct thread *self, struct map *map)
311 struct map *pos, *tmp;
313 list_for_each_entry_safe(pos, tmp, &self->maps, node) {
314 if (map__overlap(pos, map)) {
315 list_del_init(&pos->node);
321 list_add_tail(&map->node, &self->maps);
/*
 * Make @self look like a fresh fork of @parent: copy the parent's command
 * name with strdup() and insert a clone of each of the parent's maps.
 * process_fork_event() treats a non-zero return as failure.
 */
324 static int thread__fork(struct thread *self, struct thread *parent)
330 self->comm = strdup(parent->comm);
334 list_for_each_entry(map, &parent->maps, node) {
335 struct map *new = map__clone(map);
338 thread__insert_map(self, new);
/*
 * Look up the map of @self that contains address @ip.
 *
 * map__new() sets end = start + len, i.e. one past the last mapped byte, so
 * the range test is half-open: start <= ip < end. An inclusive upper bound
 * would attribute the first address after a mapping to that mapping.
 */
344 static struct map *thread__find_map(struct thread *self, uint64_t ip)
351 list_for_each_entry(pos, &self->maps, node)
352 if (ip >= pos->start && ip < pos->end)
359 * histogram, sorted on item, collects counts
/* Primary histogram tree, filled by hist_entry__add(). */
362 static struct rb_root hist;
/*
 * NOTE(review): presumably members of struct hist_entry - the tree linkage
 * and the thread the samples belong to - confirm.
 */
365 struct rb_node rb_node;
367 struct thread *thread;
378 * configurable sorting bits
/* Linkage on hist_entry__sort_list (the active sort keys, in order). */
382 struct list_head list;
/* Ordering used while samples are collected. */
386 int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
/* Optional ordering for the collapse pass; cmp is used when it is NULL. */
387 int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
/* Prints this key's column for one entry; the result is added to the caller's running total. */
388 size_t (*print)(FILE *fp, struct hist_entry *);
/* Order two entries by pid: returns right pid minus left pid. */
394 sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
396 return right->thread->pid - left->thread->pid;
/*
 * Print "<comm>:<pid>" with the command right-aligned in 16 columns and the
 * pid in 5; a NULL comm is printed as an empty string.
 */
400 sort__thread_print(FILE *fp, struct hist_entry *self)
402 return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
/* Sort key registered as "pid" in sort_dimensions[]; has no collapse step. */
405 static struct sort_entry sort_thread = {
406 .header = " Command: Pid",
407 .cmp = sort__thread_cmp,
408 .print = sort__thread_print,
/*
 * Collection-time ordering for the "comm" key. It compares pids, not names:
 * the name-based merge is done later by sort__comm_collapse().
 */
414 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
416 return right->thread->pid - left->thread->pid;
/*
 * Collapse-pass ordering for the "comm" key: entries are compared by command
 * name with strcmp(), so different pids running the same command merge.
 * A NULL name on either side is handled before strcmp() is reached.
 */
420 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
422 char *comm_l = left->thread->comm;
423 char *comm_r = right->thread->comm;
425 if (!comm_l || !comm_r) {
426 if (!comm_l && !comm_r)
434 return strcmp(comm_l, comm_r);
/*
 * Print the command name right-aligned in 16 columns.
 *
 * comm can be NULL (sort__comm_collapse() handles that case explicitly), and
 * passing NULL for %s is undefined, so fall back to an empty string the same
 * way sort__thread_print() does.
 */
438 sort__comm_print(FILE *fp, struct hist_entry *self)
440 return fprintf(fp, "%16s", self->thread->comm ?: "");
/*
 * Sort key registered as "comm" in sort_dimensions[]. It is the only key
 * with a collapse callback, so selecting it turns on the collapse pass.
 */
443 static struct sort_entry sort_comm = {
444 .header = " Command",
445 .cmp = sort__comm_cmp,
446 .collapse = sort__comm_collapse,
447 .print = sort__comm_print,
/*
 * Order two entries by dso name with strcmp(). A NULL dso on either side is
 * handled before the names are compared.
 */
453 sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
455 struct dso *dso_l = left->dso;
456 struct dso *dso_r = right->dso;
458 if (!dso_l || !dso_r) {
459 if (!dso_l && !dso_r)
467 return strcmp(dso_l->name, dso_r->name);
/*
 * Print the dso name left-aligned in 25 columns, or the raw ip as 16 hex
 * digits on the alternative path.
 * NOTE(review): the alternative is presumably taken when the entry has no
 * dso - confirm.
 */
471 sort__dso_print(FILE *fp, struct hist_entry *self)
474 return fprintf(fp, "%-25s", self->dso->name);
476 return fprintf(fp, "%016llx ", (__u64)self->ip);
/* Sort key registered as "dso" in sort_dimensions[]; has no collapse step. */
479 static struct sort_entry sort_dso = {
480 .header = "Shared Object ",
481 .cmp = sort__dso_cmp,
482 .print = sort__dso_print,
/*
 * Order two entries by symbol. Identical symbol pointers are short-circuited;
 * otherwise each side contributes its symbol's start address, or its raw ip
 * when it has no symbol, and the result is right minus left.
 */
488 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
492 if (left->sym == right->sym)
495 ip_l = left->sym ? left->sym->start : left->ip;
496 ip_r = right->sym ? right->sym->start : right->ip;
498 return (int64_t)(ip_r - ip_l);
/*
 * Print the symbol column: optionally the ip in hex, then the symbol name,
 * or the ip again in hex when there is no name to print.
 * NOTE(review): the leading ip is presumably shown only in verbose mode
 * (the option help mentions "show symbol address") - confirm.
 */
502 sort__sym_print(FILE *fp, struct hist_entry *self)
507 ret += fprintf(fp, "%#018llx ", (__u64)self->ip);
510 ret += fprintf(fp, "%s", self->sym->name);
512 ret += fprintf(fp, "%#016llx", (__u64)self->ip);
/* Sort key registered as "symbol" in sort_dimensions[]; has no collapse step. */
517 static struct sort_entry sort_sym = {
519 .cmp = sort__sym_cmp,
520 .print = sort__sym_print,
/* Set once any selected sort key provides a collapse callback. */
523 static int sort__need_collapse = 0;
/* Maps a --sort key name to its sort_entry. */
525 struct sort_dimension {
527 struct sort_entry *entry;
/* Keys accepted by --sort, searched in this order by sort_dimension__add(). */
531 static struct sort_dimension sort_dimensions[] = {
532 { .name = "pid", .entry = &sort_thread, },
533 { .name = "comm", .entry = &sort_comm, },
534 { .name = "dso", .entry = &sort_dso, },
535 { .name = "symbol", .entry = &sort_sym, },
/* Active sort keys, in the order they were given on the command line. */
538 static LIST_HEAD(hist_entry__sort_list);
/*
 * Activate the sort key named by @tok: the matching entry is appended to
 * hist_entry__sort_list, and sort__need_collapse is set when that entry has
 * a collapse callback. setup_sorting() treats a negative return as
 * "unknown key".
 *
 * Matching is case-insensitive and only compares strlen(tok) characters, so
 * a prefix such as "sym" selects "symbol".
 */
540 static int sort_dimension__add(char *tok)
544 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
545 struct sort_dimension *sd = &sort_dimensions[i];
/* non-zero means no match for this table entry */
550 if (strncasecmp(tok, sd->name, strlen(tok)))
553 if (sd->entry->collapse)
554 sort__need_collapse = 1;
556 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
/*
 * Compare two entries with the cmp callback of each active sort key, in
 * list order.
 * NOTE(review): presumably stops at the first non-zero result - confirm.
 */
566 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
568 struct sort_entry *se;
571 list_for_each_entry(se, &hist_entry__sort_list, list) {
572 cmp = se->cmp(left, right);
/*
 * Collapse-pass counterpart of hist_entry__cmp(): for each active sort key
 * the collapse callback is used when the key has one, otherwise its cmp.
 */
581 hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
583 struct sort_entry *se;
586 list_for_each_entry(se, &hist_entry__sort_list, list) {
587 int64_t (*f)(struct hist_entry *, struct hist_entry *);
589 f = se->collapse ?: se->cmp;
591 cmp = f(left, right);
/*
 * Print one histogram line to @fp: the overhead column (a coloured
 * percentage of @total_samples, or the raw count on the alternative path),
 * then one column per active sort key, then a newline. The character counts
 * returned by the print calls are accumulated in `ret`.
 * NOTE(review): the raw-count path is presumably taken when total_samples
 * is zero - confirm.
 */
600 hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
602 struct sort_entry *se;
606 double percent = self->count * 100.0 / total_samples;
607 char *color = PERF_COLOR_NORMAL;
610 * We color high-overhead entries in red, low-overhead
611 * entries in green - and keep the middle ground normal:
614 color = PERF_COLOR_RED;
616 color = PERF_COLOR_GREEN;
/* same value as `percent` above, recomputed in place */
618 ret = color_fprintf(fp, color, " %6.2f%%",
619 (self->count * 100.0) / total_samples);
621 ret = fprintf(fp, "%12d ", self->count);
623 list_for_each_entry(se, &hist_entry__sort_list, list) {
625 ret += se->print(fp, self);
628 ret += fprintf(fp, "\n");
634 * collect histogram counts
/*
 * Account one sample in the `hist` tree. The tree is searched with
 * hist_entry__cmp() using a stack-allocated probe entry; when a new node is
 * needed it is malloc()ed and linked at the position where the search ended.
 * process_overflow_event() treats a non-zero return as failure.
 */
638 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
639 struct symbol *sym, uint64_t ip, char level)
641 struct rb_node **p = &hist.rb_node;
642 struct rb_node *parent = NULL;
643 struct hist_entry *he;
644 struct hist_entry entry = {
657 he = rb_entry(parent, struct hist_entry, rb_node);
659 cmp = hist_entry__cmp(&entry, he);
672 he = malloc(sizeof(*he));
676 rb_link_node(&he->rb_node, parent, p);
677 rb_insert_color(&he->rb_node, &hist);
/*
 * Dispose of @he. Called by collapse__insert_entry() once an entry's count
 * has been merged into an existing one.
 */
682 static void hist_entry__free(struct hist_entry *he)
688 * collapse the histogram
/* Tree holding the entries after the collapse pass. */
691 static struct rb_root collapse_hists;
/*
 * Insert @he into collapse_hists using hist_entry__collapse() as ordering.
 * When an equivalent entry is already present, @he's count is added to it
 * and @he is freed instead of being linked.
 */
693 static void collapse__insert_entry(struct hist_entry *he)
695 struct rb_node **p = &collapse_hists.rb_node;
696 struct rb_node *parent = NULL;
697 struct hist_entry *iter;
702 iter = rb_entry(parent, struct hist_entry, rb_node);
704 cmp = hist_entry__collapse(iter, he);
707 iter->count += he->count;
708 hist_entry__free(he);
718 rb_link_node(&he->rb_node, parent, p);
719 rb_insert_color(&he->rb_node, &collapse_hists);
/*
 * Move every entry from `hist` into `collapse_hists`, merging equivalent
 * ones. Does nothing beyond the sort__need_collapse test when no selected
 * key needs collapsing.
 */
722 static void collapse__resort(void)
724 struct rb_node *next;
725 struct hist_entry *n;
727 if (!sort__need_collapse)
730 next = rb_first(&hist);
732 n = rb_entry(next, struct hist_entry, rb_node);
/* fetch the successor before the node is erased */
733 next = rb_next(&n->rb_node);
735 rb_erase(&n->rb_node, &hist);
736 collapse__insert_entry(n);
741 * reverse the map, sort on count.
/* Tree of entries ordered by sample count, walked by output__fprintf(). */
744 static struct rb_root output_hists;
/*
 * Link @he into output_hists; the descent compares he->count with the count
 * of each node visited.
 */
746 static void output__insert_entry(struct hist_entry *he)
748 struct rb_node **p = &output_hists.rb_node;
749 struct rb_node *parent = NULL;
750 struct hist_entry *iter;
754 iter = rb_entry(parent, struct hist_entry, rb_node);
756 if (he->count > iter->count)
762 rb_link_node(&he->rb_node, parent, p);
763 rb_insert_color(&he->rb_node, &output_hists);
/*
 * Move every entry into output_hists. The source tree is collapse_hists
 * when a collapse pass was needed, otherwise `hist`.
 */
766 static void output__resort(void)
768 struct rb_node *next;
769 struct hist_entry *n;
770 struct rb_root *tree = &hist;
772 if (sort__need_collapse)
773 tree = &collapse_hists;
775 next = rb_first(tree);
778 n = rb_entry(next, struct hist_entry, rb_node);
/* fetch the successor before the node is erased */
779 next = rb_next(&n->rb_node);
781 rb_erase(&n->rb_node, tree);
782 output__insert_entry(n);
/*
 * Print the report to @fp: the event total, a header row with one column
 * title per active sort key, a separator row sized from those titles, and
 * then every entry of output_hists in tree order. `ret` accumulates the
 * sizes returned by hist_entry__fprintf(). When the default sort order is
 * in use, a closing hint suggests adding the symbol key.
 */
786 static size_t output__fprintf(FILE *fp, uint64_t total_samples)
788 struct hist_entry *pos;
789 struct sort_entry *se;
/* NOTE(review): %Ld is a non-standard spelling and signed; the other __u64 prints in this file use the ll length modifier. */
795 fprintf(fp, "# (%Ld profiler events)\n", (__u64)total_samples);
798 fprintf(fp, "# Overhead");
799 list_for_each_entry(se, &hist_entry__sort_list, list)
800 fprintf(fp, " %s", se->header);
803 fprintf(fp, "# ........");
804 list_for_each_entry(se, &hist_entry__sort_list, list) {
808 for (i = 0; i < strlen(se->header); i++)
815 for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
816 pos = rb_entry(nd, struct hist_entry, rb_node);
817 ret += hist_entry__fprintf(fp, pos, total_samples);
820 if (!strcmp(sort_order, default_sort_order)) {
822 fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
/*
 * Pre-create the thread for pid 0 and name it "[idle]"; a failure of either
 * step is reported on stderr.
 */
830 static void register_idle_thread(void)
832 struct thread *thread = threads__findnew(0);
834 if (thread == NULL ||
835 thread__set_comm(thread, "[idle]")) {
836 fprintf(stderr, "problem inserting idle task.\n");
/*
 * Per-type event counters. `total` (IP events) is the figure passed to
 * output__fprintf() as the sample total.
 */
841 static unsigned long total = 0,
/*
 * Handle one overflow (sample) record: resolve the thread by pid, choose a
 * dso from the header.misc bits (kernel, user, or otherwise hypervisor,
 * judging by the trace text), look the ip up as a symbol in that dso, and
 * add the sample to the histogram when its class is enabled in show_mask.
 */
848 process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
852 struct dso *dso = NULL;
853 struct thread *thread = threads__findnew(event->ip.pid);
854 uint64_t ip = event->ip.ip;
855 struct map *map = NULL;
857 dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
858 (void *)(offset + head),
859 (void *)(long)(event->header.size),
/*
 * NOTE(review): `thread` is dereferenced here before the NULL check that
 * follows; with -D and a failed threads__findnew() this is a NULL
 * dereference.
 */
864 dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
866 if (thread == NULL) {
867 fprintf(stderr, "problem processing %d event, skipping it.\n",
872 if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
878 dprintf(" ...... dso: %s\n", dso->name);
880 } else if (event->header.misc & PERF_EVENT_MISC_USER) {
885 map = thread__find_map(thread, ip);
/*
 * Rebase ip for the symbol lookup.
 * NOTE(review): this subtracts pgoff as well as start; confirm that the
 * intended file-relative address is not ip - start + pgoff.
 */
888 ip -= map->start + map->pgoff;
891 * If this is outside of all known maps,
892 * and is a negative address, try to look it
893 * up in the kernel dso, as it might be a
894 * vsyscall (which executes in user-mode):
896 if ((long long)ip < 0)
899 dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
904 dprintf(" ...... dso: [hypervisor]\n");
907 if (show & show_mask) {
908 struct symbol *sym = dso__find_symbol(dso, ip);
910 if (hist_entry__add(thread, map, dso, sym, ip, level)) {
912 "problem incrementing symbol count, skipping event\n");
/*
 * Handle an mmap record: build a map from it with map__new() and attach it
 * to the thread identified by the record's pid. The record is skipped (with
 * a trace message) when either the thread or the map could not be obtained.
 */
922 process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
924 struct thread *thread = threads__findnew(event->mmap.pid);
925 struct map *map = map__new(&event->mmap);
927 dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
928 (void *)(offset + head),
929 (void *)(long)(event->header.size),
/* numeric fields are cast through long so they can be printed with %p */
931 (void *)(long)event->mmap.start,
932 (void *)(long)event->mmap.len,
933 (void *)(long)event->mmap.pgoff,
934 event->mmap.filename);
936 if (thread == NULL || map == NULL) {
937 dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
941 thread__insert_map(thread, map);
/*
 * Handle a comm record: set the command name of the thread identified by
 * the record's pid. The record is skipped (with a trace message) when the
 * thread cannot be obtained or thread__set_comm() fails.
 */
948 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
950 struct thread *thread = threads__findnew(event->comm.pid);
952 dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
953 (void *)(offset + head),
954 (void *)(long)(event->header.size),
955 event->comm.comm, event->comm.pid);
957 if (thread == NULL ||
958 thread__set_comm(thread, event->comm.comm)) {
959 dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
/*
 * Handle a fork record: look up (or create) both the child and the parent
 * thread and let thread__fork() copy the parent's state into the child.
 * The record is skipped (with a trace message) when any of the three steps
 * fails.
 */
968 process_fork_event(event_t *event, unsigned long offset, unsigned long head)
970 struct thread *thread = threads__findnew(event->fork.pid);
971 struct thread *parent = threads__findnew(event->fork.ppid);
973 dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
974 (void *)(offset + head),
975 (void *)(long)(event->header.size),
976 event->fork.pid, event->fork.ppid);
978 if (!thread || !parent || thread__fork(thread, parent)) {
979 dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
/*
 * Dispatch one record. Overflow records are recognised by a flag in
 * header.misc and take priority; everything else is routed on header.type.
 * PERIOD, THROTTLE and UNTHROTTLE records are accepted but not processed.
 * __cmd_report() treats a negative return as an unknown record.
 */
988 process_event(event_t *event, unsigned long offset, unsigned long head)
990 if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
991 return process_overflow_event(event, offset, head);
993 switch (event->header.type) {
994 case PERF_EVENT_MMAP:
995 return process_mmap_event(event, offset, head);
997 case PERF_EVENT_COMM:
998 return process_comm_event(event, offset, head);
1000 case PERF_EVENT_FORK:
1001 return process_fork_event(event, offset, head);
1004 * We dont process them right now but they are fine:
1007 case PERF_EVENT_PERIOD:
1008 case PERF_EVENT_THROTTLE:
1009 case PERF_EVENT_UNTHROTTLE:
/*
 * Main body of the report command: open and stat the input file, load the
 * kernel symbols, record the current directory, then walk the file through
 * a sliding read-only mmap window, handing each record to process_event().
 * Finally the per-type counters are traced and the histogram is printed to
 * stdout.
 */
1019 static int __cmd_report(void)
1021 int ret, rc = EXIT_FAILURE;
/* `offset` is the file offset of the mapped window, `head` the position inside it */
1022 unsigned long offset = 0;
1023 unsigned long head = 0;
1029 register_idle_thread();
1031 input = open(input_name, O_RDONLY);
1033 perror("failed to open file");
1037 ret = fstat(input, &stat);
1039 perror("failed to stat file");
1043 if (!stat.st_size) {
1044 fprintf(stderr, "zero-sized file, nothing to do!\n");
/*
 * NOTE(review): this return and the getcwd() one below happen after
 * `input` was opened; as far as is visible the descriptor is not closed
 * on these paths.
 */
1048 if (load_kernel() < 0) {
1049 perror("failed to load kernel symbols");
1050 return EXIT_FAILURE;
1054 if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
1055 perror("failed to get the current directory");
1056 return EXIT_FAILURE;
1058 cwdlen = strlen(cwd);
1064 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1065 MAP_SHARED, input, offset);
1066 if (buf == MAP_FAILED) {
1067 perror("failed to mmap file");
1072 event = (event_t *)(buf + head);
1074 size = event->header.size;
/* the record would reach the end of the window: drop the mapping and slide it forward by whole pages */
1078 if (head + event->header.size >= page_size * mmap_window) {
1079 unsigned long shift = page_size * (head / page_size);
1082 ret = munmap(buf, page_size * mmap_window);
1090 size = event->header.size;
/* a zero size or a negative result from process_event() is handled as an unknown record */
1092 if (!size || process_event(event, offset, head) < 0) {
1094 dprintf("%p [%p]: skipping unknown header type: %d\n",
1095 (void *)(offset + head),
1096 (void *)(long)(event->header.size),
1097 event->header.type);
1102 * assume we lost track of the stream, check alignment, and
1103 * increment a single u64 in the hope to catch on again 'soon'.
1106 if (unlikely(head & 7))
/* more data left in the file */
1114 if (offset + head < stat.st_size)
1120 dprintf(" IP events: %10ld\n", total);
1121 dprintf(" mmap events: %10ld\n", total_mmap);
1122 dprintf(" comm events: %10ld\n", total_comm);
1123 dprintf(" fork events: %10ld\n", total_fork);
1124 dprintf(" unknown events: %10ld\n", total_unknown);
1130 dsos__fprintf(stdout);
1134 output__fprintf(stdout, total);
/* Usage lines passed to parse_options() and usage_with_options(). */
1139 static const char * const report_usage[] = {
1140 "perf report [<options>] <command>",
/*
 * Command-line options of `perf report`. Each entry stores its value
 * directly in the file-scope variable it names.
 *
 * The --sort help text states the real default, which is the value of
 * default_sort_order ("comm,dso").
 */
1144 static const struct option options[] = {
1145 OPT_STRING('i', "input", &input_name, "file",
1147 OPT_BOOLEAN('v', "verbose", &verbose,
1148 "be more verbose (show symbol address, etc)"),
1149 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1150 "dump raw trace in ASCII"),
1151 OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
1152 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1153 "sort by key(s): pid, comm, dso, symbol. Default: comm,dso"),
1154 OPT_BOOLEAN('P', "full-paths", &full_paths,
1155 "Don't shorten the pathnames taking into account the cwd"),
/*
 * Split the --sort string on commas and spaces and activate each key with
 * sort_dimension__add(). An unknown key is reported and followed by the
 * usage text. A private copy of the string is tokenised because strtok_r()
 * modifies its input.
 *
 * NOTE(review): the strdup() result is used without a NULL check as far as
 * is visible here.
 */
1159 static void setup_sorting(void)
1161 char *tmp, *tok, *str = strdup(sort_order);
1163 for (tok = strtok_r(str, ", ", &tmp);
1164 tok; tok = strtok_r(NULL, ", ", &tmp)) {
1165 if (sort_dimension__add(tok) < 0) {
1166 error("Unknown --sort key: `%s'", tok);
1167 usage_with_options(report_usage, options);
/*
 * Entry point of `perf report`: records the page size, parses the command
 * line against `options`, shows the usage text on the path guarded by the
 * leftover-arguments check, and then runs __cmd_report(), whose result is
 * returned.
 */
1174 int cmd_report(int argc, const char **argv, const char *prefix)
1178 page_size = getpagesize();
/* parse_options() returns the number of arguments it did not consume */
1180 argc = parse_options(argc, argv, options, report_usage, 0);
1185 * Any (unrecognized) arguments left?
1188 usage_with_options(report_usage, options);
1192 return __cmd_report();