perf kmem: Collect cross-node allocation statistics
tools/perf/builtin-kmem.c
#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"
#include "util/data_map.h"

#include <linux/rbtree.h>

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static char const               *input_name = "perf.data";

static struct perf_header       *header;
static u64                      sample_type;

static int                      alloc_flag;
static int                      caller_flag;

static int                      alloc_lines = -1;
static int                      caller_lines = -1;

static bool                     raw_ip;

static char                     default_sort_order[] = "frag,hit,bytes";

static char                     *cwd;
static int                      cwdlen;

static int                      *cpunode_map;
static int                      max_cpu_num;

struct alloc_stat {
        union {
                u64     call_site;
                u64     ptr;
        };
        u64     bytes_req;
        u64     bytes_alloc;
        u32     hit;

        struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

struct raw_event_sample {
        u32 size;
        char data[0];
};

#define PATH_SYS_NODE   "/sys/devices/system/node"

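/*
 * Size the cpu -> node table from the kernel's reported CPU limit
 * (falling back to 4096 CPUs if kernel_max is unreadable) and mark
 * every entry as node -1 (unknown) until setup_cpunode_map() fills
 * in the real topology.
 */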
static void init_cpunode_map(void)
{
        FILE *fp;
        int i;

        fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
        if (!fp) {
                max_cpu_num = 4096;
        } else {
                if (fscanf(fp, "%d", &max_cpu_num) < 1)
                        die("Failed to read 'kernel_max' from sysfs");
                max_cpu_num++;
                fclose(fp);
        }

        cpunode_map = calloc(max_cpu_num, sizeof(int));
        if (!cpunode_map)
                die("calloc");
        for (i = 0; i < max_cpu_num; i++)
                cpunode_map[i] = -1;
}

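/*
 * Walk /sys/devices/system/node/nodeN/cpuM to build the CPU -> NUMA
 * node lookup table used for spotting cross-node allocations.
 */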
static void setup_cpunode_map(void)
{
        struct dirent *dent1, *dent2;
        DIR *dir1, *dir2;
        unsigned int cpu, mem;
        char buf[PATH_MAX];

        init_cpunode_map();

        dir1 = opendir(PATH_SYS_NODE);
        if (!dir1)
                return;

        while (true) {
                dent1 = readdir(dir1);
                if (!dent1)
                        break;

                if (sscanf(dent1->d_name, "node%u", &mem) < 1)
                        continue;

                snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
                dir2 = opendir(buf);
                if (!dir2)
                        continue;
                while (true) {
                        dent2 = readdir(dir2);
                        if (!dent2)
                                break;
                        if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
                                continue;
                        cpunode_map[cpu] = mem;
                }
                closedir(dir2);
        }
        closedir(dir1);
}

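/* Record each task's command name so later samples can be attributed. */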
static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
        struct thread *thread = threads__findnew(event->comm.pid);

        dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->comm.comm, event->comm.pid);

        if (thread == NULL ||
            thread__set_comm(thread, event->comm.comm)) {
                dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
                return -1;
        }

        return 0;
}

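/*
 * Account an allocation in the rb-tree keyed on the returned pointer;
 * an existing entry is updated in place, a new one is inserted sorted.
 */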
static void insert_alloc_stat(unsigned long ptr,
                              int bytes_req, int bytes_alloc)
{
        struct rb_node **node = &root_alloc_stat.rb_node;
        struct rb_node *parent = NULL;
        struct alloc_stat *data = NULL;

        if (!alloc_flag)
                return;

        while (*node) {
                parent = *node;
                data = rb_entry(*node, struct alloc_stat, node);

                if (ptr > data->ptr)
                        node = &(*node)->rb_right;
                else if (ptr < data->ptr)
                        node = &(*node)->rb_left;
                else
                        break;
        }

        if (data && data->ptr == ptr) {
                data->hit++;
                data->bytes_req += bytes_req;
                data->bytes_alloc += bytes_alloc;
        } else {
                data = malloc(sizeof(*data));
                if (!data)
                        die("malloc");
                data->ptr = ptr;
                data->hit = 1;
                data->bytes_req = bytes_req;
                data->bytes_alloc = bytes_alloc;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &root_alloc_stat);
        }
}

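/* Same as insert_alloc_stat(), but keyed on the allocation call site. */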
static void insert_caller_stat(unsigned long call_site,
                              int bytes_req, int bytes_alloc)
{
        struct rb_node **node = &root_caller_stat.rb_node;
        struct rb_node *parent = NULL;
        struct alloc_stat *data = NULL;

        if (!caller_flag)
                return;

        while (*node) {
                parent = *node;
                data = rb_entry(*node, struct alloc_stat, node);

                if (call_site > data->call_site)
                        node = &(*node)->rb_right;
                else if (call_site < data->call_site)
                        node = &(*node)->rb_left;
                else
                        break;
        }

        if (data && data->call_site == call_site) {
                data->hit++;
                data->bytes_req += bytes_req;
                data->bytes_alloc += bytes_alloc;
        } else {
                data = malloc(sizeof(*data));
                if (!data)
                        die("malloc");
                data->call_site = call_site;
                data->hit = 1;
                data->bytes_req = bytes_req;
                data->bytes_alloc = bytes_alloc;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &root_caller_stat);
        }
}

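/*
 * Handle one kmalloc/kmem_cache_alloc tracepoint: update the per-pointer
 * and per-call-site trees and the global totals. For the _node variants
 * ('node' set), also compare the allocating CPU's node with the node the
 * memory came from and count mismatches as cross-node allocations.
 */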
static void process_alloc_event(struct raw_event_sample *raw,
                                struct event *event,
                                int cpu,
                                u64 timestamp __used,
                                struct thread *thread __used,
                                int node)
{
        unsigned long call_site;
        unsigned long ptr;
        int bytes_req;
        int bytes_alloc;
        int node1, node2;

        ptr = raw_field_value(event, "ptr", raw->data);
        call_site = raw_field_value(event, "call_site", raw->data);
        bytes_req = raw_field_value(event, "bytes_req", raw->data);
        bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);

        insert_alloc_stat(ptr, bytes_req, bytes_alloc);
        insert_caller_stat(call_site, bytes_req, bytes_alloc);

        total_requested += bytes_req;
        total_allocated += bytes_alloc;

        if (node) {
                node1 = cpunode_map[cpu];
                node2 = raw_field_value(event, "node", raw->data);
                if (node1 != node2)
                        nr_cross_allocs++;
        }
        nr_allocs++;
}

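/* Free events are parsed but not used for any statistics yet. */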
static void process_free_event(struct raw_event_sample *raw __used,
                               struct event *event __used,
                               int cpu __used,
                               u64 timestamp __used,
                               struct thread *thread __used)
{
}

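/* Dispatch a raw tracepoint sample to the matching kmem event handler. */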
static void
process_raw_event(event_t *raw_event __used, void *more_data,
                  int cpu, u64 timestamp, struct thread *thread)
{
        struct raw_event_sample *raw = more_data;
        struct event *event;
        int type;

        type = trace_parse_common_type(raw->data);
        event = trace_find_event(type);
        if (!event)
                return;

        if (!strcmp(event->name, "kmalloc") ||
            !strcmp(event->name, "kmem_cache_alloc")) {
                process_alloc_event(raw, event, cpu, timestamp, thread, 0);
                return;
        }

        if (!strcmp(event->name, "kmalloc_node") ||
            !strcmp(event->name, "kmem_cache_alloc_node")) {
                process_alloc_event(raw, event, cpu, timestamp, thread, 1);
                return;
        }

        if (!strcmp(event->name, "kfree") ||
            !strcmp(event->name, "kmem_cache_free")) {
                process_free_event(raw, event, cpu, timestamp, thread);
                return;
        }
}

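/*
 * Unpack the optional sample fields (time, cpu, period) that precede
 * the raw tracepoint payload, then hand the payload to
 * process_raw_event().
 */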
static int
process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
        u64 ip = event->ip.ip;
        u64 timestamp = -1;
        u32 cpu = -1;
        u64 period = 1;
        void *more_data = event->ip.__more_data;
        struct thread *thread = threads__findnew(event->ip.pid);

        if (sample_type & PERF_SAMPLE_TIME) {
                timestamp = *(u64 *)more_data;
                more_data += sizeof(u64);
        }

        if (sample_type & PERF_SAMPLE_CPU) {
                cpu = *(u32 *)more_data;
                more_data += sizeof(u32);
                more_data += sizeof(u32); /* reserved */
        }

        if (sample_type & PERF_SAMPLE_PERIOD) {
                period = *(u64 *)more_data;
                more_data += sizeof(u64);
        }

        dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->header.misc,
                event->ip.pid, event->ip.tid,
                (void *)(long)ip,
                (long long)period);

        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
                return -1;
        }

        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

        process_raw_event(event, more_data, cpu, timestamp, thread);

        return 0;
}

static int sample_type_check(u64 type)
{
        sample_type = type;

        if (!(sample_type & PERF_SAMPLE_RAW)) {
                fprintf(stderr,
                        "No trace sample to read. Did you call perf record "
                        "without -R?\n");
                return -1;
        }

        return 0;
}

static struct perf_file_handler file_handler = {
        .process_sample_event   = process_sample_event,
        .process_comm_event     = process_comm_event,
        .sample_type_check      = sample_type_check,
};

static int read_events(void)
{
        register_idle_thread();
        register_perf_file_handler(&file_handler);

        return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0,
                                       &cwdlen, &cwd);
}

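/* Internal fragmentation: percentage of allocated bytes not requested. */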
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
        if (n_alloc == 0)
                return 0.0;
        else
                return 100.0 - (100.0 * n_req / n_alloc);
}

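/*
 * Print up to n_lines entries of a sorted result tree; call sites are
 * resolved to kernel symbols unless --raw-ip was given.
 */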
static void __print_result(struct rb_root *root, int n_lines, int is_caller)
{
        struct rb_node *next;

        printf("%.78s\n", graph_dotted_line);
        printf("%-28s|",  is_caller ? "Callsite": "Alloc Ptr");
        printf("Total_alloc/Per | Total_req/Per | Hit  | Frag\n");
        printf("%.78s\n", graph_dotted_line);

        next = rb_first(root);

        while (next && n_lines--) {
                struct alloc_stat *data = rb_entry(next, struct alloc_stat,
                                                   node);
                struct symbol *sym = NULL;
                char bf[BUFSIZ];
                u64 addr;

                if (is_caller) {
                        addr = data->call_site;
                        if (!raw_ip)
                                sym = kernel_maps__find_symbol(addr,
                                                               NULL, NULL);
                } else
                        addr = data->ptr;

                if (sym != NULL)
                        snprintf(bf, sizeof(bf), "%s+%Lx", sym->name,
                                 addr - sym->start);
                else
                        snprintf(bf, sizeof(bf), "%#Lx", addr);

                printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n",
                       bf, (unsigned long long)data->bytes_alloc,
                       (unsigned long)data->bytes_alloc / data->hit,
                       (unsigned long long)data->bytes_req,
                       (unsigned long)data->bytes_req / data->hit,
                       (unsigned long)data->hit,
                       fragmentation(data->bytes_req, data->bytes_alloc));

                next = rb_next(next);
        }

        if (n_lines == -1)
                printf(" ...                        | ...            | ...           | ...    | ...   \n");

        printf("%.78s\n", graph_dotted_line);
}

static void print_summary(void)
{
        printf("\nSUMMARY\n=======\n");
        printf("Total bytes requested: %lu\n", total_requested);
        printf("Total bytes allocated: %lu\n", total_allocated);
        printf("Total bytes wasted on internal fragmentation: %lu\n",
               total_allocated - total_requested);
        printf("Internal fragmentation: %f%%\n",
               fragmentation(total_requested, total_allocated));
        printf("Cross node allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(void)
{
        if (caller_flag)
                __print_result(&root_caller_sorted, caller_lines, 1);
        if (alloc_flag)
                __print_result(&root_alloc_sorted, alloc_lines, 0);
        print_summary();
}

struct sort_dimension {
        const char              name[20];
        sort_fn_t               cmp;
        struct list_head        list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

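/*
 * Insert into the sorted tree, comparing with each sort key in turn
 * until one differs; entries end up in descending order of the keys.
 */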
static void sort_insert(struct rb_root *root, struct alloc_stat *data,
                        struct list_head *sort_list)
{
        struct rb_node **new = &(root->rb_node);
        struct rb_node *parent = NULL;
        struct sort_dimension *sort;

        while (*new) {
                struct alloc_stat *this;
                int cmp = 0;

                this = rb_entry(*new, struct alloc_stat, node);
                parent = *new;

                list_for_each_entry(sort, sort_list, list) {
                        cmp = sort->cmp(data, this);
                        if (cmp)
                                break;
                }

                if (cmp > 0)
                        new = &((*new)->rb_left);
                else
                        new = &((*new)->rb_right);
        }

        rb_link_node(&data->node, parent, new);
        rb_insert_color(&data->node, root);
}

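/* Drain the accumulation tree and re-insert every node in sorted order. */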
static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
                          struct list_head *sort_list)
{
        struct rb_node *node;
        struct alloc_stat *data;

        for (;;) {
                node = rb_first(root);
                if (!node)
                        break;

                rb_erase(node, root);
                data = rb_entry(node, struct alloc_stat, node);
                sort_insert(root_sorted, data, sort_list);
        }
}

static void sort_result(void)
{
        __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
        __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

static int __cmd_kmem(void)
{
        int err;

        setup_pager();
        err = read_events();
        if (err)
                return err;
        sort_result();
        print_result();

        return 0;
}

static const char * const kmem_usage[] = {
        "perf kmem [<options>] {record}",
        NULL
};

static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
        if (l->ptr < r->ptr)
                return -1;
        else if (l->ptr > r->ptr)
                return 1;
        return 0;
}

static struct sort_dimension ptr_sort_dimension = {
        .name   = "ptr",
        .cmp    = ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
        if (l->call_site < r->call_site)
                return -1;
        else if (l->call_site > r->call_site)
                return 1;
        return 0;
}

static struct sort_dimension callsite_sort_dimension = {
        .name   = "callsite",
        .cmp    = callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
        if (l->hit < r->hit)
                return -1;
        else if (l->hit > r->hit)
                return 1;
        return 0;
}

static struct sort_dimension hit_sort_dimension = {
        .name   = "hit",
        .cmp    = hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
        if (l->bytes_alloc < r->bytes_alloc)
                return -1;
        else if (l->bytes_alloc > r->bytes_alloc)
                return 1;
        return 0;
}

static struct sort_dimension bytes_sort_dimension = {
        .name   = "bytes",
        .cmp    = bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
        double x, y;

        x = fragmentation(l->bytes_req, l->bytes_alloc);
        y = fragmentation(r->bytes_req, r->bytes_alloc);

        if (x < y)
                return -1;
        else if (x > y)
                return 1;
        return 0;
}

static struct sort_dimension frag_sort_dimension = {
        .name   = "frag",
        .cmp    = frag_cmp,
};

static struct sort_dimension *avail_sorts[] = {
        &ptr_sort_dimension,
        &callsite_sort_dimension,
        &hit_sort_dimension,
        &bytes_sort_dimension,
        &frag_sort_dimension,
};

#define NUM_AVAIL_SORTS \
        (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))

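/* Append a copy of the named sort dimension to the given sort list. */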
static int sort_dimension__add(const char *tok, struct list_head *list)
{
        struct sort_dimension *sort;
        int i;

        for (i = 0; i < NUM_AVAIL_SORTS; i++) {
                if (!strcmp(avail_sorts[i]->name, tok)) {
                        sort = malloc(sizeof(*sort));
                        if (!sort)
                                die("malloc");
                        memcpy(sort, avail_sorts[i], sizeof(*sort));
                        list_add_tail(&sort->list, list);
                        return 0;
                }
        }

        return -1;
}

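/* Parse a comma-separated --sort argument into a list of dimensions. */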
static int setup_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;

        if (!str)
                die("strdup");

        while (true) {
                tok = strsep(&pos, ",");
                if (!tok)
                        break;
                if (sort_dimension__add(tok, sort_list) < 0) {
                        error("Unknown --sort key: '%s'", tok);
                        free(str);
                        return -1;
                }
        }

        free(str);
        return 0;
}

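/*
 * The --sort and --line options apply to whichever of the caller/alloc
 * views was selected last by --stat (the flags double as ordinals).
 */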
static int parse_sort_opt(const struct option *opt __used,
                          const char *arg, int unset __used)
{
        if (!arg)
                return -1;

        if (caller_flag > alloc_flag)
                return setup_sorting(&caller_sort, arg);
        else
                return setup_sorting(&alloc_sort, arg);
}

static int parse_stat_opt(const struct option *opt __used,
                          const char *arg, int unset __used)
{
        if (!arg)
                return -1;

        if (strcmp(arg, "alloc") == 0)
                alloc_flag = (caller_flag + 1);
        else if (strcmp(arg, "caller") == 0)
                caller_flag = (alloc_flag + 1);
        else
                return -1;
        return 0;
}

static int parse_line_opt(const struct option *opt __used,
                          const char *arg, int unset __used)
{
        int lines;

        if (!arg)
                return -1;

        lines = strtoul(arg, NULL, 10);

        if (caller_flag > alloc_flag)
                caller_lines = lines;
        else
                alloc_lines = lines;

        return 0;
}

static const struct option kmem_options[] = {
        OPT_STRING('i', "input", &input_name, "file",
                   "input file name"),
        OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
                     "stat selector: pass 'alloc' or 'caller'",
                     parse_stat_opt),
        OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
                     "sort by key(s): ptr, callsite, bytes, hit, frag",
                     parse_sort_opt),
        OPT_CALLBACK('l', "line", NULL, "num",
                     "show num lines",
                     parse_line_opt),
        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
        OPT_END()
};

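/*
 * Canned 'perf record' command line: system-wide raw samples of all
 * kmem tracepoints, one sample per event occurrence (-c 1).
 */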
static const char *record_args[] = {
        "record",
        "-a",
        "-R",
        "-M",
        "-f",
        "-c", "1",
        "-e", "kmem:kmalloc",
        "-e", "kmem:kmalloc_node",
        "-e", "kmem:kfree",
        "-e", "kmem:kmem_cache_alloc",
        "-e", "kmem:kmem_cache_alloc_node",
        "-e", "kmem:kmem_cache_free",
};

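/* Prepend the canned record arguments to the user's and run record. */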
static int __cmd_record(int argc, const char **argv)
{
        unsigned int rec_argc, i, j;
        const char **rec_argv;

        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        if (!rec_argv)
                die("calloc");

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);

        for (j = 1; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        return cmd_record(i, rec_argv, NULL);
}

int cmd_kmem(int argc, const char **argv, const char *prefix __used)
{
        symbol__init(0);

        argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);

        if (argc && !strncmp(argv[0], "rec", 3))
                return __cmd_record(argc, argv);
        else if (argc)
                usage_with_options(kmem_usage, kmem_options);

        if (list_empty(&caller_sort))
                setup_sorting(&caller_sort, default_sort_order);
        if (list_empty(&alloc_sort))
                setup_sorting(&alloc_sort, default_sort_order);

        setup_cpunode_map();

        return __cmd_kmem();
}