perf_counter tools: Document '--' option parsing terminator
[safe/jmp/linux-2.6] / Documentation / perf_counter / builtin-record.c
1
2
3 #include "perf.h"
4 #include "builtin.h"
5 #include "util/util.h"
6 #include "util/parse-options.h"
7 #include "util/parse-events.h"
8
9 #include <sched.h>
10
/*
 * ALIGN(x, a) rounds x up to the next multiple of a; the mask arithmetic
 * only works when a is a power of two.  The mask is cast to the type of x
 * so that the computation happens at x's width.
 *
 * __typeof__ is used rather than typeof so the macro also builds when the
 * compiler runs in a strict ISO mode, where the plain keyword is disabled.
 */
#define __ALIGN_MASK(x, mask)   (((x)+(mask))&~(mask))
#define ALIGN(x, a)             __ALIGN_MASK(x, (__typeof__(x))(a)-1)
13
14 static int                      default_interval = 100000;
15 static int                      event_count[MAX_COUNTERS];
16
17 static int                      fd[MAX_NR_CPUS][MAX_COUNTERS];
18 static int                      nr_cpus                         =  0;
19 static unsigned int             page_size;
20 static unsigned int             mmap_pages                      = 16;
21 static int                      output;
22 static const char               *output_name                    = "perf.data";
23 static int                      group                           = 0;
24 static unsigned int             realtime_prio                   = 0;
25 static int                      system_wide                     = 0;
26 static pid_t                    target_pid                      = -1;
27 static int                      inherit                         = 1;
28 static int                      nmi                             = 1;
29
/* Table of six default counts; nothing else in this file reads it. */
const unsigned int default_count[] = {
        1000000, 1000000,
          10000,   10000,
        1000000,   10000,
};
38
/* Reader-side bookkeeping for one counter's mmap()ed buffer. */
struct mmap_data {
        int             counter;        /* index of the counter this buffer belongs to */
        void            *base;          /* start of the mapping; event data begins one page in */
        unsigned int    mask;           /* size of the data area minus one */
        unsigned int    prev;           /* position up to which data was already written out */
};
45
46 static unsigned int mmap_read_head(struct mmap_data *md)
47 {
48         struct perf_counter_mmap_page *pc = md->base;
49         int head;
50
51         head = pc->data_head;
52         rmb();
53
54         return head;
55 }
56
57 static long events;
58 static struct timeval last_read, this_read;
59
60 static void mmap_read(struct mmap_data *md)
61 {
62         unsigned int head = mmap_read_head(md);
63         unsigned int old = md->prev;
64         unsigned char *data = md->base + page_size;
65         unsigned long size;
66         void *buf;
67         int diff;
68
69         gettimeofday(&this_read, NULL);
70
71         /*
72          * If we're further behind than half the buffer, there's a chance
73          * the writer will bite our tail and screw up the events under us.
74          *
75          * If we somehow ended up ahead of the head, we got messed up.
76          *
77          * In either case, truncate and restart at head.
78          */
79         diff = head - old;
80         if (diff > md->mask / 2 || diff < 0) {
81                 struct timeval iv;
82                 unsigned long msecs;
83
84                 timersub(&this_read, &last_read, &iv);
85                 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
86
87                 fprintf(stderr, "WARNING: failed to keep up with mmap data."
88                                 "  Last read %lu msecs ago.\n", msecs);
89
90                 /*
91                  * head points to a known good entry, start there.
92                  */
93                 old = head;
94         }
95
96         last_read = this_read;
97
98         if (old != head)
99                 events++;
100
101         size = head - old;
102
103         if ((old & md->mask) + size != (head & md->mask)) {
104                 buf = &data[old & md->mask];
105                 size = md->mask + 1 - (old & md->mask);
106                 old += size;
107                 while (size) {
108                         int ret = write(output, buf, size);
109                         if (ret < 0) {
110                                 perror("failed to write");
111                                 exit(-1);
112                         }
113                         size -= ret;
114                         buf += ret;
115                 }
116         }
117
118         buf = &data[old & md->mask];
119         size = head - old;
120         old += size;
121         while (size) {
122                 int ret = write(output, buf, size);
123                 if (ret < 0) {
124                         perror("failed to write");
125                         exit(-1);
126                 }
127                 size -= ret;
128                 buf += ret;
129         }
130
131         md->prev = old;
132 }
133
/*
 * Set to 1 by sig_handler().  sig_atomic_t is the type the C standard
 * guarantees can be written safely from inside a signal handler.
 */
static volatile sig_atomic_t done = 0;

/* Signal handler: only raises the done flag. */
static void sig_handler(int sig)
{
        (void)sig;      /* the signal number is not needed */
        done = 1;
}
140
141 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
142 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
143
144 static int nr_poll;
145 static int nr_cpu;
146
147 struct mmap_event {
148         struct perf_event_header        header;
149         __u32                           pid;
150         __u32                           tid;
151         __u64                           start;
152         __u64                           len;
153         __u64                           pgoff;
154         char                            filename[PATH_MAX];
155 };
156
157 struct comm_event {
158         struct perf_event_header        header;
159         __u32                           pid;
160         __u32                           tid;
161         char                            comm[16];
162 };
163
164 static pid_t pid_synthesize_comm_event(pid_t pid)
165 {
166         struct comm_event comm_ev;
167         char filename[PATH_MAX];
168         pid_t spid, ppid;
169         char bf[BUFSIZ];
170         int fd, nr, ret;
171         char comm[18];
172         size_t size;
173         char state;
174
175         snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
176
177         fd = open(filename, O_RDONLY);
178         if (fd < 0) {
179                 fprintf(stderr, "couldn't open %s\n", filename);
180                 exit(EXIT_FAILURE);
181         }
182         if (read(fd, bf, sizeof(bf)) < 0) {
183                 fprintf(stderr, "couldn't read %s\n", filename);
184                 exit(EXIT_FAILURE);
185         }
186         close(fd);
187
188         memset(&comm_ev, 0, sizeof(comm_ev));
189         nr = sscanf(bf, "%d %s %c %d %d ",
190                         &spid, comm, &state, &ppid, &comm_ev.pid);
191         if (nr != 5) {
192                 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
193                         filename);
194                 exit(EXIT_FAILURE);
195         }
196         comm_ev.header.type = PERF_EVENT_COMM;
197         comm_ev.tid = pid;
198         size = strlen(comm);
199         comm[--size] = '\0'; /* Remove the ')' at the end */
200         --size; /* Remove the '(' at the begin */
201         memcpy(comm_ev.comm, comm + 1, size);
202         size = ALIGN(size, sizeof(uint64_t));
203         comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
204
205         ret = write(output, &comm_ev, comm_ev.header.size);
206         if (ret < 0) {
207                 perror("failed to write");
208                 exit(-1);
209         }
210         return comm_ev.pid;
211 }
212
213 static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
214 {
215         char filename[PATH_MAX];
216         FILE *fp;
217
218         snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
219
220         fp = fopen(filename, "r");
221         if (fp == NULL) {
222                 fprintf(stderr, "couldn't open %s\n", filename);
223                 exit(EXIT_FAILURE);
224         }
225         while (1) {
226                 char bf[BUFSIZ];
227                 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
228                 struct mmap_event mmap_ev = {
229                         .header.type = PERF_EVENT_MMAP,
230                 };
231                 unsigned long ino;
232                 int major, minor;
233                 size_t size;
234                 if (fgets(bf, sizeof(bf), fp) == NULL)
235                         break;
236
237                 /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
238                 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
239                         &mmap_ev.start, &mmap_ev.len,
240                         &vm_read, &vm_write, &vm_exec, &vm_mayshare,
241                         &mmap_ev.pgoff, &major, &minor, &ino);
242                 if (vm_exec == 'x') {
243                         char *execname = strrchr(bf, ' ');
244
245                         if (execname == NULL || execname[1] != '/')
246                                 continue;
247
248                         execname += 1;
249                         size = strlen(execname);
250                         execname[size - 1] = '\0'; /* Remove \n */
251                         memcpy(mmap_ev.filename, execname, size);
252                         size = ALIGN(size, sizeof(uint64_t));
253                         mmap_ev.len -= mmap_ev.start;
254                         mmap_ev.header.size = (sizeof(mmap_ev) -
255                                                (sizeof(mmap_ev.filename) - size));
256                         mmap_ev.pid = pgid;
257                         mmap_ev.tid = pid;
258
259                         if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
260                                 perror("failed to write");
261                                 exit(-1);
262                         }
263                 }
264         }
265
266         fclose(fp);
267 }
268
269 static void open_counters(int cpu, pid_t pid)
270 {
271         struct perf_counter_hw_event hw_event;
272         int counter, group_fd;
273         int track = 1;
274
275         if (pid > 0) {
276                 pid_t pgid = pid_synthesize_comm_event(pid);
277                 pid_synthesize_mmap_events(pid, pgid);
278         }
279
280         group_fd = -1;
281         for (counter = 0; counter < nr_counters; counter++) {
282
283                 memset(&hw_event, 0, sizeof(hw_event));
284                 hw_event.config         = event_id[counter];
285                 hw_event.irq_period     = event_count[counter];
286                 hw_event.record_type    = PERF_RECORD_IP | PERF_RECORD_TID;
287                 hw_event.nmi            = nmi;
288                 hw_event.mmap           = track;
289                 hw_event.comm           = track;
290                 hw_event.inherit        = (cpu < 0) && inherit;
291
292                 track = 0; // only the first counter needs these
293
294                 fd[nr_cpu][counter] =
295                         sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
296
297                 if (fd[nr_cpu][counter] < 0) {
298                         int err = errno;
299                         printf("kerneltop error: syscall returned with %d (%s)\n",
300                                         fd[nr_cpu][counter], strerror(err));
301                         if (err == EPERM)
302                                 printf("Are you root?\n");
303                         exit(-1);
304                 }
305                 assert(fd[nr_cpu][counter] >= 0);
306                 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
307
308                 /*
309                  * First counter acts as the group leader:
310                  */
311                 if (group && group_fd == -1)
312                         group_fd = fd[nr_cpu][counter];
313
314                 event_array[nr_poll].fd = fd[nr_cpu][counter];
315                 event_array[nr_poll].events = POLLIN;
316                 nr_poll++;
317
318                 mmap_array[nr_cpu][counter].counter = counter;
319                 mmap_array[nr_cpu][counter].prev = 0;
320                 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
321                 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
322                                 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
323                 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
324                         printf("kerneltop error: failed to mmap with %d (%s)\n",
325                                         errno, strerror(errno));
326                         exit(-1);
327                 }
328         }
329         nr_cpu++;
330 }
331
332 static int __cmd_record(int argc, const char **argv)
333 {
334         int i, counter;
335         pid_t pid;
336         int ret;
337
338         page_size = sysconf(_SC_PAGE_SIZE);
339         nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
340         assert(nr_cpus <= MAX_NR_CPUS);
341         assert(nr_cpus >= 0);
342
343         output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
344         if (output < 0) {
345                 perror("failed to create output file");
346                 exit(-1);
347         }
348
349         if (!system_wide) {
350                 open_counters(-1, target_pid != -1 ? target_pid : 0);
351         } else for (i = 0; i < nr_cpus; i++)
352                 open_counters(i, target_pid);
353
354         signal(SIGCHLD, sig_handler);
355         signal(SIGINT, sig_handler);
356
357         if (target_pid == -1 && argc) {
358                 pid = fork();
359                 if (pid < 0)
360                         perror("failed to fork");
361
362                 if (!pid) {
363                         if (execvp(argv[0], (char **)argv)) {
364                                 perror(argv[0]);
365                                 exit(-1);
366                         }
367                 }
368         }
369
370         if (realtime_prio) {
371                 struct sched_param param;
372
373                 param.sched_priority = realtime_prio;
374                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
375                         printf("Could not set realtime priority.\n");
376                         exit(-1);
377                 }
378         }
379
380         /*
381          * TODO: store the current /proc/$/maps information somewhere
382          */
383
384         while (!done) {
385                 int hits = events;
386
387                 for (i = 0; i < nr_cpu; i++) {
388                         for (counter = 0; counter < nr_counters; counter++)
389                                 mmap_read(&mmap_array[i][counter]);
390                 }
391
392                 if (hits == events)
393                         ret = poll(event_array, nr_poll, 100);
394         }
395
396         return 0;
397 }
398
/* NULL-terminated usage lines passed to the option parser. */
static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        /* "--" ends perf's own options; what follows belongs to the command */
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
404
405 static char events_help_msg[EVENTS_HELP_MAX];
406
407 static const struct option options[] = {
408         OPT_CALLBACK('e', "event", NULL, "event",
409                      events_help_msg, parse_events),
410         OPT_INTEGER('c', "count", &default_interval,
411                     "event period to sample"),
412         OPT_INTEGER('m', "mmap-pages", &mmap_pages,
413                     "number of mmap data pages"),
414         OPT_STRING('o', "output", &output_name, "file",
415                     "output file name"),
416         OPT_BOOLEAN('i', "inherit", &inherit,
417                     "child tasks inherit counters"),
418         OPT_INTEGER('p', "pid", &target_pid,
419                     "record events on existing pid"),
420         OPT_INTEGER('r', "realtime", &realtime_prio,
421                     "collect data with this RT SCHED_FIFO priority"),
422         OPT_BOOLEAN('a', "all-cpus", &system_wide,
423                             "system-wide collection from all CPUs"),
424         OPT_END()
425 };
426
427 int cmd_record(int argc, const char **argv, const char *prefix)
428 {
429         int counter;
430
431         create_events_help(events_help_msg);
432
433         argc = parse_options(argc, argv, options, record_usage, 0);
434         if (!argc && target_pid == -1 && !system_wide)
435                 usage_with_options(record_usage, options);
436
437         if (!nr_counters) {
438                 nr_counters = 1;
439                 event_id[0] = 0;
440         }
441
442         for (counter = 0; counter < nr_counters; counter++) {
443                 if (event_count[counter])
444                         continue;
445
446                 event_count[counter] = default_interval;
447         }
448
449         return __cmd_record(argc, argv);
450 }