X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;ds=sidebyside;f=tools%2Fperf%2Fbuiltin-stat.c;h=c70d72003557f17f29345b0f219dc5ca9f572d75;hb=4ea2f43f28e30050bc99fe3134b6b679f3bf5b22;hp=cdcd058fac08b67ab3591bee3017df8ee2675ee2;hpb=0cfb7a13b8e4e0afd4b856156ab16a182de7505b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cdcd058..c70d720 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -32,6 +32,7 @@ * Wu Fengguang * Mike Galbraith * Paul Mackerras + * Jaswinder Singh Rajput * * Released under the GPL v2. (and only v2, not any later version) */ @@ -41,84 +42,123 @@ #include "util/util.h" #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/event.h" +#include "util/debug.h" #include #include -static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { +static struct perf_event_attr default_attrs[] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, }; -#define MAX_RUN 100 - static int system_wide = 0; -static int verbose = 0; -static int nr_cpus = 0; +static unsigned int nr_cpus = 0; static int run_idx = 0; static int run_count = 1; static int inherit = 1; static int scale = 1; -static int target_pid = -1; +static pid_t target_pid = -1; +static pid_t child_pid = -1; static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static u64 runtime_nsecs[MAX_RUN]; -static u64 walltime_nsecs[MAX_RUN]; -static u64 runtime_cycles[MAX_RUN]; +static int event_scaled[MAX_COUNTERS]; + +struct stats +{ + double n, mean, M2; +}; + +static void update_stats(struct stats *stats, u64 val) +{ + double delta; -static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + stats->n++; + delta = val - stats->mean; + stats->mean += delta / stats->n; + stats->M2 += delta*(val - stats->mean); +} -static u64 event_res_avg[MAX_COUNTERS][3]; -static u64 event_res_noise[MAX_COUNTERS][3]; +static double avg_stats(struct stats *stats) +{ + return stats->mean; +} -static u64 event_scaled_avg[MAX_COUNTERS]; +/* + * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + * + * (\Sum n_i^2) - ((\Sum n_i)^2)/n + * s^2 = ------------------------------- + * n - 1 + * + * http://en.wikipedia.org/wiki/Stddev + * + * The std dev of the mean is related to the std dev by: + * + * s + * s_mean = ------- + * sqrt(n) + * + */ +static double stddev_stats(struct stats *stats) +{ + double variance = stats->M2 / (stats->n - 1); + double variance_mean = variance / stats->n; -static u64 runtime_nsecs_avg; -static u64 runtime_nsecs_noise; + return sqrt(variance_mean); +} -static u64 walltime_nsecs_avg; -static u64 walltime_nsecs_noise; +struct stats event_res_stats[MAX_COUNTERS][3]; +struct stats runtime_nsecs_stats; +struct stats walltime_nsecs_stats; +struct stats runtime_cycles_stats; +struct stats runtime_branches_stats; -static u64 runtime_cycles_avg; -static u64 runtime_cycles_noise; +#define MATCH_EVENT(t, c, counter) \ + (attrs[counter].type == PERF_TYPE_##t && \ + attrs[counter].config == PERF_COUNT_##c) #define ERR_PERF_OPEN \ -"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" +"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" -static void create_perf_stat_counter(int counter) +static void create_perf_stat_counter(int counter, int pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { - int cpu; + unsigned int cpu; + for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[cpu][counter], strerror(errno)); } } else { - attr->inherit = inherit; - attr->disabled = 1; + attr->inherit = inherit; + attr->disabled = 1; + attr->enable_on_exec = 1; - fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[0][counter], strerror(errno)); @@ -130,13 +170,8 @@ static void create_perf_stat_counter(int counter) */ static inline int nsec_counter(int counter) { - if (attrs[counter].type != PERF_TYPE_SOFTWARE) - return 0; - - if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) - return 1; - - if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || + MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) return 1; return 0; @@ -147,12 +182,11 @@ static inline int nsec_counter(int counter) */ static void read_counter(int counter) { - u64 *count, single_count[3]; - ssize_t res; - int cpu, nv; + u64 count[3], single_count[3]; + unsigned int cpu; + size_t res, nv; int scaled; - - count = event_res[run_idx][counter]; + int i; count[0] = count[1] = count[2] = 0; @@ -163,6 +197,7 @@ static void read_counter(int counter) res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -176,63 +211,111 @@ static void read_counter(int counter) scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[run_idx][counter] = -1; + event_scaled[counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[run_idx][counter] = 1; + event_scaled[counter] = 1; count[0] = (unsigned long long) ((double)count[0] * count[1] / count[2] + 0.5); } } + + for (i = 0; i < 3; i++) + update_stats(&event_res_stats[counter][i], count[i]); + + if (verbose) { + fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), + count[0], count[1], count[2]); + } + /* * Save the full runtime - to allow normalization during printout: */ - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs[run_idx] = count[0]; - if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles[run_idx] = count[0]; + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) + update_stats(&runtime_nsecs_stats, count[0]); + if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) + update_stats(&runtime_cycles_stats, count[0]); + if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) + update_stats(&runtime_branches_stats, count[0]); } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc __used, const char **argv) { unsigned long long t0, t1; int status = 0; int counter; int pid; + int child_ready_pipe[2], go_pipe[2]; + char buf; if (!system_wide) nr_cpus = 1; - for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); - - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); + if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + perror("failed to create pipes"); + exit(1); + } if ((pid = fork()) < 0) perror("failed to fork"); if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. + */ + if (read(go_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + exit(-1); } + child_pid = pid; + + /* + * Wait for the child to be ready to exec. + */ + close(child_ready_pipe[1]); + close(go_pipe[0]); + if (read(child_ready_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); + close(child_ready_pipe[0]); + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter, pid); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + + close(go_pipe[1]); wait(&status); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); - walltime_nsecs[run_idx] = t1 - t0; + update_stats(&walltime_nsecs_stats, t1 - t0); for (counter = 0; counter < nr_counters; counter++) read_counter(counter); @@ -240,46 +323,57 @@ static int run_perf_stat(int argc, const char **argv) return WEXITSTATUS(status); } -static void print_noise(u64 *count, u64 *noise) +static void print_noise(int counter, double avg) { - if (run_count > 1) - fprintf(stderr, " ( +- %7.3f%% )", - (double)noise[0]/(count[0]+1)*100.0); + if (run_count == 1) + return; + + fprintf(stderr, " ( +- %7.3f%% )", + 100 * stddev_stats(&event_res_stats[counter][0]) / avg); } -static void nsec_printout(int counter, u64 *count, u64 *noise) +static void nsec_printout(int counter, double avg) { - double msecs = (double)count[0] / 1000000; + double msecs = avg / 1e6; - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - - if (walltime_nsecs_avg) - fprintf(stderr, " # %10.3f CPUs ", - (double)count[0] / (double)walltime_nsecs_avg); + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { + fprintf(stderr, " # %10.3f CPUs ", + avg / avg_stats(&walltime_nsecs_stats)); } - print_noise(count, noise); } -static void abs_printout(int counter, u64 *count, u64 *noise) +static void abs_printout(int counter, double avg) { - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + double total, ratio = 0.0; - if (runtime_cycles_avg && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + fprintf(stderr, " %14.0f %-24s", avg, event_name(counter)); - fprintf(stderr, " # %10.3f IPC ", - (double)count[0] / (double)runtime_cycles_avg); - } else { - if (runtime_nsecs_avg) { - fprintf(stderr, " # %10.3f M/sec", - (double)count[0]/runtime_nsecs_avg*1000.0); - } + if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { + total = avg_stats(&runtime_cycles_stats); + + if (total) + ratio = avg / total; + + fprintf(stderr, " # %10.3f IPC ", ratio); + } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && + runtime_branches_stats.n != 0) { + total = avg_stats(&runtime_branches_stats); + + if (total) + ratio = avg * 100 / total; + + fprintf(stderr, " # %10.3f %% ", ratio); + + } else if (runtime_nsecs_stats.n != 0) { + total = avg_stats(&runtime_nsecs_stats); + + if (total) + ratio = 1000.0 * avg / total; + + fprintf(stderr, " # %10.3f M/sec", ratio); } - print_noise(count, noise); } /* @@ -287,121 +381,39 @@ static void abs_printout(int counter, u64 *count, u64 *noise) */ static void print_counter(int counter) { - u64 *count, *noise; - int scaled; - - count = event_res_avg[counter]; - noise = event_res_noise[counter]; - scaled = event_scaled_avg[counter]; + double avg = avg_stats(&event_res_stats[counter][0]); + int scaled = event_scaled[counter]; if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", + fprintf(stderr, " %14s %-24s\n", "", event_name(counter)); return; } if (nsec_counter(counter)) - nsec_printout(counter, count, noise); + nsec_printout(counter, avg); else - abs_printout(counter, count, noise); - - if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); - - fprintf(stderr, "\n"); -} + abs_printout(counter, avg); -/* - * normalize_noise noise values down to stddev: - */ -static void normalize_noise(u64 *val) -{ - double res; + print_noise(counter, avg); - res = (double)*val / (run_count * sqrt((double)run_count)); + if (scaled) { + double avg_enabled, avg_running; - *val = (u64)res; -} + avg_enabled = avg_stats(&event_res_stats[counter][1]); + avg_running = avg_stats(&event_res_stats[counter][2]); -static void update_avg(const char *name, int idx, u64 *avg, u64 *val) -{ - *avg += *val; - - if (verbose > 1) - fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); -} -/* - * Calculate the averages and noises: - */ -static void calc_avg(void) -{ - int i, j; - - if (verbose > 1) - fprintf(stderr, "\n"); - - for (i = 0; i < run_count; i++) { - update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); - update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); - update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); - - for (j = 0; j < nr_counters; j++) { - update_avg("counter/0", j, - event_res_avg[j]+0, event_res[i][j]+0); - update_avg("counter/1", j, - event_res_avg[j]+1, event_res[i][j]+1); - update_avg("counter/2", j, - event_res_avg[j]+2, event_res[i][j]+2); - update_avg("scaled", j, - event_scaled_avg + j, event_scaled[i]+j); - } - } - runtime_nsecs_avg /= run_count; - walltime_nsecs_avg /= run_count; - runtime_cycles_avg /= run_count; - - for (j = 0; j < nr_counters; j++) { - event_res_avg[j][0] /= run_count; - event_res_avg[j][1] /= run_count; - event_res_avg[j][2] /= run_count; - } - - for (i = 0; i < run_count; i++) { - runtime_nsecs_noise += - abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); - walltime_nsecs_noise += - abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); - runtime_cycles_noise += - abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); - - for (j = 0; j < nr_counters; j++) { - event_res_noise[j][0] += - abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); - event_res_noise[j][1] += - abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); - event_res_noise[j][2] += - abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); - } + fprintf(stderr, " (scaled from %.2f%%)", + 100 * avg_running / avg_enabled); } - normalize_noise(&runtime_nsecs_noise); - normalize_noise(&walltime_nsecs_noise); - normalize_noise(&runtime_cycles_noise); - - for (j = 0; j < nr_counters; j++) { - normalize_noise(&event_res_noise[j][0]); - normalize_noise(&event_res_noise[j][1]); - normalize_noise(&event_res_noise[j][2]); - } + fprintf(stderr, "\n"); } static void print_stat(int argc, const char **argv) { int i, counter; - calc_avg(); - fflush(stdout); fprintf(stderr, "\n"); @@ -418,11 +430,15 @@ static void print_stat(int argc, const char **argv) for (counter = 0; counter < nr_counters; counter++) print_counter(counter); - - fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", - (double)walltime_nsecs_avg/1e9); fprintf(stderr, "\n"); + fprintf(stderr, " %14.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(stderr, " ( +- %7.3f%% )", + 100*stddev_stats(&walltime_nsecs_stats) / + avg_stats(&walltime_nsecs_stats)); + } + fprintf(stderr, "\n\n"); } static volatile int signr = -1; @@ -434,6 +450,9 @@ static void skip_signal(int signo) static void sig_atexit(void) { + if (child_pid != -1) + kill(child_pid, SIGTERM); + if (signr == -1) return; @@ -456,7 +475,7 @@ static const struct option options[] = { "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, + OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), @@ -467,24 +486,26 @@ static const struct option options[] = { OPT_END() }; -int cmd_stat(int argc, const char **argv, const char *prefix) +int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; - memcpy(attrs, default_attrs, sizeof(attrs)); - - argc = parse_options(argc, argv, options, stat_usage, 0); + argc = parse_options(argc, argv, options, stat_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(stat_usage, options); - if (run_count <= 0 || run_count > MAX_RUN) + if (run_count <= 0) usage_with_options(stat_usage, options); - if (!null_run && !nr_counters) - nr_counters = 8; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!null_run && !nr_counters) { + memcpy(attrs, default_attrs, sizeof(default_attrs)); + nr_counters = ARRAY_SIZE(default_attrs); + } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); + assert((int)nr_cpus >= 0); /* * We dont want to block the signals - that would cause