From 301406b9c69e4914cf45ae9d5f929e7bcf0d93cd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Jun 2009 00:11:21 +0200 Subject: [PATCH 01/49] perf annotate: Print the filename:line for annotated colored lines When we have a colored line in perf annotate, i.e. a middle/high-overhead one, it's sometimes useful to get the matching line and filename from the source file, especially since this patch prepares for a subsequent one that will print a sorted summary of the middle/high-overhead lines at the beginning of the output. Filename:line pairs are printed in the same color as the ip lines they belong to. This can be slow because it relies on addr2line. We could also use objdump with -l, but that implies buffering the objdump output and parsing it to filter out the relevant lines, since we want to print a sorted summary at the beginning. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1244844682-12928-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 98 ++++++++++++++++++++++++++++++++++- tools/perf/util/symbol.h | 1 + 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index b1ed5f766cb3..6a08da41f76b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -39,6 +39,8 @@ static int dump_trace = 0; static int verbose; +static int print_line; + static unsigned long page_size; static unsigned long mmap_window = 32; @@ -84,6 +86,12 @@ typedef union event_union { struct period_event period; } event_t; + +struct sym_ext { + double percent; + char *path; +}; + static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; @@ -1034,6 +1042,8 @@ static int parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) { char *line = NULL, *tmp, *tmp2; + static const char *prev_line; + static const char *prev_color; unsigned int offset; size_t line_len; __u64 line_ip; @@ -1073,15 +1083,20 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) } if (line_ip != -1) { + const char *path = NULL; unsigned int hits = 0; double percent = 0.0; char *color = PERF_COLOR_NORMAL; + struct sym_ext *sym_ext = sym->priv; offset = line_ip - start; if (offset < len) hits = sym->hist[offset]; - if (sym->hist_sum) + if (sym_ext) { + path = sym_ext[offset].path; + percent = sym_ext[offset].percent; + } else if (sym->hist_sum) percent = 100.0 * hits / sym->hist_sum; /* @@ -1096,6 +1111,20 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) color = PERF_COLOR_GREEN; } + /* + * Also color the filename and line if needed, with + * the same color as the percentage.
Don't print it + * twice for close colored ips with the same filename:line + */ + if (path) { + if (!prev_line || strcmp(prev_line, path) + || color != prev_color) { + color_fprintf(stdout, color, " %s", path); + prev_line = path; + prev_color = color; + } + } + color_fprintf(stdout, color, " %7.2f", percent); printf(" : "); color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line); @@ -1109,6 +1138,67 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) return 0; } +static void free_source_line(struct symbol *sym, int len) +{ + struct sym_ext *sym_ext = sym->priv; + int i; + + if (!sym_ext) + return; + + for (i = 0; i < len; i++) + free(sym_ext[i].path); + free(sym_ext); + + sym->priv = NULL; +} + +/* Get the filename:line for the colored entries */ +static void get_source_line(struct symbol *sym, __u64 start, int len) +{ + int i; + char cmd[PATH_MAX * 2]; + struct sym_ext *sym_ext; + + if (!sym->hist_sum) + return; + + sym->priv = calloc(len, sizeof(struct sym_ext)); + if (!sym->priv) + return; + + sym_ext = sym->priv; + + for (i = 0; i < len; i++) { + char *path = NULL; + size_t line_len; + __u64 offset; + FILE *fp; + + sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; + if (sym_ext[i].percent <= 0.5) + continue; + + offset = start + i; + sprintf(cmd, "addr2line -e %s %016llx", vmlinux, offset); + fp = popen(cmd, "r"); + if (!fp) + continue; + + if (getline(&path, &line_len, fp) < 0 || !line_len) + goto next; + + sym_ext[i].path = malloc(sizeof(char) * line_len); + if (!sym_ext[i].path) + goto next; + + strcpy(sym_ext[i].path, path); + + next: + pclose(fp); + } +} + static void annotate_sym(struct dso *dso, struct symbol *sym) { char *filename = dso->name; @@ -1135,6 +1225,9 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) end = start + sym->end - sym->start + 1; len = sym->end - sym->start; + if (print_line) + get_source_line(sym, start, len); + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); if (verbose >= 3) @@ -1150,6 +1243,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) } pclose(file); + free_source_line(sym, len); } static void find_annotations(void) @@ -1308,6 +1402,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('l', "print-line", &print_line, + "print matching source lines (may be slow)"), OPT_END() }; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0d1292bd8270..5ad9b06c3f6f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -12,6 +12,7 @@ struct symbol { __u64 obj_start; __u64 hist_sum; __u64 *hist; + void *priv; char name[0]; }; From 971738f3669092dd247eaf89658f2685180492a0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Jun 2009 00:11:22 +0200 Subject: [PATCH 02/49] perf annotate: Print a sorted summary of annotated overhead lines It can be very annoying to scroll down the perf annotate output until we find the relevant overhead. Using the -l option, you now get a small summary, sorted by overhead, at the beginning of the output.
Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overheads above 0.5% are summarized. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 111 +++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6a08da41f76b..7a5b27867a96 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -25,6 +25,10 @@ #define SHOW_USER 2 #define SHOW_HV 4 +#define MIN_GREEN 0.5 +#define MIN_RED 5.0 + + static char const *input_name = "perf.data"; static char *vmlinux = "vmlinux"; @@ -88,6 +92,7 @@ typedef union event_union { struct sym_ext { + struct rb_node node; double percent; char *path; }; @@ -1038,6 +1043,24 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static char *get_color(double percent) +{ + char *color = PERF_COLOR_NORMAL; + + /* + * We color high-overhead entries in red, mid-overhead + * entries in green - and keep the low overhead places + * normal: + */ + if (percent >= MIN_RED) + color = PERF_COLOR_RED; + else { + if (percent > MIN_GREEN) + color = PERF_COLOR_GREEN; + } + return color; +} + static int parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) { @@ -1086,7 +1109,7 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) const char *path = NULL; unsigned int hits = 0; double percent = 0.0; - char *color = PERF_COLOR_NORMAL; + char *color; struct sym_ext *sym_ext = sym->priv; offset = line_ip - start; @@ -1099,17 +1122,7 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) } else if (sym->hist_sum) percent = 100.0 * hits / sym->hist_sum; - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= 5.0) - color = PERF_COLOR_RED; - else { - if (percent > 0.5) - color = PERF_COLOR_GREEN; - } + color = get_color(percent); /* * Also color the filename and line if needed,
with @@ -1138,6 +1151,28 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) return 0; } +static struct rb_root root_sym_ext; + +static void insert_source_line(struct sym_ext *sym_ext) +{ + struct sym_ext *iter; + struct rb_node **p = &root_sym_ext.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct sym_ext, node); + + if (sym_ext->percent > iter->percent) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&sym_ext->node, parent, p); + rb_insert_color(&sym_ext->node, &root_sym_ext); +} + static void free_source_line(struct symbol *sym, int len) { struct sym_ext *sym_ext = sym->priv; @@ -1151,6 +1186,7 @@ static void free_source_line(struct symbol *sym, int len) free(sym_ext); sym->priv = NULL; + root_sym_ext = RB_ROOT; } /* Get the filename:line for the colored entries */ @@ -1193,12 +1229,42 @@ static void get_source_line(struct symbol *sym, __u64 start, int len) goto next; strcpy(sym_ext[i].path, path); + insert_source_line(&sym_ext[i]); next: pclose(fp); } } +static void print_summary(char *filename) +{ + struct sym_ext *sym_ext; + struct rb_node *node; + + printf("\nSorted summary for file %s\n", filename); + printf("----------------------------------------------\n\n"); + + if (RB_EMPTY_ROOT(&root_sym_ext)) { + printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); + return; + } + + node = rb_first(&root_sym_ext); + while (node) { + double percent; + char *color; + char *path; + + sym_ext = rb_entry(node, struct sym_ext, node); + percent = sym_ext->percent; + color = get_color(percent); + path = sym_ext->path; + + color_fprintf(stdout, color, " %7.2f %s", percent, path); + node = rb_next(node); + } +} + static void annotate_sym(struct dso *dso, struct symbol *sym) { char *filename = dso->name; @@ -1211,13 +1277,6 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) if (dso == kernel_dso) filename = vmlinux; - printf("\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", filename); - printf("------------------------------------------------\n"); - - if (verbose >= 2) - printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - start = sym->obj_start; if (!start) start = sym->start; @@ -1225,8 +1284,17 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) end = start + sym->end - sym->start + 1; len = sym->end - sym->start; - if (print_line) + if (print_line) { get_source_line(sym, start, len); + print_summary(filename); + } + + printf("\n\n------------------------------------------------\n"); + printf(" Percent | Source code & Disassembly of %s\n", filename); + printf("------------------------------------------------\n"); + + if (verbose >= 2) + printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); @@ -1243,7 +1311,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) } pclose(file); - free_source_line(sym, len); + if (print_line) + free_source_line(sym, len); } static void find_annotations(void) From 87847b8f26cc7176ec9b239898dc7ce47a94e1a6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 13 Jun 2009 17:06:50 +1000 Subject: [PATCH 03/49] perf_counter: Fix atomic_set vs. atomic64_t type mismatch Using atomic_set on an atomic64_t variable gives a compiler warning on powerpc, and won't give the desired result at runtime. 
This fixes an instance of this error in the perf_counter code. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18995.20490.979429.244883@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 29b685f551aa..8d14a733f222 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1283,7 +1283,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) if (!interrupts) { perf_disable(); counter->pmu->disable(counter); - atomic_set(&hwc->period_left, 0); + atomic64_set(&hwc->period_left, 0); counter->pmu->enable(counter); perf_enable(); } From d5e8da6449d4ef4bac35ea9b9719a2cda02e7b39 Mon Sep 17 00:00:00 2001 From: Marti Raudsepp Date: Sat, 13 Jun 2009 02:35:01 +0300 Subject: [PATCH 04/49] perf_counter: Fix stack corruption in perf_read_hw With PERF_FORMAT_ID, perf_read_hw now needs space for up to 4 values. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8d14a733f222..e914daff03b5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1553,7 +1553,7 @@ static int perf_release(struct inode *inode, struct file *file) static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 values[3]; + u64 values[4]; int n; /* From 4d2be1267fcfb3a4d2198fd696aec5e3dcbce60e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 11 Jun 2009 15:28:09 +0530 Subject: [PATCH 05/49] perf_counter, x86: Check old-AMD performance monitoring support AMD has supported performance monitoring starting with the K7 (i.e. family 6), so disable it for earlier AMD CPUs. Signed-off-by: Jaswinder Singh Rajput Cc: Robert Richter Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1244714289.6923.0.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 275bc142cd5d..3c37c3930ca1 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1459,6 +1459,10 @@ static int intel_pmu_init(void) static int amd_pmu_init(void) { + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + x86_pmu = amd_pmu; switch (boot_cpu_data.x86) { From f4db43a38f7387c3b19c9565124c06ab0c5d6e9a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 13 Jun 2009 01:06:21 +0530 Subject: [PATCH 06/49] perf_counter, x86: Update AMD hw caching related event table All AMD models share the same hw caching related event table. Also complete the table with more events.
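For illustration, here is a minimal kernel-side sketch (a hypothetical helper, not part of this patch) of how such a [cache][op][result] table is consumed; the 0 / -1 conventions mirror the table entries in the diff below (0 = no event defined, -1 = combination not supported):

    static int resolve_cache_event(unsigned int cache, unsigned int op,
                                   unsigned int result, u64 *config)
    {
            u64 val;

            /* Reject indices outside the 3-dimensional table: */
            if (cache >= PERF_COUNT_HW_CACHE_MAX ||
                op >= PERF_COUNT_HW_CACHE_OP_MAX ||
                result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                    return -EINVAL;

            val = hw_cache_event_ids[cache][op][result];
            if (val == 0)
                    return -ENOENT;         /* no event defined */
            if (val == (u64)-1)
                    return -EINVAL;         /* combination not supported */

            *config = val;                  /* raw event code, e.g. 0x0040 */
            return 0;
    }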
Signed-off-by: Jaswinder Singh Rajput Cc: Robert Richter Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1244835381.2802.2.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 36 +++++++++++++----------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3c37c3930ca1..77a59a5566a8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -389,23 +389,23 @@ static u64 intel_pmu_raw_event(u64 event) return event & CORE_EVNTSEL_MASK; } -static const u64 amd_0f_hw_cache_event_ids +static const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [ C(L1D) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ }, }, [ C(L1I ) ] = { @@ -418,17 +418,17 @@ static const u64 amd_0f_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ [ C(RESULT_MISS) ] = 0, }, }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { @@ -438,8 +438,8 @@ static const u64 amd_0f_hw_cache_event_ids }, [ C(DTLB) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -1465,16 +1465,10 @@ static int amd_pmu_init(void) x86_pmu = amd_pmu; - switch (boot_cpu_data.x86) { - case 0x0f: - case 0x10: - case 0x11: - memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); - pr_cont("AMD Family 0f/10/11 events, "); - break; - } return 0; } From 44175b6f397a6724121eeaf0f072e2c912a46614 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Jun 2009 13:35:00 +0200 Subject: [PATCH 07/49] perf stat: Reorganize output - use IPC for the instruction normalization output - CPUs for the CPU utilization factor value. 
- print out time elapsed like the other rows - tidy up the task-clocks/cpu-clocks printout Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 67 ++++++++++++++++++++-------------- tools/perf/util/parse-events.c | 4 +- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c43e4a97dc42..c12804853eab 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -184,6 +184,40 @@ static void read_counter(int counter) runtime_cycles = count[0]; } +static void nsec_printout(int counter, __u64 *count) +{ + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + + if (walltime_nsecs) + fprintf(stderr, " # %10.3f CPUs", + (double)count[0] / (double)walltime_nsecs); + } +} + +static void abs_printout(int counter, __u64 *count) +{ + fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + + if (runtime_cycles && + attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + + fprintf(stderr, " # %10.3f IPC", + (double)count[0] / (double)runtime_cycles); + + return; + } + + if (runtime_nsecs) + fprintf(stderr, " # %10.3f M/sec", + (double)count[0]/runtime_nsecs*1000.0); +} + /* * Print out the results of a single counter: */ @@ -201,35 +235,15 @@ static void print_counter(int counter) return; } - if (nsec_counter(counter)) { - double msecs = (double)count[0] / 1000000; + if (nsec_counter(counter)) + nsec_printout(counter, count); + else + abs_printout(counter, count); - fprintf(stderr, " %14.6f %-20s", - msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - - if (walltime_nsecs) - fprintf(stderr, " # %11.3f CPU utilization factor", - (double)count[0] / (double)walltime_nsecs); - } - } else { - fprintf(stderr, " %14Ld %-20s", - count[0], event_name(counter)); - if (runtime_nsecs) - fprintf(stderr, " # %11.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); - if (runtime_cycles && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - - fprintf(stderr, " # %1.3f per cycle", - (double)count[0] / (double)runtime_cycles); - } - } if (scaled) fprintf(stderr, " (scaled from %.2f%%)", (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); } @@ -290,8 +304,7 @@ static int do_perf_stat(int argc, const char **argv) fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); + fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9); fprintf(stderr, "\n"); return 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a72586e1df0..f0c9f2627fe1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -63,8 +63,8 @@ static char *hw_event_names[] = { }; static char *sw_event_names[] = { - "cpu-clock-ticks", - "task-clock-ticks", + "cpu-clock-msecs", + "task-clock-msecs", "page-faults", "context-switches", "CPU-migrations", From 42202dd56c717f173cd0bf2390249e1bf5cf210b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Jun 2009 14:57:28 +0200 Subject: [PATCH 08/49] perf stat: Add feature to run and measure a command multiple times Add the --repeat 
feature to perf stat, which repeats a given command up to 100 times, collects the stats and calculates an average and a stddev. For example, the following 'perf stat' one-liner runs hackbench 5 times and prints a tabulated result of all metrics, with averages and noise levels (in percent) printed: aldebaran:~/linux/linux/tools/perf> ./perf stat --repeat 5 ~/hackbench 10 Time: 0.117 Time: 0.108 Time: 0.089 Time: 0.088 Time: 0.100 Performance counter stats for '/home/mingo/hackbench 10' (5 runs): 1243.989586 task-clock-msecs # 10.460 CPUs ( +- 4.720% ) 47706 context-switches # 0.038 M/sec ( +- 19.706% ) 387 CPU-migrations # 0.000 M/sec ( +- 3.608% ) 17793 page-faults # 0.014 M/sec ( +- 0.354% ) 3770941606 cycles # 3031.329 M/sec ( +- 4.621% ) 1566372416 instructions # 0.415 IPC ( +- 2.703% ) 16783421 cache-references # 13.492 M/sec ( +- 5.202% ) 7128590 cache-misses # 5.730 M/sec ( +- 7.420% ) 0.118924455 seconds time elapsed. The goal of this feature is to make it possible to rely on these statistics, and to learn how many times a command has to be repeated for the noise to go down to an acceptable level. (The -v option can be used to see a line printed out as each run progresses.) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 305 +++++++++++++++++++++++++++----------- 1 file changed, 217 insertions(+), 88 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c12804853eab..9eb42b1ae784 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -43,6 +43,7 @@ #include "util/parse-events.h" #include +#include static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { @@ -79,12 +80,34 @@ static const unsigned int default_count[] = { 10000, }; -static __u64 event_res[MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_COUNTERS]; +#define MAX_RUN 100 -static __u64 runtime_nsecs; -static __u64 walltime_nsecs; -static __u64 runtime_cycles; +static int run_count = 1; +static int run_idx = 0; + +static __u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static __u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + +//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; + + +static __u64 runtime_nsecs[MAX_RUN]; +static __u64 walltime_nsecs[MAX_RUN]; +static __u64 runtime_cycles[MAX_RUN]; + +static __u64 event_res_avg[MAX_COUNTERS][3]; +static __u64 event_res_noise[MAX_COUNTERS][3]; + +static __u64 event_scaled_avg[MAX_COUNTERS]; + +static __u64 runtime_nsecs_avg; +static __u64 runtime_nsecs_noise; + +static __u64 walltime_nsecs_avg; +static __u64 walltime_nsecs_noise; + +static __u64 runtime_cycles_avg; +static __u64 runtime_cycles_noise; static void create_perf_stat_counter(int counter) { @@ -140,7 +163,7 @@ static void read_counter(int counter) int cpu, nv; int scaled; - count = event_res[counter]; + count = event_res[run_idx][counter]; count[0] = count[1] = count[2] = 0; @@ -151,6 +174,8 @@ res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); assert(res == nv * sizeof(__u64)); + close(fd[cpu][counter]); + fd[cpu][counter] = -1; count[0] += single_count[0]; if (scale) { @@ -162,13 +187,13 @@ scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[counter] = -1; + event_scaled[run_idx][counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[counter] = 1; + event_scaled[run_idx][counter] = 1; count[0] = (unsigned long long)
((double)count[0] * count[1] / count[2] + 0.5); } @@ -178,82 +203,18 @@ static void read_counter(int counter) */ if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs = count[0]; + runtime_nsecs[run_idx] = count[0]; if (attrs[counter].type == PERF_TYPE_HARDWARE && attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles = count[0]; + runtime_cycles[run_idx] = count[0]; } -static void nsec_printout(int counter, __u64 *count) -{ - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); - - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - - if (walltime_nsecs) - fprintf(stderr, " # %10.3f CPUs", - (double)count[0] / (double)walltime_nsecs); - } -} - -static void abs_printout(int counter, __u64 *count) -{ - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); - - if (runtime_cycles && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - - fprintf(stderr, " # %10.3f IPC", - (double)count[0] / (double)runtime_cycles); - - return; - } - - if (runtime_nsecs) - fprintf(stderr, " # %10.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); -} - -/* - * Print out the results of a single counter: - */ -static void print_counter(int counter) -{ - __u64 *count; - int scaled; - - count = event_res[counter]; - scaled = event_scaled[counter]; - - if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", - "", event_name(counter)); - return; - } - - if (nsec_counter(counter)) - nsec_printout(counter, count); - else - abs_printout(counter, count); - - if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); - - fprintf(stderr, "\n"); -} - -static int do_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc, const char **argv) { unsigned long long t0, t1; + int status = 0; int counter; - int status; int pid; - int i; if (!system_wide) nr_cpus = 1; @@ -277,13 +238,168 @@ static int do_perf_stat(int argc, const char **argv) } } - while (wait(&status) >= 0) - ; + wait(&status); prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); - walltime_nsecs = t1 - t0; + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void print_noise(__u64 *count, __u64 *noise) +{ + if (run_count > 1) + fprintf(stderr, " ( +- %7.3f%% )", + (double)noise[0]/(count[0]+1)*100.0); +} + +static void nsec_printout(int counter, __u64 *count, __u64 *noise) +{ + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + + if (walltime_nsecs_avg) + fprintf(stderr, " # %10.3f CPUs ", + (double)count[0] / (double)walltime_nsecs_avg); + } + print_noise(count, noise); +} + +static void abs_printout(int counter, __u64 *count, __u64 *noise) +{ + fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + + if (runtime_cycles_avg && + attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + + fprintf(stderr, " # %10.3f IPC ", + (double)count[0] / (double)runtime_cycles_avg); + } else { + if (runtime_nsecs_avg) { + fprintf(stderr, " # %10.3f M/sec", + (double)count[0]/runtime_nsecs_avg*1000.0); + } + } + print_noise(count, noise); +} + +/* + * 
Print out the results of a single counter: + */ +static void print_counter(int counter) +{ + __u64 *count, *noise; + int scaled; + + count = event_res_avg[counter]; + noise = event_res_noise[counter]; + scaled = event_scaled_avg[counter]; + + if (scaled == -1) { + fprintf(stderr, " %14s %-20s\n", + "", event_name(counter)); + return; + } + + if (nsec_counter(counter)) + nsec_printout(counter, count, noise); + else + abs_printout(counter, count, noise); + + if (scaled) + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); + + fprintf(stderr, "\n"); +} + +/* + * Normalize noise values down to stddev: + */ +static void normalize(__u64 *val) +{ + double res; + + res = (double)*val / (run_count * sqrt((double)run_count)); + + *val = (__u64)res; +} + +/* + * Calculate the averages and noises: + */ +static void calc_avg(void) +{ + int i, j; + + for (i = 0; i < run_count; i++) { + runtime_nsecs_avg += runtime_nsecs[i]; + walltime_nsecs_avg += walltime_nsecs[i]; + runtime_cycles_avg += runtime_cycles[i]; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] += event_res[i][j][0]; + event_res_avg[j][1] += event_res[i][j][1]; + event_res_avg[j][2] += event_res[i][j][2]; + event_scaled_avg[j] += event_scaled[i][j]; + } + } + runtime_nsecs_avg /= run_count; + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } + + for (i = 0; i < run_count; i++) { + runtime_nsecs_noise += + abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + walltime_nsecs_noise += + abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + runtime_cycles_noise += + abs((__s64)(runtime_cycles[i] - runtime_cycles_avg)); + + for (j = 0; j < nr_counters; j++) { + event_res_noise[j][0] += + abs((__s64)(event_res[i][j][0] - event_res_avg[j][0])); + event_res_noise[j][1] += + abs((__s64)(event_res[i][j][1] - event_res_avg[j][1])); + event_res_noise[j][2] += + abs((__s64)(event_res[i][j][2] - event_res_avg[j][2])); + } + } + + normalize(&runtime_nsecs_noise); + normalize(&walltime_nsecs_noise); + normalize(&runtime_cycles_noise); + + for (j = 0; j < nr_counters; j++) { + normalize(&event_res_noise[j][0]); + normalize(&event_res_noise[j][1]); + normalize(&event_res_noise[j][2]); + } +} + +static void print_stat(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); + + run_idx = 0; fflush(stdout); @@ -293,21 +409,19 @@ static int do_perf_stat(int argc, const char **argv) for (i = 1; i < argc; i++) fprintf(stderr, " %s", argv[i]); - fprintf(stderr, "\':\n"); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) - read_counter(counter); + fprintf(stderr, "\'"); + if (run_count > 1) + fprintf(stderr, " (%d runs)", run_count); + fprintf(stderr, ":\n\n"); for (counter = 0; counter < nr_counters; counter++) print_counter(counter); fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); fprintf(stderr, "\n"); - - return 0; } static volatile int signr = -1; @@ -345,11 +459,15 @@ static const struct option options[] = { "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_INTEGER('r', "repeat", &run_count, + "repeat command and print average + stddev (max: 100)"), OPT_END() }; int cmd_stat(int argc, const char 
**argv, const char *prefix) { + int status; + page_size = sysconf(_SC_PAGE_SIZE); memcpy(attrs, default_attrs, sizeof(attrs)); @@ -357,6 +475,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); + if (run_count <= 0 || run_count > MAX_RUN) + usage_with_options(stat_usage, options); if (!nr_counters) nr_counters = 8; @@ -376,5 +496,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); - return do_perf_stat(argc, argv); + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + status = run_perf_stat(argc, argv); + } + + print_stat(argc, argv); + + return status; } From ef281a196d66b8bc2d067a3704712e5b93691fbc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Jun 2009 15:40:35 +0200 Subject: [PATCH 09/49] perf stat: Enable raw data to be printed If -vv (very verbose) is specified, print out raw data in the following format: $ perf stat -vv -r 3 ./loop_1b_instructions [ perf stat: executing run #1 ... ] [ perf stat: executing run #2 ... ] [ perf stat: executing run #3 ... ] debug: runtime[0]: 235871872 debug: walltime[0]: 236646752 debug: runtime_cycles[0]: 755150182 debug: counter/0[0]: 235871872 debug: counter/1[0]: 235871872 debug: counter/2[0]: 235871872 debug: scaled[0]: 0 debug: counter/0[1]: 2 debug: counter/1[1]: 235870662 debug: counter/2[1]: 235870662 debug: scaled[1]: 0 debug: counter/0[2]: 1 debug: counter/1[2]: 235870437 debug: counter/2[2]: 235870437 debug: scaled[2]: 0 debug: counter/0[3]: 140 debug: counter/1[3]: 235870298 debug: counter/2[3]: 235870298 debug: scaled[3]: 0 debug: counter/0[4]: 755150182 debug: counter/1[4]: 235870145 debug: counter/2[4]: 235870145 debug: scaled[4]: 0 debug: counter/0[5]: 1001411258 debug: counter/1[5]: 235868838 debug: counter/2[5]: 235868838 debug: scaled[5]: 0 debug: counter/0[6]: 27897 debug: counter/1[6]: 235868560 debug: counter/2[6]: 235868560 debug: scaled[6]: 0 debug: counter/0[7]: 2910 debug: counter/1[7]: 235868151 debug: counter/2[7]: 235868151 debug: scaled[7]: 0 debug: runtime[0]: 235980257 debug: walltime[0]: 236770942 debug: runtime_cycles[0]: 755114546 debug: counter/0[0]: 235980257 debug: counter/1[0]: 235980257 debug: counter/2[0]: 235980257 debug: scaled[0]: 0 debug: counter/0[1]: 3 debug: counter/1[1]: 235980049 debug: counter/2[1]: 235980049 debug: scaled[1]: 0 debug: counter/0[2]: 1 debug: counter/1[2]: 235979907 debug: counter/2[2]: 235979907 debug: scaled[2]: 0 debug: counter/0[3]: 135 debug: counter/1[3]: 235979780 debug: counter/2[3]: 235979780 debug: scaled[3]: 0 debug: counter/0[4]: 755114546 debug: counter/1[4]: 235979652 debug: counter/2[4]: 235979652 debug: scaled[4]: 0 debug: counter/0[5]: 1001439771 debug: counter/1[5]: 235979304 debug: counter/2[5]: 235979304 debug: scaled[5]: 0 debug: counter/0[6]: 23723 debug: counter/1[6]: 235979050 debug: counter/2[6]: 235979050 debug: scaled[6]: 0 debug: counter/0[7]: 2213 debug: counter/1[7]: 235978820 debug: counter/2[7]: 235978820 debug: scaled[7]: 0 debug: runtime[0]: 235888002 debug: walltime[0]: 236700533 debug: runtime_cycles[0]: 754881504 debug: counter/0[0]: 235888002 debug: counter/1[0]: 235888002 debug: counter/2[0]: 235888002 debug: scaled[0]: 0 debug: counter/0[1]: 2 debug: counter/1[1]: 235887793 debug: counter/2[1]: 235887793 debug: 
scaled[1]: 0 debug: counter/0[2]: 1 debug: counter/1[2]: 235887645 debug: counter/2[2]: 235887645 debug: scaled[2]: 0 debug: counter/0[3]: 135 debug: counter/1[3]: 235887499 debug: counter/2[3]: 235887499 debug: scaled[3]: 0 debug: counter/0[4]: 754881504 debug: counter/1[4]: 235887368 debug: counter/2[4]: 235887368 debug: scaled[4]: 0 debug: counter/0[5]: 1001401731 debug: counter/1[5]: 235887024 debug: counter/2[5]: 235887024 debug: scaled[5]: 0 debug: counter/0[6]: 24212 debug: counter/1[6]: 235886786 debug: counter/2[6]: 235886786 debug: scaled[6]: 0 debug: counter/0[7]: 1824 debug: counter/1[7]: 235886560 debug: counter/2[7]: 235886560 debug: scaled[7]: 0 Performance counter stats for '/home/mingo/loop_1b_instructions' (3 runs): 235.913377 task-clock-msecs # 0.997 CPUs ( +- 0.011% ) 2 context-switches # 0.000 M/sec ( +- 0.000% ) 1 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 136 page-faults # 0.001 M/sec ( +- 0.730% ) 755048744 cycles # 3200.534 M/sec ( +- 0.009% ) 1001417586 instructions # 1.326 IPC ( +- 0.001% ) 25277 cache-references # 0.107 M/sec ( +- 3.988% ) 2315 cache-misses # 0.010 M/sec ( +- 9.845% ) 0.236706075 seconds time elapsed. This allows the summary stats to be validated. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- tools/perf/builtin-stat.c | 46 ++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0cbd5d6874ec..e8346f95fbb0 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -160,7 +160,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') # CFLAGS and LDFLAGS are for the users to override from the command line. 
CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -LDFLAGS = -lpthread -lrt -lelf +LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9eb42b1ae784..e5b3c0ff03a9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -324,9 +324,9 @@ static void print_counter(int counter) } /* - * Normalize noise values down to stddev: + * normalize_noise noise values down to stddev: */ -static void normalize(__u64 *val) +static void normalize_noise(__u64 *val) { double res; @@ -335,6 +335,13 @@ static void normalize(__u64 *val) *val = (__u64)res; } +static void update_avg(const char *name, int idx, __u64 *avg, __u64 *val) +{ + *avg += *val; + + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} /* * Calculate the averages and noises: */ @@ -342,16 +349,23 @@ static void calc_avg(void) { int i, j; + if (verbose > 1) + fprintf(stderr, "\n"); + for (i = 0; i < run_count; i++) { - runtime_nsecs_avg += runtime_nsecs[i]; - walltime_nsecs_avg += walltime_nsecs[i]; - runtime_cycles_avg += runtime_cycles[i]; + update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); for (j = 0; j < nr_counters; j++) { - event_res_avg[j][0] += event_res[i][j][0]; - event_res_avg[j][1] += event_res[i][j][1]; - event_res_avg[j][2] += event_res[i][j][2]; - event_scaled_avg[j] += event_scaled[i][j]; + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); } } runtime_nsecs_avg /= run_count; @@ -382,14 +396,14 @@ static void calc_avg(void) } } - normalize(&runtime_nsecs_noise); - normalize(&walltime_nsecs_noise); - normalize(&runtime_cycles_noise); + normalize_noise(&runtime_nsecs_noise); + normalize_noise(&walltime_nsecs_noise); + normalize_noise(&runtime_cycles_noise); for (j = 0; j < nr_counters; j++) { - normalize(&event_res_noise[j][0]); - normalize(&event_res_noise[j][1]); - normalize(&event_res_noise[j][2]); + normalize_noise(&event_res_noise[j][0]); + normalize_noise(&event_res_noise[j][1]); + normalize_noise(&event_res_noise[j][2]); } } @@ -399,8 +413,6 @@ static void print_stat(int argc, const char **argv) calc_avg(); - run_idx = 0; - fflush(stdout); fprintf(stderr, "\n"); From c17c2db1f3cea41c3543025905d3582c6937dd95 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Jun 2009 17:39:23 +0200 Subject: [PATCH 10/49] perf annotate: Fixes for filename:line displays - fix addr2line on userspace binary: don't only check kernel image. 
- fix string allocation size for path: missing room for the terminating null char - fix overflow in symbol extra info Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1244907563-7820-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7a5b27867a96..94cea678fd7e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -1116,7 +1116,7 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) if (offset < len) hits = sym->hist[offset]; - if (sym_ext) { + if (offset < len && sym_ext) { path = sym_ext[offset].path; percent = sym_ext[offset].percent; } else if (sym->hist_sum) @@ -1190,7 +1190,8 @@ static void free_source_line(struct symbol *sym, int len) } /* Get the filename:line for the colored entries */ -static void get_source_line(struct symbol *sym, __u64 start, int len) +static void +get_source_line(struct symbol *sym, __u64 start, int len, char *filename) { int i; char cmd[PATH_MAX * 2]; @@ -1216,7 +1217,7 @@ static void get_source_line(struct symbol *sym, __u64 start, int len) continue; offset = start + i; - sprintf(cmd, "addr2line -e %s %016llx", vmlinux, offset); + sprintf(cmd, "addr2line -e %s %016llx", filename, offset); fp = popen(cmd, "r"); if (!fp) continue; @@ -1224,7 +1225,7 @@ static void get_source_line(struct symbol *sym, __u64 start, int len) if (getline(&path, &line_len, fp) < 0 || !line_len) goto next; - sym_ext[i].path = malloc(sizeof(char) * line_len); + sym_ext[i].path = malloc(sizeof(char) * line_len + 1); if (!sym_ext[i].path) goto next; @@ -1285,7 +1286,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) len = sym->end - sym->start; if (print_line) { - get_source_line(sym, start, len); + get_source_line(sym, start, len, filename); print_summary(filename); } From 8465b05046652cfde3d47692cab2e8ba962f140f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Jun 2009 14:44:07 +0200 Subject: [PATCH 11/49] perf report: Print out raw events in hex Print out events in hex dump format when -D is specified: 0x4868 [0x48]: event: 1 . . ... raw event: size 72 bytes . 0000: 01 00 00 00 00 00 48 00 d4 72 00 00 d4 72 00 00 ......H..r...r. . 0010: 00 00 40 f2 3e 00 00 00 00 30 01 00 00 00 00 00 ..@.>....0..... . 0020: 00 00 00 00 00 00 00 00 2f 75 73 72 2f 6c 69 62 ......../usr/li . 0030: 36 34 2f 6c 69 62 65 6c 66 2d 30 2e 31 34 31 2e 64/libelf-0.141 . 0040: 73 6f 00 00 00 00 00 00 f-0.141 . 0x4868 [0x48]: PERF_EVENT_MMAP 29396: [0x3ef2400000(0x13000) @ (nil)]: /usr/lib64/libelf-0.141.so This helps debugging the mis-parsing of data files, and helps the addition of new sample/trace formats.
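As a reading aid for such dumps: the first eight bytes of every record are the event header, so in the example above 01 00 00 00 decodes to type 1 (PERF_EVENT_MMAP) and 48 00 to size 0x48 = 72 bytes. A minimal sketch of that decoding step (a hypothetical helper; the header layout is the one used throughout this series):

    #include <stdio.h>
    #include <linux/types.h>

    struct perf_event_header {
            __u32   type;                   /* e.g. 1 == PERF_EVENT_MMAP */
            __u16   misc;
            __u16   size;                   /* 0x48 == 72 in the dump above */
    };

    static void decode_header(const unsigned char *raw)
    {
            const struct perf_event_header *hdr = (const void *)raw;

            printf("type %u, misc %#x, size %u\n",
                   hdr->type, hdr->misc, hdr->size);
    }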
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 82fa93b4db99..37515da637f7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1095,9 +1095,43 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static void trace_event(event_t *event) +{ + unsigned char *raw_event = (void *)event; + int i, j; + + if (!dump_trace) + return; + + dprintf(".\n. ... raw event: size %d bytes\n", event->header.size); + + for (i = 0; i < event->header.size; i++) { + if ((i & 15) == 0) + dprintf(". %04x: ", i); + + dprintf(" %02x", raw_event[i]); + + if (((i & 15) == 15) || i == event->header.size-1) { + dprintf(" "); + for (j = 0; j < 15-(i & 15); j++) + dprintf(" "); + for (j = 0; j < (i & 15); j++) { + if (isprint(raw_event[i-15+j])) + dprintf("%c", raw_event[i-15+j]); + else + dprintf("."); + } + dprintf("\n"); + } + } + dprintf(".\n"); +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { + trace_event(event); + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) return process_overflow_event(event, offset, head); @@ -1204,7 +1238,7 @@ more: size = event->header.size; - dprintf("%p [%p]: event: %d\n", + dprintf("\n%p [%p]: event: %d\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.type); From 3efa1cc99ec51bc7a7ae0011a16619fd20dbe6ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Jun 2009 15:04:15 +0200 Subject: [PATCH 12/49] perf record/report: Add call graph / call chain profiling Add the first steps of call-graph profiling: - add the -g (--call-graph) option to perf record - parse the call-graph record and print it out under -D (--dump-trace) The call-graph data is not put into the histogram yet, but it can be seen that it's being processed correctly: 0x3ce0 [0x38]: event: 35 . . ... raw event: size 56 bytes . 0000: 23 00 00 00 05 00 38 00 d4 df 0e 81 ff ff ff ff #.....8........ . 0010: 60 0b 00 00 60 0b 00 00 03 00 00 00 01 00 02 00 `...`.......... . 0020: d4 df 0e 81 ff ff ff ff a0 61 ed 41 36 00 00 00 .........a.A6.. . 0030: 04 92 e6 41 36 00 00 00 .a.A6.. . 0x3ce0 [0x38]: PERF_EVENT (IP, 5): 2912: 0xffffffff810edfd4 period: 1 ... chain: u:2, k:1, nr:3 ..... 0: 0xffffffff810edfd4 ..... 1: 0x3641ed61a0 ..... 2: 0x3641e69204 ... thread: perf:2912 ......
dso: [kernel] This shows a 3-entry call-graph, with one kernel-space and two user-space entries. Cc: Frederic Weisbecker Cc: Pekka Enberg Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 8 ++++++ tools/perf/builtin-report.c | 57 +++++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0f5771f615da..a177a591b52c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -37,6 +37,7 @@ static pid_t target_pid = -1; static int inherit = 1; static int force = 0; static int append_file = 0; +static int call_graph = 0; static int verbose = 0; static long samples; @@ -351,11 +352,16 @@ static void create_counter(int counter, int cpu, pid_t pid) int track = 1; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; attr->sample_freq = freq; } + + if (call_graph) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; @@ -555,6 +561,8 @@ static const struct option options[] = { "profile at this frequency"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), + OPT_BOOLEAN('g', "call-graph", &call_graph, + "do call-graph (stack chain/backtrace) recording"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 37515da637f7..aebba5659345 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -36,6 +36,7 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int dump_trace = 0; #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) +#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int verbose; static int full_paths; @@ -43,11 +44,19 @@ static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; +struct ip_chain_event { + __u16 nr; + __u16 hv; + __u16 kernel; + __u16 user; + __u64 ips[]; +}; + struct ip_event { struct perf_event_header header; __u64 ip; __u32 pid, tid; - __u64 period; + unsigned char __more_data[]; }; struct mmap_event { @@ -944,9 +953,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 ip = event->ip.ip; __u64 period = 1; struct map *map = NULL; + void *more_data = event->ip.__more_data; + struct ip_chain_event *chain; - if (event->header.type & PERF_SAMPLE_PERIOD) - period = event->ip.period; + if (event->header.type & PERF_SAMPLE_PERIOD) { + period = *(__u64 *)more_data; + more_data += sizeof(__u64); + } dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.misc, event->ip.pid, (void *)(long)ip, (long long)period); + if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + int i; + + chain = (void *)more_data; + + if (dump_trace) { + dprintf("... chain: u:%d, k:%d, nr:%d\n", + chain->user, + chain->kernel, + chain->nr); + + for (i = 0; i < chain->nr; i++) + dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); + } + } + dprintf(" ...
thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { @@ -1098,30 +1127,34 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) static void trace_event(event_t *event) { unsigned char *raw_event = (void *)event; + char *color = PERF_COLOR_BLUE; int i, j; if (!dump_trace) return; - dprintf(".\n. ... raw event: size %d bytes\n", event->header.size); + dprintf("."); + cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); for (i = 0; i < event->header.size; i++) { - if ((i & 15) == 0) - dprintf(". %04x: ", i); + if ((i & 15) == 0) { + dprintf("."); + cdprintf(" %04x: ", i); + } - dprintf(" %02x", raw_event[i]); + cdprintf(" %02x", raw_event[i]); if (((i & 15) == 15) || i == event->header.size-1) { - dprintf(" "); + cdprintf(" "); for (j = 0; j < 15-(i & 15); j++) - dprintf(" "); + cdprintf(" "); for (j = 0; j < (i & 15); j++) { if (isprint(raw_event[i-15+j])) - dprintf("%c", raw_event[i-15+j]); + cdprintf("%c", raw_event[i-15+j]); else - dprintf("."); + cdprintf("."); } - dprintf("\n"); + cdprintf("\n"); } } dprintf(".\n"); From 5a6cec3abbdb74244caab68db100825a8c4ac02d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 May 2009 11:25:09 +0200 Subject: [PATCH 13/49] perf_counter, x86: Fix call-chain walking Fix the ptregs variant when we hit user-mode tasks. Cc: Frederic Weisbecker Cc: Pekka Enberg Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 77a59a5566a8..09d8cb69c3f3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1644,7 +1644,9 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) const void __user *fp; int nr = entry->nr; - regs = (struct pt_regs *)current->thread.sp0 - 1; + if (!user_mode(regs)) + regs = task_pt_regs(current); + fp = (void __user *)regs->bp; callchain_store(entry, regs->ip); @@ -1656,7 +1658,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if (!copy_stack_frame(fp, &frame)) break; - if ((unsigned long)fp < user_stack_pointer(regs)) + if ((unsigned long)fp < regs->sp) break; callchain_store(entry, frame.return_address); From 038e836e97e70c4ad2b5058b07fc7207f50b59dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 09:57:59 +0200 Subject: [PATCH 14/49] perf_counter, x86: Fix kernel-space call-chains Kernel-space call-chains were trimmed at the first entry because we never processed anything beyond the first stack context. Allow the backtrace to jump from the NMI stack to the IRQ stack, then to the task stack, and finally to the user-space stack. Also calculate the stack and bp variables correctly so that the stack walker does not exit early. We can get deep traces as a result, visible in perf report -D output: 0x32af0 [0xe0]: PERF_EVENT (IP, 5): 15134: 0xffffffff815225fd period: 1 ... chain: u:2, k:22, nr:24 ..... 0: 0xffffffff815225fd ..... 1: 0xffffffff810ac51c ..... 2: 0xffffffff81018e29 ..... 3: 0xffffffff81523939 ..... 4: 0xffffffff81524b8f ..... 5: 0xffffffff81524bd9 ..... 6: 0xffffffff8105e498 ..... 7: 0xffffffff8152315a ..... 8: 0xffffffff81522c3a ..... 9: 0xffffffff810d9b74 ..... 10: 0xffffffff810dbeec ..... 11: 0xffffffff810dc3fb This is a 22-entry kernel-space chain. (We still only record reliable stack entries.)
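Given the ip_chain_event layout introduced earlier in this series (the nr/hv/kernel/user counts followed by the flat ips[] array), a consumer can split such a record back into its per-context parts. A minimal sketch, assuming hypervisor entries are stored first, then kernel, then user, as the dumps above suggest:

    static void split_chain(struct ip_chain_event *chain)
    {
            __u16 i;
            __u16 kstart = chain->hv;
            __u16 ustart = chain->hv + chain->kernel;

            for (i = kstart; i < ustart; i++)
                    printf("k: %p\n", (void *)(long)chain->ips[i]);
            for (i = ustart; i < chain->nr; i++)
                    printf("u: %p\n", (void *)(long)chain->ips[i]);
    }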
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 09d8cb69c3f3..6d5e7cfd97e7 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1575,8 +1575,8 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - /* Don't bother with IRQ stacks for now */ - return -1; + /* Process all stacks: */ + return 0; } static void backtrace_address(void *data, unsigned long addr, int reliable) @@ -1594,6 +1594,8 @@ static const struct stacktrace_ops backtrace_ops = { .address = backtrace_address, }; +#include "../dumpstack.h" + static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1601,26 +1603,20 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) char *stack; int nr = entry->nr; - callchain_store(entry, instruction_pointer(regs)); + callchain_store(entry, regs->ip); stack = ((char *)regs + sizeof(struct pt_regs)); #ifdef CONFIG_FRAME_POINTER - bp = frame_pointer(regs); + get_bp(bp); #else bp = 0; #endif - dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); + dump_trace(NULL, regs, (void *)&stack, bp, &backtrace_ops, entry); entry->kernel = entry->nr - nr; } - -struct stack_frame { - const void __user *next_fp; - unsigned long return_address; -}; - static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { int ret; @@ -1652,7 +1648,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, regs->ip); while (entry->nr < MAX_STACK_DEPTH) { - frame.next_fp = NULL; + frame.next_frame = NULL; frame.return_address = 0; if (!copy_stack_frame(fp, &frame)) @@ -1662,7 +1658,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) break; callchain_store(entry, frame.return_address); - fp = frame.next_fp; + fp = frame.next_frame; } entry->user = entry->nr - nr; From 613d8602292165f86ba1969784fea01a06d55900 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 08:17:12 +0200 Subject: [PATCH 15/49] perf record: Fix fast task-exit race Recording with -a (or with -p) can race with tasks going away: couldn't open /proc/8440/maps Causing an early exit() and no recording done. Do not abort the recording session - instead just skip that task. Also, only print the warnings under -v. 
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a177a591b52c..e1dfef24887f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -202,8 +202,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full) fd = open(filename, O_RDONLY); if (fd < 0) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); + /* + * We raced with a task exiting - just return: + */ + if (verbose) + fprintf(stderr, "couldn't open %s\n", filename); + return; } if (read(fd, bf, sizeof(bf)) < 0) { fprintf(stderr, "couldn't read %s\n", filename); @@ -273,8 +277,12 @@ static void pid_synthesize_mmap_samples(pid_t pid) fp = fopen(filename, "r"); if (fp == NULL) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); + /* + * We raced with a task exiting - just return: + */ + if (verbose) + fprintf(stderr, "couldn't open %s\n", filename); + return; } while (1) { char bf[BUFSIZ], *pbf = bf; From 75f937f24bd9c003dcb9d7d5509f23459f1f6000 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 15:05:12 +0200 Subject: [PATCH 16/49] perf_counter: Fix ctx->mutex vs counter->mutex inversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simon triggered a lockdep inversion report about us taking ctx->mutex vs counter->mutex in inverse orders. Fix that up. Reported-by: Simon Holm Thøgersen Tested-by: Simon Holm Thøgersen Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e914daff03b5..109a95723859 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1620,22 +1620,6 @@ static void perf_counter_reset(struct perf_counter *counter) perf_counter_update_userpage(counter); } -static void perf_counter_for_each_sibling(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *sibling; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - counter = counter->group_leader; - - func(counter); - list_for_each_entry(sibling, &counter->sibling_list, list_entry) - func(sibling); - mutex_unlock(&ctx->mutex); -} - /* * Holding the top-level counter's child_mutex means that any * descendant process that has inherited this counter will block @@ -1658,14 +1642,18 @@ static void perf_counter_for_each_child(struct perf_counter *counter, static void perf_counter_for_each(struct perf_counter *counter, void (*func)(struct perf_counter *)) { - struct perf_counter *child; + struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *sibling; - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - perf_counter_for_each_sibling(counter, func); - list_for_each_entry(child, &counter->child_list, child_list) - perf_counter_for_each_sibling(child, func); - mutex_unlock(&counter->child_mutex); + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + counter = counter->group_leader; + + perf_counter_for_each_child(counter, func); + func(counter); + list_for_each_entry(sibling, 
&counter->sibling_list, list_entry) + perf_counter_for_each_child(counter, func); + mutex_unlock(&ctx->mutex); } static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) From 465a454f254ee2ff7acc4aececbe31f8af046bc0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 12:31:37 +0200 Subject: [PATCH 17/49] x86, mm: Add __get_user_pages_fast() Introduce a gup_fast() variant which is usable from IRQ/NMI context. Signed-off-by: Peter Zijlstra CC: Nick Piggin Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/gup.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 6 +++++ 2 files changed, 62 insertions(+) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 6340cef6798a..697d5727c119 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + unsigned long flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed on x86. + * + * So long as we atomically load page table pointers versus teardown + * (which we do on x86, with the above PAE exception), we can follow the + * address down to the the page and take a ref on it. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address diff --git a/include/linux/mm.h b/include/linux/mm.h index ad613ed66ab0..b457bc047ab1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -862,6 +862,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +/* + * doesn't attempt to fault and will return short. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages); + /* * A callback you can register to apply pressure to ageable caches. * From 3ff0141aa3a03ca3388b40b36167d0a37919f3fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 12:40:41 +0200 Subject: [PATCH 18/49] x86: Add NMI types for kmap_atomic Two new kmap_atomic slots for NMI context. And teach pte_offset_map() about NMI context. 
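Background, for readers of the hunks below: kmap_atomic() hands out one fixed per-CPU fixmap slot per km_type, so if an NMI hits while process context has a pagetable page mapped via KM_PTE0, an NMI-side kmap_atomic(..., KM_PTE0) would silently clobber that live mapping. A minimal sketch of the slot-selection idea (pte_kmap_type() is an illustrative helper, not part of the patch):

#include <linux/hardirq.h>	/* in_nmi() */
#include <asm/kmap_types.h>

/*
 * Sketch: pick a kmap slot that cannot collide with the one the
 * interrupted context may still be using. NMI context gets its
 * own KM_NMI_PTE; everything else keeps using KM_PTE0.
 */
static inline enum km_type pte_kmap_type(void)
{
	return in_nmi() ? KM_NMI_PTE : KM_PTE0;
}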
Signed-off-by: Peter Zijlstra CC: Nick Piggin Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kmap_types.h | 4 +++- arch/x86/include/asm/pgtable_32.h | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h index 5759c165a5cf..ff00a44b7d0d 100644 --- a/arch/x86/include/asm/kmap_types.h +++ b/arch/x86/include/asm/kmap_types.h @@ -21,7 +21,9 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_NMI, +D(14) KM_NMI_PTE, +D(15) KM_TYPE_NR }; #undef D diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a2..85464971bca0 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -49,13 +49,14 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #endif #if defined(CONFIG_HIGHPTE) +#define __KM_PTE (in_nmi() ? KM_NMI_PTE : KM_PTE0) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ pte_index((address))) #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) +#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) #else #define pte_offset_map(dir, address) \

From 74193ef0ecab92535c8517f082f1f50504526c9b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 13:07:24 +0200 Subject: [PATCH 19/49] perf_counter: x86: Fix call-chain support to use NMI-safe methods

__copy_from_user_inatomic() isn't NMI-safe: it can trigger the page fault handler, which is another trap, and its return path invokes IRET, which will also close the NMI context. Therefore use a GUP-based approach to copy the stack frames over.

We tried an alternative solution as well: we used a forward-ported version of Mathieu Desnoyers's "NMI safe INT3 and Page Fault" patch that modifies the exception return path to use an open-coded IRET with explicit stack unrolling and TF checking. This didn't work, as it interacted with faulting user-space instructions, causing them not to restart properly, which corrupted user-space registers. Solving that would probably involve disassembling those instructions and backtracing the RIP. But even without that, the code was deemed too complex an addition to the already non-trivial x86 entry assembly code, so instead we went for this GUP-based method that does a software-walk of the pagetables.
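To see where the new copy primitive slots in: the user-space call-chain walk follows saved frame pointers, copying one stack frame per step through the GUP-based helper. A condensed sketch of that consumer (modeled on perf_callchain_user() below; the bailout condition is simplified here and the struct layout follows the kernel code of this era):

struct stack_frame {
	const void __user	*next_fp;	/* saved frame pointer */
	unsigned long		return_address;
};

static void user_backtrace_sketch(const void __user *fp,
				  struct perf_callchain_entry *entry)
{
	while (entry->nr < MAX_STACK_DEPTH) {
		struct stack_frame frame;

		/* copy_stack_frame() now goes through copy_from_user_nmi(): */
		if (!copy_stack_frame(fp, &frame))
			break;

		/* simplified sanity check: frames must move up the stack */
		if ((unsigned long)frame.next_fp <= (unsigned long)fp)
			break;

		callchain_store(entry, frame.return_address);
		fp = frame.next_fp;
	}
}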
Signed-off-by: Peter Zijlstra Cc: Nick Piggin Cc: Pekka Enberg Cc: Vegard Nossum Cc: Jeremy Fitzhardinge Cc: Mathieu Desnoyers Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 49 ++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6d5e7cfd97e7..e8c68a5091df 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1617,20 +1618,48 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) entry->kernel = entry->nr - nr; } -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) { + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; int ret; - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; - ret = 1; - pagefault_disable(); - if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); - return ret; + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + unsigned long bytes; + + bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); + + return bytes == sizeof(*frame); } static void @@ -1643,7 +1672,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if (!user_mode(regs)) regs = task_pt_regs(current); - fp = (void __user *)regs->bp; + fp = (void __user *)regs->bp; callchain_store(entry, regs->ip); From 3dfabc74c65904c9e6cf952391312d16ea772ef5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 11:24:38 +0200 Subject: [PATCH 20/49] perf report: Add per system call overhead histogram Take advantage of call-graph percounter sampling/recording to display a non-trivial histogram: the true, collapsed/summarized cost measurement, on a per system call total overhead basis: aldebaran:~/linux/linux/tools/perf> ./perf record -g -a -f ~/hackbench 10 aldebaran:~/linux/linux/tools/perf> ./perf report -s symbol --syscalls | head -10 # # (3536 samples) # # Overhead Symbol # ........ ...... # 40.75% [k] sys_write 40.21% [k] sys_read 4.44% [k] do_nmi ... This is done by accounting each (reliable) call-chain that chains back to a given system call to that system call function. [ So in the above example we can see that hackbench spends about 40% of its total time somewhere in sys_write() and 40% somewhere in sys_read(), the rest of the time is spent in user-space. The time is not spent in sys_write() _itself_ but in one of its many child functions. 
] Or, a recording of a (source files are already in the page-cache) kernel build: $ perf record -g -m 512 -f -- make -j32 kernel $ perf report -s s --syscalls | grep '\[k\]' | grep -v nmi 4.14% [k] do_page_fault 1.20% [k] sys_write 1.10% [k] sys_open 0.63% [k] sys_exit_group 0.48% [k] smp_apic_timer_interrupt 0.37% [k] sys_read 0.37% [k] sys_execve 0.20% [k] sys_mmap 0.18% [k] sys_close 0.14% [k] sys_munmap 0.13% [k] sys_poll 0.09% [k] sys_newstat 0.07% [k] sys_clone 0.06% [k] sys_newfstat 0.05% [k] sys_access 0.05% [k] schedule Shows the true total cost of each syscall variant that gets used during a kernel build. This profile reveals it that pagefaults are the costliest, followed by read()/write(). An interesting detail: timer interrupts cost 0.5% - or 0.5 seconds per 100 seconds of kernel build-time. (this was done with HZ=1000) The summary is done in 'perf report', i.e. in the post-processing stage - so once we have a good call-graph recording, this type of non-trivial high-level analysis becomes possible. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Frederic Weisbecker Cc: Pekka Enberg LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index aebba5659345..1e2f5dde312c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,6 +40,7 @@ static int dump_trace = 0; static int verbose; static int full_paths; +static int collapse_syscalls; static unsigned long page_size; static unsigned long mmap_window = 32; @@ -983,6 +984,15 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) for (i = 0; i < chain->nr; i++) dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); } + if (collapse_syscalls) { + /* + * Find the all-but-last kernel entry + * amongst the call-chains - to get + * to the level of system calls: + */ + if (chain->kernel >= 2) + ip = chain->ips[chain->kernel-2]; + } } dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); @@ -1343,6 +1353,8 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), + OPT_BOOLEAN('S', "syscalls", &collapse_syscalls, + "show per syscall summary overhead, using call graph"), OPT_END() }; From 90c8f954534ba15e4542ab00dd9f0e58b071518c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jun 2009 21:36:52 +1000 Subject: [PATCH 21/49] perf_counter: powerpc: Fix two compile warnings This fixes a couple of compile warnings that crept into the powerpc perf_counter code recently: CC arch/powerpc/kernel/perf_counter.o arch/powerpc/kernel/perf_counter.c: In function 'record_and_restart': arch/powerpc/kernel/perf_counter.c:1016: warning: unused variable 'addr' arch/powerpc/kernel/perf_counter.c: In function 'hw_perf_counter_init': arch/powerpc/kernel/perf_counter.c:891: warning: 'ev' may be used uninitialized in this function Stephen Rothwell reported this against linux-next as well. 
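The 'may be used uninitialized' warning is the classic switch-without-default pattern; a reduced stand-alone example of what the compiler objects to (illustrative code, not the kernel's):

#include <errno.h>

static int pick_event(int type)
{
	int ev;			/* only assigned in the known cases */

	switch (type) {
	case 0:
		ev = 100;
		break;
	case 1:
		ev = 200;
		break;
	default:
		return -EINVAL;	/* the fix below adds this bail-out;
				 * without it, 'ev' is read uninitialized
				 * for unhandled types */
	}

	return ev;
}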
Reported-by: Stephen Rothwell Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18998.12884.787039.22202@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bb202388170e..e6dc1850191c 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -913,6 +913,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) case PERF_TYPE_RAW: ev = counter->attr.config; break; + default: + return ERR_PTR(-EINVAL); } counter->hw.config_base = ev; counter->hw.idx = 0; @@ -1013,7 +1015,7 @@ static void record_and_restart(struct perf_counter *counter, long val, u64 period = counter->hw.sample_period; s64 prev, delta, left; int record = 0; - u64 addr, mmcra, sdsync; + u64 mmcra, sdsync; /* we don't have to worry about interrupts here */ prev = atomic64_read(&counter->hw.prev_count); From 9974458e2f9a11dbd2f4bd14fab5a79af4907b41 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jun 2009 21:45:16 +1000 Subject: [PATCH 22/49] perf_counter: Make set_perf_counter_pending() declaration common At present, every architecture that supports perf_counters has to declare set_perf_counter_pending() in its arch-specific headers. This consolidates the declarations into a single declaration in one common place, include/linux/perf_counter.h. On powerpc, we continue to provide a static inline definition of set_perf_counter_pending() in the powerpc hw_irq.h. Also, this removes from the x86 perf_counter.h the unused null definitions of {test,clear}_perf_counter_pending. Reported-by: Mike Frysinger Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <18998.13388.920691.523227@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 1 - arch/powerpc/include/asm/perf_counter.h | 2 ++ arch/x86/include/asm/perf_counter.h | 5 ----- include/linux/perf_counter.h | 1 + 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 53512374e1c9..1974cf191b03 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -163,7 +163,6 @@ static inline unsigned long test_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index cc7c887705b8..b398a84edced 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -10,6 +10,8 @@ */ #include +#include + #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b3..5fb33e160ea0 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,11 +84,6 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -extern void set_perf_counter_pending(void); - -#define clear_perf_counter_pending() do { } while (0) -#define test_perf_counter_pending() (0) - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git 
a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1b3118a1023a..eccae437fe37 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -604,6 +604,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_free_task(struct task_struct *task); +extern void set_perf_counter_pending(void); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void);

From e2eae0f5605b90a0838608043c21050b08b6dd95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 16:15:19 +0200 Subject: [PATCH 23/49] perf report: Fix 32-bit printf format

Yong Wang reported the following compiler warning: builtin-report.c: In function 'process_overflow_event': builtin-report.c:984: error: cast to pointer from integer of different size

This happens because we try to print ->ips[] out with a limited format, losing the high 32 bits. Print it out using %016Lx instead.

Reported-by: Yong Wang Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1e2f5dde312c..f86bb07c0e84 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -982,7 +982,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) chain->nr); for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); + dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); } if (collapse_syscalls) { /*

From 0990b1c65729012a63e0eeca93aaaafea4e9a064 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 16:46:05 +0200 Subject: [PATCH 24/49] x86: Add NMI types for kmap_atomic, fix

I just realized this has a kmap_atomic bug in... The below would fix it - but it complicates this code some more. Alternatively I would have to introduce something like pte_offset_map_irq(), which would do the irq/nmi detection itself and leave the regular code paths alone; however, that would mean either duplicating the gup_fast() pagewalk or passing down a pte function pointer, which would only duplicate the gup_pte_range() bit - neither is really attractive ...
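For concreteness, the rejected alternative would have looked roughly like this (hypothetical, never-merged sketch, shown only to contrast with the __KM_PTE approach taken below):

/*
 * Hypothetical pte_offset_map_irq(): do the irq/nmi detection in a
 * dedicated variant and leave the regular pte_offset_map() alone.
 * The drawback: gup_pte_range() - or the whole gup_fast() pagewalk -
 * would have to be duplicated (or be handed a function pointer)
 * just so that the fast-GUP path reaches this variant.
 */
#define pte_offset_map_irq(dir, address)				\
	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),			\
		in_nmi() ? KM_NMI_PTE :					\
		in_irq() ? KM_IRQ_PTE : KM_PTE0) +			\
	 pte_index((address)))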
Signed-off-by: Peter Zijlstra CC: Nick Piggin Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kmap_types.h | 11 ++++++----- arch/x86/include/asm/pgtable_32.h | 5 ++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h index ff00a44b7d0d..f86613846198 100644 --- a/arch/x86/include/asm/kmap_types.h +++ b/arch/x86/include/asm/kmap_types.h @@ -19,11 +19,12 @@ D(7) KM_PTE0, D(8) KM_PTE1, D(9) KM_IRQ0, D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_NMI, -D(14) KM_NMI_PTE, -D(15) KM_TYPE_NR +D(11) KM_IRQ_PTE, +D(12) KM_SOFTIRQ0, +D(13) KM_SOFTIRQ1, +D(14) KM_NMI, +D(15) KM_NMI_PTE, +D(16) KM_TYPE_NR }; #undef D diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 85464971bca0..01fd9461d323 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -49,7 +49,10 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #endif #if defined(CONFIG_HIGHPTE) -#define __KM_PTE (in_nmi() ? KM_NMI_PTE : KM_PTE0) +#define __KM_PTE \ + (in_nmi() ? KM_NMI_PTE : \ + in_irq() ? KM_IRQ_PTE : \ + KM_PTE0) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ pte_index((address)))

From 6e7d6fdcbeefa9434653b5e5da12909636ea1d52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2009 15:51:44 +0200 Subject: [PATCH 25/49] perf report: Add --sort <call> --call <$regex>

Implement sorting by callchain symbols, --sort <call>. It will create a new column which will show a match to --call $regex or "[unmatched]".

Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 209 +++++++++++++++++++++++++++--------- 1 file changed, 158 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f86bb07c0e84..cd74b2e58adb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,11 +40,13 @@ static int dump_trace = 0; static int verbose; static int full_paths; -static int collapse_syscalls; static unsigned long page_size; static unsigned long mmap_window = 32; +static char *call = "^sys_"; +static regex_t call_regex; + struct ip_chain_event { __u16 nr; __u16 hv; __u16 kernel; __u16 user; __u64 ips[]; }; @@ -463,6 +465,7 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; + struct symbol *call; __u64 ip; char level; @@ -483,6 +486,16 @@ struct sort_entry { size_t (*print)(FILE *fp, struct hist_entry *); }; +static int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + /* --sort pid */ static int64_t @@ -517,14 +530,8 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) char *comm_l = left->thread->comm; char *comm_r = right->thread->comm; - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); return strcmp(comm_l, comm_r); } @@ -550,14 +557,8 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) struct dso *dso_l = left->dso; struct dso *dso_r = right->dso; - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); return strcmp(dso_l->name,
dso_r->name); } @@ -617,7 +618,38 @@ static struct sort_entry sort_sym = { .print = sort__sym_print, }; +/* --sort call */ + +static int64_t +sort__call_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct symbol *sym_l = left->call; + struct symbol *sym_r = right->call; + + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + return strcmp(sym_l->name, sym_r->name); +} + +static size_t +sort__call_print(FILE *fp, struct hist_entry *self) +{ + size_t ret = 0; + + ret += fprintf(fp, "%-20s", self->call ? self->call->name : "[unmatched]"); + + return ret; +} + +static struct sort_entry sort_call = { + .header = "Callchain symbol ", + .cmp = sort__call_cmp, + .print = sort__call_print, +}; + static int sort__need_collapse = 0; +static int sort__has_call = 0; struct sort_dimension { char *name; @@ -630,6 +662,7 @@ static struct sort_dimension sort_dimensions[] = { { .name = "comm", .entry = &sort_comm, }, { .name = "dso", .entry = &sort_dso, }, { .name = "symbol", .entry = &sort_sym, }, + { .name = "call", .entry = &sort_call, }, }; static LIST_HEAD(hist_entry__sort_list); @@ -650,6 +683,18 @@ static int sort_dimension__add(char *tok) if (sd->entry->collapse) sort__need_collapse = 1; + if (sd->entry == &sort_call) { + int ret = regcomp(&call_regex, call, REG_EXTENDED); + if (ret) { + char err[BUFSIZ]; + + regerror(ret, &call_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", call, err); + exit(-1); + } + sort__has_call = 1; + } + list_add_tail(&sd->entry->list, &hist_entry__sort_list); sd->taken = 1; @@ -730,13 +775,76 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) return ret; } +/* + * + */ + +static struct symbol * +resolve_symbol(struct thread *thread, struct map **mapp, + struct dso **dsop, __u64 *ipp) +{ + struct dso *dso = dsop ? *dsop : NULL; + struct map *map = mapp ? *mapp : NULL; + uint64_t ip = *ipp; + + if (!thread) + return NULL; + + if (dso) + goto got_dso; + + if (map) + goto got_map; + + map = thread__find_map(thread, ip); + if (map != NULL) { + if (mapp) + *mapp = map; +got_map: + ip = map->map_ip(map, ip); + *ipp = ip; + + dso = map->dso; + } else { + /* + * If this is outside of all known maps, + * and is a negative address, try to look it + * up in the kernel dso, as it might be a + * vsyscall (which executes in user-mode): + */ + if ((long long)ip < 0) + dso = kernel_dso; + } + dprintf(" ...... dso: %s\n", dso ? 
dso->name : ""); + + if (dsop) + *dsop = dso; + + if (!dso) + return NULL; +got_dso: + return dso->find_symbol(dso, ip); +} + +static struct symbol *call__match(struct symbol *sym) +{ + if (!sym) + return NULL; + + if (sym->name && !regexec(&call_regex, sym->name, 0, NULL, 0)) + return sym; + + return NULL; +} + /* * collect histogram counts */ static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level, __u64 count) + struct symbol *sym, __u64 ip, struct ip_chain_event *chain, + char level, __u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -752,6 +860,33 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; + if (sort__has_call && chain) { + int i, nr = chain->hv; + struct symbol *sym; + struct dso *dso; + __u64 ip; + + for (i = 0; i < chain->kernel; i++) { + ip = chain->ips[nr + i]; + dso = kernel_dso; + sym = resolve_symbol(thread, NULL, &dso, &ip); + entry.call = call__match(sym); + if (entry.call) + goto got_call; + } + nr += i; + + for (i = 0; i < chain->user; i++) { + ip = chain->ips[nr + i]; + sym = resolve_symbol(thread, NULL, NULL, &ip); + entry.call = call__match(sym); + if (entry.call) + goto got_call; + } + nr += i; + } +got_call: + while (*p != NULL) { parent = *p; he = rb_entry(parent, struct hist_entry, rb_node); @@ -955,7 +1090,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; - struct ip_chain_event *chain; + struct ip_chain_event *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { period = *(__u64 *)more_data; @@ -984,15 +1119,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) for (i = 0; i < chain->nr; i++) dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); } - if (collapse_syscalls) { - /* - * Find the all-but-last kernel entry - * amongst the call-chains - to get - * to the level of system calls: - */ - if (chain->kernel >= 2) - ip = chain->ips[chain->kernel-2]; - } } dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); @@ -1016,22 +1142,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) show = SHOW_USER; level = '.'; - map = thread__find_map(thread, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dprintf(" ...... dso: %s\n", dso ? dso->name : ""); - } else { show = SHOW_HV; level = 'H'; @@ -1039,12 +1149,9 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) } if (show & show_mask) { - struct symbol *sym = NULL; + struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - if (dso) - sym = dso->find_symbol(dso, ip); - - if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { fprintf(stderr, "problem incrementing symbol count, skipping event\n"); return -1; @@ -1353,8 +1460,8 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol. 
Default: pid,symbol"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), - OPT_BOOLEAN('S', "syscalls", &collapse_syscalls, - "show per syscall summary overhead, using call graph"), + OPT_STRING('c', "call", &call, "regex", + "regex to use for --sort call"), OPT_END() };

From 60f916dee612130c9977a8edd4abee98334202ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 19:00:20 +0200 Subject: [PATCH 26/49] perf_counter: x86: Set the period in the intel overflow handler

Commit 9e350de37ac960 ("perf_counter: Accurate period data") missed a spot, which caused all Intel-PMU samples to have a period of 0. This broke auto-freq sampling.

Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index e8c68a5091df..ce1ae3f1f86c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1224,6 +1224,8 @@ again: if (!intel_pmu_save_and_restart(counter)) continue; + data.period = counter->hw.last_period; + if (perf_counter_overflow(counter, 1, &data)) intel_pmu_disable_counter(&counter->hw, bit); }

From 5aa75a0fd4bc6402899e06fdb853cab024d65055 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 20:11:41 +0200 Subject: [PATCH 27/49] perf_counter tools: Replace isprint() with issane()

The Git utils came with a ctype replacement that doesn't provide isprint(). Add a replacement. Solves a build bug on certain distros.

Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- tools/perf/util/util.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cd74b2e58adb..707f60ce32fd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1266,7 +1266,7 @@ static void trace_event(event_t *event) for (j = 0; j < 15-(i & 15); j++) cdprintf(" "); for (j = 0; j < (i & 15); j++) { - if (isprint(raw_event[i-15+j])) + if (issane(raw_event[i-15+j])) cdprintf("%c", raw_event[i-15+j]); else cdprintf("."); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 76590a16c271..ce9b514f60a3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -343,6 +343,7 @@ extern unsigned char sane_ctype[256]; #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define issane(x) sane_istest(x,GIT_SPACE | GIT_DIGIT | GIT_ALPHA | GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20)

From b25bcf2f133b1e6216c3d40be394756107d3880f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 07:01:03 +0200 Subject: [PATCH 28/49] perf report: Tidy up the "--parent <regex>" and "--sort parent" call-chain features

Instead of the ambiguous 'call' naming use the much more specific 'parent' naming: - rename --call <regex> to --parent <regex> - rename --sort call to --sort parent - rename [unmatched] to [other] - to signal that this is not an error but the inverse set Also add
pagefaults to the default parent-symbol pattern too, as it's a 'syscall overhead category' in a sense. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 67 +++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 707f60ce32fd..986834623b43 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -44,8 +44,8 @@ static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; -static char *call = "^sys_"; -static regex_t call_regex; +static char *parent_pattern = "^sys_|^do_page_fault"; +static regex_t parent_regex; struct ip_chain_event { __u16 nr; @@ -465,7 +465,7 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; - struct symbol *call; + struct symbol *parent; __u64 ip; char level; @@ -618,13 +618,13 @@ static struct sort_entry sort_sym = { .print = sort__sym_print, }; -/* --sort call */ +/* --sort parent */ static int64_t -sort__call_cmp(struct hist_entry *left, struct hist_entry *right) +sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) { - struct symbol *sym_l = left->call; - struct symbol *sym_r = right->call; + struct symbol *sym_l = left->parent; + struct symbol *sym_r = right->parent; if (!sym_l || !sym_r) return cmp_null(sym_l, sym_r); @@ -633,23 +633,23 @@ sort__call_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__call_print(FILE *fp, struct hist_entry *self) +sort__parent_print(FILE *fp, struct hist_entry *self) { size_t ret = 0; - ret += fprintf(fp, "%-20s", self->call ? self->call->name : "[unmatched]"); + ret += fprintf(fp, "%-20s", self->parent ? 
self->parent->name : "[other]"); return ret; } -static struct sort_entry sort_call = { - .header = "Callchain symbol ", - .cmp = sort__call_cmp, - .print = sort__call_print, +static struct sort_entry sort_parent = { + .header = "Parent symbol ", + .cmp = sort__parent_cmp, + .print = sort__parent_print, }; static int sort__need_collapse = 0; -static int sort__has_call = 0; +static int sort__has_parent = 0; struct sort_dimension { char *name; @@ -662,7 +662,7 @@ static struct sort_dimension sort_dimensions[] = { { .name = "comm", .entry = &sort_comm, }, { .name = "dso", .entry = &sort_dso, }, { .name = "symbol", .entry = &sort_sym, }, - { .name = "call", .entry = &sort_call, }, + { .name = "parent", .entry = &sort_parent, }, }; static LIST_HEAD(hist_entry__sort_list); @@ -683,16 +683,17 @@ static int sort_dimension__add(char *tok) if (sd->entry->collapse) sort__need_collapse = 1; - if (sd->entry == &sort_call) { - int ret = regcomp(&call_regex, call, REG_EXTENDED); + if (sd->entry == &sort_parent) { + int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { char err[BUFSIZ]; - regerror(ret, &call_regex, err, sizeof(err)); - fprintf(stderr, "Invalid regex: %s\n%s", call, err); + regerror(ret, &parent_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", + parent_pattern, err); exit(-1); } - sort__has_call = 1; + sort__has_parent = 1; } list_add_tail(&sd->entry->list, &hist_entry__sort_list); @@ -831,7 +832,7 @@ static struct symbol *call__match(struct symbol *sym) if (!sym) return NULL; - if (sym->name && !regexec(&call_regex, sym->name, 0, NULL, 0)) + if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) return sym; return NULL; @@ -844,7 +845,7 @@ static struct symbol *call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct symbol *sym, __u64 ip, struct ip_chain_event *chain, - char level, __u64 count) + char level, __u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -860,7 +861,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - if (sort__has_call && chain) { + if (sort__has_parent && chain) { int i, nr = chain->hv; struct symbol *sym; struct dso *dso; @@ -870,22 +871,22 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, ip = chain->ips[nr + i]; dso = kernel_dso; sym = resolve_symbol(thread, NULL, &dso, &ip); - entry.call = call__match(sym); - if (entry.call) - goto got_call; + entry.parent = call__match(sym); + if (entry.parent) + goto got_parent; } nr += i; for (i = 0; i < chain->user; i++) { ip = chain->ips[nr + i]; sym = resolve_symbol(thread, NULL, NULL, &ip); - entry.call = call__match(sym); - if (entry.call) - goto got_call; + entry.parent = call__match(sym); + if (entry.parent) + goto got_parent; } nr += i; } -got_call: +got_parent: while (*p != NULL) { parent = *p; @@ -1457,11 +1458,11 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol. 
Default: pid,symbol"), + "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), - OPT_STRING('c', "call", &call, "regex", - "regex to use for --sort call"), + OPT_STRING('p', "parent", &parent_pattern, "regex", + "regex filter to identify parent, see: '--sort parent'"), OPT_END() }; From 7522060c95395f479ee4a6af3bbf9e097e92e48f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 08:00:17 +0200 Subject: [PATCH 29/49] perf report: Add validation of call-chain entries Add boundary checks for call-chain events. In case of corrupted entries we could crash otherwise. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 20 +++++----- tools/perf/builtin-report.c | 74 ++++++++++++++++++++++-------------- 2 files changed, 56 insertions(+), 38 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index eccae437fe37..a7d3a61a59b7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -337,6 +337,16 @@ enum perf_event_type { */ }; +#define MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u16 nr; + __u16 hv; + __u16 kernel; + __u16 user; + __u64 ip[MAX_STACK_DEPTH]; +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -652,16 +662,6 @@ extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); -#define MAX_STACK_DEPTH 255 - -struct perf_callchain_entry { - u16 nr; - u16 hv; - u16 kernel; - u16 user; - u64 ip[MAX_STACK_DEPTH]; -}; - extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_paranoid; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 986834623b43..e14e98676171 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -39,6 +39,8 @@ static int dump_trace = 0; #define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int verbose; +#define eprintf(x...) 
do { if (verbose) fprintf(stderr, x); } while (0) + static int full_paths; static unsigned long page_size; @@ -47,14 +49,6 @@ static unsigned long mmap_window = 32; static char *parent_pattern = "^sys_|^do_page_fault"; static regex_t parent_regex; -struct ip_chain_event { - __u16 nr; - __u16 hv; - __u16 kernel; - __u16 user; - __u64 ips[]; -}; - struct ip_event { struct perf_event_header header; __u64 ip; @@ -131,15 +125,11 @@ static struct dso *dsos__findnew(const char *name) nr = dso__load(dso, NULL, verbose); if (nr < 0) { - if (verbose) - fprintf(stderr, "Failed to open: %s\n", name); + eprintf("Failed to open: %s\n", name); goto out_delete_dso; } - if (!nr && verbose) { - fprintf(stderr, - "No symbols found in: %s, maybe install a debug package?\n", - name); - } + if (!nr) + eprintf("No symbols found in: %s, maybe install a debug package?\n", name); dsos__add(dso); @@ -844,7 +834,7 @@ static struct symbol *call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, struct ip_chain_event *chain, + struct symbol *sym, __u64 ip, struct perf_callchain_entry *chain, char level, __u64 count) { struct rb_node **p = &hist.rb_node; @@ -868,7 +858,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, __u64 ip; for (i = 0; i < chain->kernel; i++) { - ip = chain->ips[nr + i]; + ip = chain->ip[nr + i]; dso = kernel_dso; sym = resolve_symbol(thread, NULL, &dso, &ip); entry.parent = call__match(sym); @@ -878,7 +868,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, nr += i; for (i = 0; i < chain->user; i++) { - ip = chain->ips[nr + i]; + ip = chain->ip[nr + i]; sym = resolve_symbol(thread, NULL, NULL, &ip); entry.parent = call__match(sym); if (entry.parent) @@ -1080,6 +1070,30 @@ static unsigned long total = 0, total_fork = 0, total_unknown = 0; +static int validate_chain(struct perf_callchain_entry *chain, event_t *event) +{ + unsigned int chain_size; + + if (chain->nr > MAX_STACK_DEPTH) + return -1; + if (chain->hv > MAX_STACK_DEPTH) + return -1; + if (chain->kernel > MAX_STACK_DEPTH) + return -1; + if (chain->user > MAX_STACK_DEPTH) + return -1; + if (chain->hv + chain->kernel + chain->user != chain->nr) + return -1; + + chain_size = event->header.size; + chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; + + if (chain->nr*sizeof(__u64) > chain_size) + return -1; + + return 0; +} + static int process_overflow_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1091,7 +1105,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; - struct ip_chain_event *chain = NULL; + struct perf_callchain_entry *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { period = *(__u64 *)more_data; @@ -1111,21 +1125,26 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) chain = (void *)more_data; - if (dump_trace) { - dprintf("... chain: u:%d, k:%d, nr:%d\n", - chain->user, - chain->kernel, - chain->nr); + dprintf("... chain: u:%d, k:%d, nr:%d\n", + chain->user, + chain->kernel, + chain->nr); + if (validate_chain(chain, event) < 0) { + eprintf("call-chain problem with event, skipping it.\n"); + return 0; + } + + if (dump_trace) { for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); + dprintf("..... %2d: %016Lx\n", i, chain->ip[i]); } } dprintf(" ... 
thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { - fprintf(stderr, "problem processing %d event, skipping it.\n", + eprintf("problem processing %d event, skipping it.\n", event->header.type); return -1; } @@ -1153,8 +1172,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); + eprintf("problem incrementing symbol count, skipping event\n"); return -1; } } From a73c7d84a1975b44c0ebd03c2dec288af1426349 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 09:44:20 +0200 Subject: [PATCH 30/49] perf_counter tools: Add and use isprint() Introduce isprint() to print out raw event dumps to ASCII, etc. (This is an extension to upstream Git's ctype.c.) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: [ removed openssl.h inclusion from util.h - it leaked ctype.h ] Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- tools/perf/util/ctype.c | 17 +++++++++++------ tools/perf/util/util.h | 19 ++++++++----------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e14e98676171..9a3805f0c9f2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1285,7 +1285,7 @@ static void trace_event(event_t *event) for (j = 0; j < 15-(i & 15); j++) cdprintf(" "); for (j = 0; j < (i & 15); j++) { - if (issane(raw_event[i-15+j])) + if (isprint(raw_event[i-15+j])) cdprintf("%c", raw_event[i-15+j]); else cdprintf("."); diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index b90ec004f29c..0b791bd346bc 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -11,16 +11,21 @@ enum { D = GIT_DIGIT, G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ + P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */ + + PS = GIT_SPACE | GIT_PRINT_EXTRA, }; unsigned char sane_ctype[256] = { +/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ + PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ + D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ + P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */ + P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ + A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ /* Nothing in the 128.. range */ }; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ce9b514f60a3..b8cfed776d81 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -100,11 +100,6 @@ #include #endif -#ifndef NO_OPENSSL -#include -#include -#endif - /* On most systems would have given us this, but * not on some systems (e.g. GNU/Hurd). 
*/ @@ -332,18 +327,20 @@ static inline int has_extension(const char *filename, const char *ext) #undef tolower #undef toupper extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 +#define GIT_SPACE 0x01 +#define GIT_DIGIT 0x02 +#define GIT_ALPHA 0x04 +#define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define GIT_PRINT_EXTRA 0x20 +#define GIT_PRINT 0x3E #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isascii(x) (((x) & ~0x7f) == 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define issane(x) sane_istest(x,GIT_SPACE | GIT_DIGIT | GIT_ALPHA | GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) +#define isprint(x) sane_istest(x,GIT_PRINT) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20) From 105988c015943e77092a6568bc5fb7e386df6ccd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:50:04 +1000 Subject: [PATCH 31/49] perf_counter: powerpc: Enable use of software counters on 32-bit powerpc This enables the perf_counter subsystem on 32-bit powerpc. Since we don't have any support for hardware counters on 32-bit powerpc yet, only software counters can be used. Besides selecting HAVE_PERF_COUNTERS for 32-bit powerpc as well as 64-bit, the main thing this does is add an implementation of set_perf_counter_pending(). This needs to arrange for perf_counter_do_pending() to be called when interrupts are enabled. Rather than add code to local_irq_restore as 64-bit does, the 32-bit set_perf_counter_pending() generates an interrupt by setting the decrementer to 1 so that a decrementer interrupt will become pending in 1 or 2 timebase ticks (if a decrementer interrupt isn't already pending). When interrupts are enabled, timer_interrupt() will be called, and some new code in there calls perf_counter_do_pending(). We use a per-cpu array of flags to indicate whether we need to call perf_counter_do_pending() or not. This introduces a couple of new Kconfig symbols: PPC_HAVE_PMU_SUPPORT, which is selected by processor families for which we have hardware PMU support (currently only PPC64), and PPC_PERF_CTRS, which enables the powerpc-specific perf_counter back-end. 
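The usage pattern this enables, in sketch form (overflow_in_hard_ctx() and queue_pending_work() are illustrative names, not kernel symbols):

/*
 * Code running with interrupts disabled - say, a counter overflow
 * in an interrupt handler - must not deliver wakeups or signals
 * directly. It queues the work and arms the decrementer; once
 * interrupts are enabled again, timer_interrupt() notices the
 * per-cpu flag and runs perf_counter_do_pending().
 */
static void overflow_in_hard_ctx(struct perf_counter *counter)
{
	queue_pending_work(counter);	/* illustrative helper */
	set_perf_counter_pending();	/* flag + set_dec(1) */
}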
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55404.103840.393470@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/hw_irq.h | 5 ++++- arch/powerpc/include/asm/perf_counter.h | 10 ++++++++-- arch/powerpc/kernel/Makefile | 6 +++--- arch/powerpc/kernel/time.c | 25 +++++++++++++++++++++++++ arch/powerpc/platforms/Kconfig.cputype | 11 ++++++++++- 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9fb344d5a86a..bf6cedfa05db 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,6 +126,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_PERF_COUNTERS config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 10a642df014e..867ab8ed69b3 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS + +#ifdef CONFIG_PPC64 static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -154,8 +156,9 @@ static inline void clear_perf_counter_pending(void) "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } +#endif /* CONFIG_PPC64 */ -#else +#else /* CONFIG_PERF_COUNTERS */ static inline unsigned long test_perf_counter_pending(void) { diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index b398a84edced..2c2d9f643df0 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -57,10 +57,16 @@ extern struct power_pmu *ppmu; struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) - extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +/* + * Only override the default definitions in include/linux/perf_counter.h + * if we have hardware PMU support. 
+ */ +#ifdef CONFIG_PPC_PERF_CTRS +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + /* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 612b0c4dc26d..c5f93f061927 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -95,9 +95,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ - power5-pmu.o power5+-pmu.o power6-pmu.o \ - power7-pmu.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ + power5+-pmu.o power6-pmu.o power7-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 15391c2ab013..eae4511ceeac 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ +#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_counter_pending); + +void set_perf_counter_pending(void) +{ + get_cpu_var(perf_counter_pending) = 1; + set_dec(1); + put_cpu_var(perf_counter_pending); +} + +#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) +#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 + +#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ + +#define test_perf_counter_pending() 0 +#define clear_perf_counter_pending() + +#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ + /* * For iSeries shared processors, we have to let the hypervisor * set the hardware decrementer. We set a virtual decrementer @@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 + if (test_perf_counter_pending()) { + clear_perf_counter_pending(); + perf_counter_do_pending(); + } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index cca6b4fc719a..dd9f3ec5ee30 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,7 +1,7 @@ config PPC64 bool "64-bit kernel" default n - select HAVE_PERF_COUNTERS + select PPC_HAVE_PMU_SUPPORT help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -243,6 +243,15 @@ config VIRT_CPU_ACCOUNTING If in doubt, say Y here. +config PPC_HAVE_PMU_SUPPORT + bool + +config PPC_PERF_CTRS + def_bool y + depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT + help + This enables the powerpc-specific perf_counter back-end. + config SMP depends on PPC_STD_MMU || FSL_BOOKE bool "Symmetric multi-processing support" From 448d64f8f4c147db466c549550767cc515a4d34c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:51:13 +1000 Subject: [PATCH 32/49] perf_counter: powerpc: Use unsigned long for register and constraint values This changes the powerpc perf_counter back-end to use unsigned long types for hardware register values and for the value/mask pairs used in checking whether a given set of events fit within the hardware constraints. This is in preparation for adding support for the PMU on some 32-bit powerpc processors. 
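As background for the value/mask machinery being retyped here: two events can be counted together only if their constraint bitfields are compatible. A simplified sketch of the test, modeled on power_check_constraints() (two events only; the real code iterates over all events and their alternatives):

/*
 * Select-type bitfields must match exactly under both masks;
 * counter-type bitfields accumulate via add_fields, and test_adder
 * is chosen so that over-committing such a field carries into a
 * bit that the mask does cover, exposing the conflict.
 */
static int constraints_compatible(unsigned long v1, unsigned long m1,
				  unsigned long v2, unsigned long m2,
				  unsigned long addf, unsigned long tadd)
{
	unsigned long nv;

	nv = (v1 | v2) + (v1 & v2 & addf);	/* combine both events */

	if (((nv + tadd) ^ v1) & m1)		/* event 1 violated? */
		return 0;
	if (((nv + tadd) ^ v2) & m2)		/* event 2 violated? */
		return 0;

	return 1;
}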
On 32-bit processors the hardware registers are only 32 bits wide, and the PMU structure is generally simpler, so 32 bits should be ample for expressing the hardware constraints. On 64-bit processors, unsigned long is 64 bits wide, so using unsigned long vs. u64 (unsigned long long) makes no actual difference. This makes some other very minor changes: adjusting whitespace to line things up in initialized structures, and simplifying some code in hw_perf_disable(). Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55473.26174.331511@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 35 ++++++----- arch/powerpc/kernel/perf_counter.c | 20 +++--- arch/powerpc/kernel/power4-pmu.c | 74 +++++++++++----------- arch/powerpc/kernel/power5+-pmu.c | 79 ++++++++++++----------- arch/powerpc/kernel/power5-pmu.c | 83 +++++++++++++------------ arch/powerpc/kernel/power6-pmu.c | 57 ++++++++--------- arch/powerpc/kernel/power7-pmu.c | 46 +++++++------- arch/powerpc/kernel/ppc970-pmu.c | 47 +++++++------- 8 files changed, 229 insertions(+), 212 deletions(-) diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 2c2d9f643df0..2ceb0fefa93a 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -21,21 +21,22 @@ * describe the PMU on a particular POWER-family CPU. */ struct power_pmu { - int n_counter; - int max_alternatives; - u64 add_fields; - u64 test_adder; - int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], u64 mmcr[]); - int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); - int (*get_alternatives)(u64 event, unsigned int flags, - u64 alt[]); - void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); - int (*limited_pmc_event)(u64 event); - u32 flags; - int n_generic; - int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + int n_counter; + int max_alternatives; + unsigned long add_fields; + unsigned long test_adder; + int (*compute_mmcr)(u64 events[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]); + int (*get_constraint)(u64 event, unsigned long *mskp, + unsigned long *valp); + int (*get_alternatives)(u64 event, unsigned int flags, + u64 alt[]); + void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + int (*limited_pmc_event)(u64 event); + u32 flags; + int n_generic; + int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; }; @@ -68,8 +69,8 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); #endif /* - * The power_pmu.get_constraint function returns a 64-bit value and - * a 64-bit mask that express the constraints between this event and + * The power_pmu.get_constraint function returns a 32/64-bit value and + * a 32/64-bit mask that express the constraints between this event and * other events. 
* * The value and mask are divided up into (non-overlapping) bitfields diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index e6dc1850191c..9300638b8c26 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -29,7 +29,7 @@ struct cpu_hw_counters { struct perf_counter *counter[MAX_HWCOUNTERS]; u64 events[MAX_HWCOUNTERS]; unsigned int flags[MAX_HWCOUNTERS]; - u64 mmcr[3]; + unsigned long mmcr[3]; struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; }; @@ -135,15 +135,15 @@ static void write_pmc(int idx, unsigned long val) static int power_check_constraints(u64 event[], unsigned int cflags[], int n_ev) { - u64 mask, value, nv; + unsigned long mask, value, nv; u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; + unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; int i, j; - u64 addf = ppmu->add_fields; - u64 tadd = ppmu->test_adder; + unsigned long addf = ppmu->add_fields; + unsigned long tadd = ppmu->test_adder; if (n_ev > ppmu->n_counter) return -1; @@ -403,14 +403,12 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) void hw_perf_disable(void) { struct cpu_hw_counters *cpuhw; - unsigned long ret; unsigned long flags; local_irq_save(flags); cpuhw = &__get_cpu_var(cpu_hw_counters); - ret = cpuhw->disabled; - if (!ret) { + if (!cpuhw->disabled) { cpuhw->disabled = 1; cpuhw->n_added = 0; @@ -1013,9 +1011,9 @@ static void record_and_restart(struct perf_counter *counter, long val, struct pt_regs *regs, int nmi) { u64 period = counter->hw.sample_period; + unsigned long mmcra, sdsync; s64 prev, delta, left; int record = 0; - u64 mmcra, sdsync; /* we don't have to worry about interrupts here */ prev = atomic64_read(&counter->hw.prev_count); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 07bd308a5fa7..81a1708f83b2 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -179,22 +179,22 @@ static short mmcr1_adder_bits[8] = { */ static struct unitinfo { - u64 value, mask; - int unit; - int lowerbit; + unsigned long value, mask; + int unit; + int lowerbit; } p4_unitinfo[16] = { - [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, - [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, + [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 }, + [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, [PM_ISU1_ALT] = - { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, - [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, + { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, + [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, [PM_IFU_ALT] = - { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, - [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, - [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, - [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, - [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, - [PM_GPS] = { 0x00000000000000ull, 
0x00000000000000ull, PM_GPS, 0 } + { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, + [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 }, + [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 }, + [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 }, + [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 }, + [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 } }; static unsigned char direct_marked_event[8] = { @@ -249,10 +249,11 @@ static int p4_marked_instr_event(u64 event) return (mask >> (byte * 8 + bit)) & 1; } -static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int p4_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, lower, sh; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; int grp = -1; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -282,14 +283,14 @@ static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) value |= p4_unitinfo[unit].value; sh = p4_unitinfo[unit].lowerbit; if (sh > 1) - value |= (u64)lower << sh; + value |= (unsigned long)lower << sh; else if (lower != sh) return -1; unit = p4_unitinfo[unit].unit; /* Set byte lane select field */ mask |= 0xfULL << (28 - 4 * byte); - value |= (u64)unit << (28 - 4 * byte); + value |= (unsigned long)unit << (28 - 4 * byte); } if (grp == 0) { /* increment PMC1/2/5/6 field */ @@ -353,9 +354,9 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p4_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel, lower; unsigned int ttm, grp; unsigned int pmc_inuse = 0; @@ -429,9 +430,11 @@ static int p4_compute_mmcr(u64 event[], int n_ev, return -1; /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ - mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; - mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; - mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; + mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2]) + << MMCR1_TTM0SEL_SH; + mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2) + << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH; /* Set TTCxSEL fields. */ if (unitlower & 0xe) @@ -456,7 +459,8 @@ static int p4_compute_mmcr(u64 event[], int n_ev, ttm = unit - 1; /* 2->1, 3->2 */ else ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } } @@ -519,7 +523,7 @@ static int p4_compute_mmcr(u64 event[], int n_ev, return 0; } -static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { /* * Setting the PMCxSEL field to 0 disables PMC x. 
@@ -584,15 +588,15 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; struct power_pmu power4_pmu = { - .n_counter = 8, - .max_alternatives = 5, - .add_fields = 0x0000001100005555ull, - .test_adder = 0x0011083300000000ull, - .compute_mmcr = p4_compute_mmcr, - .get_constraint = p4_get_constraint, - .get_alternatives = p4_get_alternatives, - .disable_pmc = p4_disable_pmc, - .n_generic = ARRAY_SIZE(p4_generic_events), - .generic_events = p4_generic_events, - .cache_events = &power4_cache_events, + .n_counter = 8, + .max_alternatives = 5, + .add_fields = 0x0000001100005555ul, + .test_adder = 0x0011083300000000ul, + .compute_mmcr = p4_compute_mmcr, + .get_constraint = p4_get_constraint, + .get_alternatives = p4_get_alternatives, + .disable_pmc = p4_disable_pmc, + .n_generic = ARRAY_SIZE(p4_generic_events), + .generic_events = p4_generic_events, + .cache_events = &power4_cache_events, }; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 41e5d2d958d4..aef144d503b0 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -126,20 +126,21 @@ static const int grsel_shift[8] = { }; /* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, - [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, - [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, - [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, - [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, - [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, +static unsigned long unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0x3200000000ul, 0x0100000000ul }, + [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul }, + [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul }, + [PM_IFU] = { 0x3200000000ul, 0x2100000000ul }, + [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul }, + [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul }, }; -static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int power5p_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, sh; int bit, fmask; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { @@ -171,17 +172,18 @@ static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) bit = event & 7; fmask = (bit == 6)? 
7: 3; sh = grsel_shift[bit]; - mask |= (u64)fmask << sh; - value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + mask |= (unsigned long)fmask << sh; + value |= (unsigned long)((event >> PM_GRS_SH) & fmask) + << sh; } /* Set byte lane select field */ - mask |= 0xfULL << (24 - 4 * byte); - value |= (u64)unit << (24 - 4 * byte); + mask |= 0xfUL << (24 - 4 * byte); + value |= (unsigned long)unit << (24 - 4 * byte); } if (pmc < 5) { /* need a counter from PMC1-4 set */ - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; + mask |= 0x8000000000000ul; + value |= 0x1000000000000ul; } *maskp = mask; *valp = value; @@ -452,10 +454,10 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm; int i, isbus, bit, grsel; @@ -517,7 +519,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; } ttmuse = 0; for (; i <= PM_GRS; ++i) { @@ -525,7 +527,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; } if (ttmuse > 1) return -1; @@ -540,10 +542,11 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, unit = PM_ISU0_ALT; } else if (unit == PM_LSU1 + 1) { /* select lower word of LSU1 for this byte */ - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); } ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ @@ -568,7 +571,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, if (isbus && (byte & 2) && (psel == 8 || psel == 0x10 || psel == 0x28)) /* add events on higher-numbered bus */ - mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); } else { /* Instructions or run cycles on PMC5/6 */ --pmc; @@ -576,7 +579,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, if (isbus && unit == PM_GRS) { bit = psel & 7; grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; - mmcr1 |= (u64)grsel << grsel_shift[bit]; + mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; } if (power5p_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; @@ -599,7 +602,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, return 0; } -static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { if (pmc <= 3) mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); @@ -655,17 +658,17 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; struct power_pmu power5p_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x7000000000055ull, - .test_adder = 0x3000040000000ull, - .compute_mmcr = power5p_compute_mmcr, - .get_constraint = power5p_get_constraint, - .get_alternatives = power5p_get_alternatives, - .disable_pmc = power5p_disable_pmc, - .limited_pmc_event = power5p_limited_pmc_event, - .flags = PPMU_LIMITED_PMC5_6, - .n_generic = ARRAY_SIZE(power5p_generic_events), - .generic_events = power5p_generic_events, - 
.cache_events = &power5p_cache_events, + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000000000055ul, + .test_adder = 0x3000040000000ul, + .compute_mmcr = power5p_compute_mmcr, + .get_constraint = power5p_get_constraint, + .get_alternatives = power5p_get_alternatives, + .disable_pmc = power5p_disable_pmc, + .limited_pmc_event = power5p_limited_pmc_event, + .flags = PPMU_LIMITED_PMC5_6, + .n_generic = ARRAY_SIZE(power5p_generic_events), + .generic_events = power5p_generic_events, + .cache_events = &power5p_cache_events, }; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 05600b66221a..8694c73bfb52 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -130,20 +130,21 @@ static const int grsel_shift[8] = { }; /* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, - [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, - [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, - [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, - [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, - [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, +static unsigned long unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul }, + [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul }, + [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul }, + [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul }, + [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul }, + [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul }, }; -static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int power5_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, sh; int bit, fmask; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; int grp = -1; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -178,8 +179,9 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) bit = event & 7; fmask = (bit == 6)? 
7: 3; sh = grsel_shift[bit]; - mask |= (u64)fmask << sh; - value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + mask |= (unsigned long)fmask << sh; + value |= (unsigned long)((event >> PM_GRS_SH) & fmask) + << sh; } /* * Bus events on bytes 0 and 2 can be counted @@ -188,22 +190,22 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) if (!pmc) grp = byte & 1; /* Set byte lane select field */ - mask |= 0xfULL << (24 - 4 * byte); - value |= (u64)unit << (24 - 4 * byte); + mask |= 0xfUL << (24 - 4 * byte); + value |= (unsigned long)unit << (24 - 4 * byte); } if (grp == 0) { /* increment PMC1/2 field */ - mask |= 0x200000000ull; - value |= 0x080000000ull; + mask |= 0x200000000ul; + value |= 0x080000000ul; } else if (grp == 1) { /* increment PMC3/4 field */ - mask |= 0x40000000ull; - value |= 0x10000000ull; + mask |= 0x40000000ul; + value |= 0x10000000ul; } if (pmc < 5) { /* need a counter from PMC1-4 set */ - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; + mask |= 0x8000000000000ul; + value |= 0x1000000000000ul; } *maskp = mask; *valp = value; @@ -383,10 +385,10 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; @@ -457,7 +459,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; } ttmuse = 0; for (; i <= PM_GRS; ++i) { @@ -465,7 +467,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, continue; if (ttmuse++) return -1; - mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; } if (ttmuse > 1) return -1; @@ -480,10 +482,11 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unit = PM_ISU0_ALT; } else if (unit == PM_LSU1 + 1) { /* select lower word of LSU1 for this byte */ - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); } ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + mmcr1 |= (unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ @@ -513,7 +516,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, --pmc; if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) /* add events on higher-numbered bus */ - mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); } else { /* Instructions or run cycles on PMC5/6 */ --pmc; @@ -521,7 +524,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, if (isbus && unit == PM_GRS) { bit = psel & 7; grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; - mmcr1 |= (u64)grsel << grsel_shift[bit]; + mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; } if (power5_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; @@ -541,7 +544,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, return 0; } -static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { if (pmc <= 3) mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); @@ -597,15 +600,15 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; struct power_pmu power5_pmu = { - .n_counter = 6, - 
.max_alternatives = MAX_ALT, - .add_fields = 0x7000090000555ull, - .test_adder = 0x3000490000000ull, - .compute_mmcr = power5_compute_mmcr, - .get_constraint = power5_get_constraint, - .get_alternatives = power5_get_alternatives, - .disable_pmc = power5_disable_pmc, - .n_generic = ARRAY_SIZE(power5_generic_events), - .generic_events = power5_generic_events, - .cache_events = &power5_cache_events, + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000090000555ul, + .test_adder = 0x3000490000000ul, + .compute_mmcr = power5_compute_mmcr, + .get_constraint = power5_get_constraint, + .get_alternatives = power5_get_alternatives, + .disable_pmc = power5_disable_pmc, + .n_generic = ARRAY_SIZE(power5_generic_events), + .generic_events = power5_generic_events, + .cache_events = &power5_cache_events, }; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 46f74bebcfd9..8898622ac28c 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -41,9 +41,9 @@ #define MMCR1_NESTSEL_SH 45 #define MMCR1_NESTSEL_MSK 0x7 #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) -#define MMCR1_PMC1_LLA ((u64)1 << 44) -#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) -#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) +#define MMCR1_PMC1_LLA (1ul << 44) +#define MMCR1_PMC1_LLA_VALUE (1ul << 39) +#define MMCR1_PMC1_ADDR_SEL (1ul << 35) #define MMCR1_PMC1SEL_SH 24 #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_MSK 0xff @@ -173,10 +173,10 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; @@ -215,7 +215,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, /* check for conflict on this byte of event bus */ if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) return -1; - mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); + mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b); ttmset |= 1 << b; if (u == 5) { /* Nest events have a further mux */ @@ -224,7 +224,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, MMCR1_NESTSEL(mmcr1) != s) return -1; ttmset |= 0x10; - mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; + mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH; } if (0x30 <= psel && psel <= 0x3d) { /* these need the PMCx_ADDR_SEL bits */ @@ -243,7 +243,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, if (power6_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; if (pmc < 4) - mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); + mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); } mmcr[0] = 0; if (pmc_inuse & 1) @@ -265,10 +265,11 @@ static int p6_compute_mmcr(u64 event[], int n_ev, * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 * 32-34 select field: nest (subunit) event selector */ -static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int p6_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, sh, subunit; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { @@ -282,11 +283,11 @@ static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; sh = byte * 4 + (16 - PM_UNIT_SH); mask |= PM_UNIT_MSKS << sh; - value 
|= (u64)(event & PM_UNIT_MSKS) << sh; + value |= (unsigned long)(event & PM_UNIT_MSKS) << sh; if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; - mask |= (u64)PM_SUBUNIT_MSK << 32; - value |= (u64)subunit << 32; + mask |= (unsigned long)PM_SUBUNIT_MSK << 32; + value |= (unsigned long)subunit << 32; } } if (pmc <= 4) { @@ -458,7 +459,7 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return nalt; } -static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { /* Set PMCxSEL to 0 to disable PMCx */ if (pmc <= 3) @@ -516,17 +517,17 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; struct power_pmu power6_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x1555, - .test_adder = 0x3000, - .compute_mmcr = p6_compute_mmcr, - .get_constraint = p6_get_constraint, - .get_alternatives = p6_get_alternatives, - .disable_pmc = p6_disable_pmc, - .limited_pmc_event = p6_limited_pmc_event, - .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, - .n_generic = ARRAY_SIZE(power6_generic_events), - .generic_events = power6_generic_events, - .cache_events = &power6_cache_events, + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x1555, + .test_adder = 0x3000, + .compute_mmcr = p6_compute_mmcr, + .get_constraint = p6_get_constraint, + .get_alternatives = p6_get_alternatives, + .disable_pmc = p6_disable_pmc, + .limited_pmc_event = p6_limited_pmc_event, + .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, + .n_generic = ARRAY_SIZE(power6_generic_events), + .generic_events = power6_generic_events, + .cache_events = &power6_cache_events, }; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index b72e7a19d054..658d1ae436a0 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -71,10 +71,11 @@ * 0-9: Count of events needing PMC1..PMC5 */ -static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int power7_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, sh; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { @@ -224,10 +225,10 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr1 = 0; - u64 mmcra = 0; + unsigned long mmcr1 = 0; + unsigned long mmcra = 0; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; @@ -265,11 +266,14 @@ static int power7_compute_mmcr(u64 event[], int n_ev, --pmc; } if (pmc <= 3) { - mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); - mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); + mmcr1 |= (unsigned long) unit + << (MMCR1_TTM0SEL_SH - 4 * pmc); + mmcr1 |= (unsigned long) combine + << (MMCR1_PMC1_COMBINE_SH - pmc); mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); if (unit == 6) /* L2 events */ - mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; + mmcr1 |= (unsigned long) l2sel + << MMCR1_L2SEL_SH; } if (power7_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; @@ -287,10 +291,10 @@ static int power7_compute_mmcr(u64 event[], int n_ev, return 0; } -static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { if (pmc <= 3) - mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); + mmcr[1] 
&= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power7_generic_events[] = { @@ -343,15 +347,15 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; struct power_pmu power7_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT + 1, - .add_fields = 0x1555ull, - .test_adder = 0x3000ull, - .compute_mmcr = power7_compute_mmcr, - .get_constraint = power7_get_constraint, - .get_alternatives = power7_get_alternatives, - .disable_pmc = power7_disable_pmc, - .n_generic = ARRAY_SIZE(power7_generic_events), - .generic_events = power7_generic_events, - .cache_events = &power7_cache_events, + .n_counter = 6, + .max_alternatives = MAX_ALT + 1, + .add_fields = 0x1555ul, + .test_adder = 0x3000ul, + .compute_mmcr = power7_compute_mmcr, + .get_constraint = power7_get_constraint, + .get_alternatives = power7_get_alternatives, + .disable_pmc = power7_disable_pmc, + .n_generic = ARRAY_SIZE(power7_generic_events), + .generic_events = power7_generic_events, + .cache_events = &power7_cache_events, }; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index ba0a357a89f4..3ed88333412f 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -183,7 +183,7 @@ static int p970_marked_instr_event(u64 event) } /* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { +static unsigned long unit_cons[PM_LASTUNIT+1][2] = { [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, @@ -192,10 +192,11 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, }; -static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) +static int p970_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) { int pmc, byte, unit, sh, spcsel; - u64 mask = 0, value = 0; + unsigned long mask = 0, value = 0; int grp = -1; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -222,7 +223,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) grp = byte & 1; /* Set byte lane select field */ mask |= 0xfULL << (28 - 4 * byte); - value |= (u64)unit << (28 - 4 * byte); + value |= (unsigned long)unit << (28 - 4 * byte); } if (grp == 0) { /* increment PMC1/2/5/6 field */ @@ -236,7 +237,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; if (spcsel) { mask |= 3ull << 48; - value |= (u64)spcsel << 48; + value |= (unsigned long)spcsel << 48; } *maskp = mask; *valp = value; @@ -257,9 +258,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) + unsigned int hwc[], unsigned long mmcr[]) { - u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; unsigned int pmc_inuse = 0; @@ -320,7 +321,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, continue; ttm = unitmap[i]; ++ttmuse[(ttm >> 2) & 1]; - mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; + mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH; } /* Check only one unit per TTMx */ if (ttmuse[0] > 1 || ttmuse[1] > 1) @@ -340,7 +341,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, if (unit == PM_LSU1L && byte >= 2) mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); } - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + mmcr1 |= 
(unsigned long)ttm + << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); } /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ @@ -386,7 +388,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, for (pmc = 0; pmc < 2; ++pmc) mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); for (; pmc < 8; ++pmc) - mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); + mmcr1 |= (unsigned long)pmcsel[pmc] + << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); if (pmc_inuse & 1) mmcr0 |= MMCR0_PMC1CE; if (pmc_inuse & 0xfe) @@ -401,7 +404,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, return 0; } -static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) +static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) { int shift, i; @@ -468,15 +471,15 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; struct power_pmu ppc970_pmu = { - .n_counter = 8, - .max_alternatives = 2, - .add_fields = 0x001100005555ull, - .test_adder = 0x013300000000ull, - .compute_mmcr = p970_compute_mmcr, - .get_constraint = p970_get_constraint, - .get_alternatives = p970_get_alternatives, - .disable_pmc = p970_disable_pmc, - .n_generic = ARRAY_SIZE(ppc970_generic_events), - .generic_events = ppc970_generic_events, - .cache_events = &ppc970_cache_events, + .n_counter = 8, + .max_alternatives = 2, + .add_fields = 0x001100005555ull, + .test_adder = 0x013300000000ull, + .compute_mmcr = p970_compute_mmcr, + .get_constraint = p970_get_constraint, + .get_alternatives = p970_get_alternatives, + .disable_pmc = p970_disable_pmc, + .n_generic = ARRAY_SIZE(ppc970_generic_events), + .generic_events = ppc970_generic_events, + .cache_events = &ppc970_cache_events, }; From 079b3c569c87819e7a19d9b9f51d4746fc47bf9a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:52:09 +1000 Subject: [PATCH 33/49] perf_counter: powerpc: Change how processor-specific back-ends get selected At present, the powerpc generic (processor-independent) perf_counter code has list of processor back-end modules, and at initialization, it looks at the PVR (processor version register) and has a switch statement to select a suitable processor-specific back-end. This is going to become inconvenient as we add more processor-specific back-ends, so this inverts the order: now each back-end checks whether it applies to the current processor, and registers itself if so. Furthermore, instead of looking at the PVR, back-ends now check the cur_cpu_spec->oprofile_cpu_type string and match on that. Lastly, each back-end now specifies a name for itself so the core can print a nice message when a back-end registers itself. This doesn't provide any support for unregistering back-ends, but that wouldn't be hard to do and would allow back-ends to be modules. 
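With this scheme, adding a new back-end reduces to a self-registration
stub like the following (sketch matching the instances in the diff
below; "foo" is a placeholder name):

	static int init_foo_pmu(void)
	{
		/* match on the cputable string rather than the PVR */
		if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/foo"))
			return -ENODEV;

		return register_power_pmu(&foo_pmu);
	}

	arch_initcall(init_foo_pmu);
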
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55529.762227.518531@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 5 +-- arch/powerpc/kernel/perf_counter.c | 42 ++++--------------------- arch/powerpc/kernel/power4-pmu.c | 15 ++++++++- arch/powerpc/kernel/power5+-pmu.c | 16 +++++++++- arch/powerpc/kernel/power5-pmu.c | 15 ++++++++- arch/powerpc/kernel/power6-pmu.c | 15 ++++++++- arch/powerpc/kernel/power7-pmu.c | 15 ++++++++- arch/powerpc/kernel/ppc970-pmu.c | 16 +++++++++- 8 files changed, 95 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 2ceb0fefa93a..8ccd4e155768 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -21,6 +21,7 @@ * describe the PMU on a particular POWER-family CPU. */ struct power_pmu { + const char *name; int n_counter; int max_alternatives; unsigned long add_fields; @@ -41,8 +42,6 @@ struct power_pmu { [PERF_COUNT_HW_CACHE_RESULT_MAX]; }; -extern struct power_pmu *ppmu; - /* * Values for power_pmu.flags */ @@ -56,6 +55,8 @@ extern struct power_pmu *ppmu; #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ +extern int register_power_pmu(struct power_pmu *); + struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 9300638b8c26..25e656c14945 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -1214,42 +1214,14 @@ void hw_perf_counter_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } -extern struct power_pmu power4_pmu; -extern struct power_pmu ppc970_pmu; -extern struct power_pmu power5_pmu; -extern struct power_pmu power5p_pmu; -extern struct power_pmu power6_pmu; -extern struct power_pmu power7_pmu; - -static int init_perf_counters(void) +int register_power_pmu(struct power_pmu *pmu) { - unsigned long pvr; + if (ppmu) + return -EBUSY; /* something's already registered */ - /* XXX should get this from cputable */ - pvr = mfspr(SPRN_PVR); - switch (PVR_VER(pvr)) { - case PV_POWER4: - case PV_POWER4p: - ppmu = &power4_pmu; - break; - case PV_970: - case PV_970FX: - case PV_970MP: - ppmu = &ppc970_pmu; - break; - case PV_POWER5: - ppmu = &power5_pmu; - break; - case PV_POWER5p: - ppmu = &power5p_pmu; - break; - case 0x3e: - ppmu = &power6_pmu; - break; - case 0x3f: - ppmu = &power7_pmu; - break; - } + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); /* * Use FCHV to ignore kernel events if MSR.HV is set. 
@@ -1259,5 +1231,3 @@ static int init_perf_counters(void) return 0; } - -arch_initcall(init_perf_counters); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 81a1708f83b2..db90b0c5c27b 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -10,7 +10,9 @@ */ #include #include +#include #include +#include /* * Bits in event code for POWER4 @@ -587,7 +589,8 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power4_pmu = { +static struct power_pmu power4_pmu = { + .name = "POWER4/4+", .n_counter = 8, .max_alternatives = 5, .add_fields = 0x0000001100005555ul, @@ -600,3 +603,13 @@ struct power_pmu power4_pmu = { .generic_events = p4_generic_events, .cache_events = &power4_cache_events, }; + +static int init_power4_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4")) + return -ENODEV; + + return register_power_pmu(&power4_pmu); +} + +arch_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index aef144d503b0..f4adca8e98a4 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -10,7 +10,9 @@ */ #include #include +#include #include +#include /* * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) @@ -657,7 +659,8 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power5p_pmu = { +static struct power_pmu power5p_pmu = { + .name = "POWER5+/++", .n_counter = 6, .max_alternatives = MAX_ALT, .add_fields = 0x7000000000055ul, @@ -672,3 +675,14 @@ struct power_pmu power5p_pmu = { .generic_events = power5p_generic_events, .cache_events = &power5p_cache_events, }; + +static int init_power5p_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") + && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")) + return -ENODEV; + + return register_power_pmu(&power5p_pmu); +} + +arch_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 8694c73bfb52..29b2c6c0e83a 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -10,7 +10,9 @@ */ #include #include +#include #include +#include /* * Bits in event code for POWER5 (not POWER5++) @@ -599,7 +601,8 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power5_pmu = { +static struct power_pmu power5_pmu = { + .name = "POWER5", .n_counter = 6, .max_alternatives = MAX_ALT, .add_fields = 0x7000090000555ul, @@ -612,3 +615,13 @@ struct power_pmu power5_pmu = { .generic_events = power5_generic_events, .cache_events = &power5_cache_events, }; + +static int init_power5_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) + return -ENODEV; + + return register_power_pmu(&power5_pmu); +} + +arch_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 8898622ac28c..09ae5bf5bda7 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -10,7 +10,9 @@ */ #include #include +#include #include +#include /* * Bits in event code for POWER6 @@ -516,7 +518,8 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power6_pmu = { +static struct power_pmu power6_pmu = { + .name = "POWER6", .n_counter = 6, .max_alternatives = MAX_ALT, .add_fields = 0x1555, @@ -531,3 +534,13 @@ struct power_pmu 
power6_pmu = { .generic_events = power6_generic_events, .cache_events = &power6_cache_events, }; + +static int init_power6_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) + return -ENODEV; + + return register_power_pmu(&power6_pmu); +} + +arch_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 658d1ae436a0..5d755ef7ac8f 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -10,7 +10,9 @@ */ #include #include +#include #include +#include /* * Bits in event code for POWER7 @@ -346,7 +348,8 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu power7_pmu = { +static struct power_pmu power7_pmu = { + .name = "POWER7", .n_counter = 6, .max_alternatives = MAX_ALT + 1, .add_fields = 0x1555ul, @@ -359,3 +362,13 @@ struct power_pmu power7_pmu = { .generic_events = power7_generic_events, .cache_events = &power7_cache_events, }; + +static int init_power7_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) + return -ENODEV; + + return register_power_pmu(&power7_pmu); +} + +arch_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 3ed88333412f..6637c87fe70e 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -10,7 +10,9 @@ */ #include #include +#include #include +#include /* * Bits in event code for PPC970 @@ -470,7 +472,8 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; -struct power_pmu ppc970_pmu = { +static struct power_pmu ppc970_pmu = { + .name = "PPC970/FX/MP", .n_counter = 8, .max_alternatives = 2, .add_fields = 0x001100005555ull, @@ -483,3 +486,14 @@ struct power_pmu ppc970_pmu = { .generic_events = ppc970_generic_events, .cache_events = &ppc970_cache_events, }; + +static int init_ppc970_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") + && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")) + return -ENODEV; + + return register_power_pmu(&ppc970_pmu); +} + +arch_initcall(init_ppc970_pmu); From 98fb1807b97e3e631b940f67544e265c64b984dc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:53:10 +1000 Subject: [PATCH 34/49] perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels This abstracts a few things in arch/powerpc/kernel/perf_counter.c that are specific to 64-bit kernels, and provides definitions for 32-bit kernels. In particular, * Only 64-bit has MMCRA and the bits in it that give information about a PMU interrupt (sampled PR, HV, slot number etc.) * Only 64-bit has the lppaca and the lppaca->pmcregs_in_use field * Use of SDAR is confined to 64-bit for now * Only 64-bit has soft/lazy interrupt disable and therefore pseudo-NMIs (interrupts that occur while interrupts are soft-disabled) * Only 64-bit has PMC7 and PMC8 * Only 64-bit has the MSR_HV bit. This also fixes the types used in a couple of places, where we were using long types for things that need to be 64-bit. 
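The abstraction follows a simple paired-stub pattern, so the common
code stays free of #ifdefs, e.g. (abridged and rearranged from the
hunks below):

	#ifdef CONFIG_PPC32
	static inline int perf_intr_is_nmi(struct pt_regs *regs)
	{
		return 0;	/* no soft-disable on 32-bit */
	}
	#else	/* CONFIG_PPC64 */
	static inline int perf_intr_is_nmi(struct pt_regs *regs)
	{
		return !regs->softe;	/* soft-disabled: treat as NMI */
	}
	#endif
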
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55590.634126.876084@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 193 ++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 25e656c14945..809fdf94b95f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -46,6 +46,115 @@ struct power_pmu *ppmu; */ static unsigned int freeze_counters_kernel = MMCR0_FCS; +/* + * 32-bit doesn't have MMCRA but does have an MMCR2, + * and a few other names are different. + */ +#ifdef CONFIG_PPC32 + +#define MMCR0_FCHV 0 +#define MMCR0_PMCjCE MMCR0_PMCnCE + +#define SPRN_MMCRA SPRN_MMCR2 +#define MMCRA_SAMPLE_ENABLE 0 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_set_pmu_inuse(int inuse) { } +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_read_regs(struct pt_regs *regs) { } +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return 0; +} + +#endif /* CONFIG_PPC32 */ + +/* + * Things that are specific to 64-bit implementations. + */ +#ifdef CONFIG_PPC64 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + return 4 * (slot - 1); + } + return 0; +} + +static inline void perf_set_pmu_inuse(int inuse) +{ + get_lppaca()->pmcregs_in_use = inuse; +} + +/* + * The user wants a data address recorded. + * If we're not doing instruction sampling, give them the SDAR + * (sampled data address). If we are doing instruction sampling, then + * only give them the SDAR if it corresponds to the instruction + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC + * bit in MMCRA. + */ +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +{ + unsigned long mmcra = regs->dsisr; + unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + *addrp = mfspr(SPRN_SDAR); +} + +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if (TRAP(regs) != 0xf00) + return 0; /* not a PMU interrupt */ + + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_EVENT_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? + PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_EVENT_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; +} + +/* + * Overload regs->dsisr to store MMCRA so we only need to read it once + * on each interrupt. + */ +static inline void perf_read_regs(struct pt_regs *regs) +{ + regs->dsisr = mfspr(SPRN_MMCRA); +} + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. 
+ */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return !regs->softe; +} + +#endif /* CONFIG_PPC64 */ + static void perf_counter_interrupt(struct pt_regs *regs); void perf_counter_print_debug(void) @@ -78,12 +187,14 @@ static unsigned long read_pmc(int idx) case 6: val = mfspr(SPRN_PMC6); break; +#ifdef CONFIG_PPC64 case 7: val = mfspr(SPRN_PMC7); break; case 8: val = mfspr(SPRN_PMC8); break; +#endif /* CONFIG_PPC64 */ default: printk(KERN_ERR "oops trying to read PMC%d\n", idx); val = 0; @@ -115,12 +226,14 @@ static void write_pmc(int idx, unsigned long val) case 6: mtspr(SPRN_PMC6, val); break; +#ifdef CONFIG_PPC64 case 7: mtspr(SPRN_PMC7, val); break; case 8: mtspr(SPRN_PMC8, val); break; +#endif /* CONFIG_PPC64 */ default: printk(KERN_ERR "oops trying to write PMC%d\n", idx); } @@ -283,7 +396,7 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], static void power_pmu_read(struct perf_counter *counter) { - long val, delta, prev; + s64 val, delta, prev; if (!counter->hw.idx) return; @@ -477,7 +590,7 @@ void hw_perf_enable(void) mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); if (cpuhw->n_counters == 0) - get_lppaca()->pmcregs_in_use = 0; + perf_set_pmu_inuse(0); goto out_enable; } @@ -510,7 +623,7 @@ void hw_perf_enable(void) * bit set and set the hardware counters to their initial values. * Then unfreeze the counters. */ - get_lppaca()->pmcregs_in_use = 1; + perf_set_pmu_inuse(1); mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) @@ -1007,11 +1120,10 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) * things if requested. Note that interrupts are hard-disabled * here so there is no possibility of being interrupted. */ -static void record_and_restart(struct perf_counter *counter, long val, +static void record_and_restart(struct perf_counter *counter, unsigned long val, struct pt_regs *regs, int nmi) { u64 period = counter->hw.sample_period; - unsigned long mmcra, sdsync; s64 prev, delta, left; int record = 0; @@ -1033,8 +1145,8 @@ static void record_and_restart(struct perf_counter *counter, long val, left = period; record = 1; } - if (left < 0x80000000L) - val = 0x80000000L - left; + if (left < 0x80000000LL) + val = 0x80000000LL - left; } /* @@ -1047,22 +1159,9 @@ static void record_and_restart(struct perf_counter *counter, long val, .period = counter->hw.last_period, }; - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { - /* - * The user wants a data address recorded. - * If we're not doing instruction sampling, - * give them the SDAR (sampled data address). - * If we are doing instruction sampling, then only - * give them the SDAR if it corresponds to the - * instruction pointed to by SIAR; this is indicated - * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA. - */ - mmcra = regs->dsisr; - sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? 
- POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - data.addr = mfspr(SPRN_SDAR); - } + if (counter->attr.sample_type & PERF_SAMPLE_ADDR) + perf_get_data_addr(regs, &data.addr); + if (perf_counter_overflow(counter, nmi, &data)) { /* * Interrupts are coming too fast - throttle them @@ -1088,25 +1187,12 @@ static void record_and_restart(struct perf_counter *counter, long val, */ unsigned long perf_misc_flags(struct pt_regs *regs) { - unsigned long mmcra; + u32 flags = perf_get_misc_flags(regs); - if (TRAP(regs) != 0xf00) { - /* not a PMU interrupt */ - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; - } - - mmcra = regs->dsisr; - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; + if (flags) + return flags; + return user_mode(regs) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; } /* @@ -1115,20 +1201,12 @@ unsigned long perf_misc_flags(struct pt_regs *regs) */ unsigned long perf_instruction_pointer(struct pt_regs *regs) { - unsigned long mmcra; unsigned long ip; - unsigned long slot; if (TRAP(regs) != 0xf00) return regs->nip; /* not a PMU interrupt */ - ip = mfspr(SPRN_SIAR); - mmcra = regs->dsisr; - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - ip += 4 * (slot - 1); - } + ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); return ip; } @@ -1140,7 +1218,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) int i; struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; - long val; + unsigned long val; int found = 0; int nmi; @@ -1148,16 +1226,9 @@ static void perf_counter_interrupt(struct pt_regs *regs) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), mfspr(SPRN_PMC6)); - /* - * Overload regs->dsisr to store MMCRA so we only need to read it once. - */ - regs->dsisr = mfspr(SPRN_MMCRA); + perf_read_regs(regs); - /* - * If interrupts were soft-disabled when this PMU interrupt - * occurred, treat it as an NMI. - */ - nmi = !regs->softe; + nmi = perf_intr_is_nmi(regs); if (nmi) nmi_enter(); else @@ -1223,11 +1294,13 @@ int register_power_pmu(struct power_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); +#ifdef MSR_HV /* * Use FCHV to ignore kernel events if MSR.HV is set. */ if (mfmsr() & MSR_HV) freeze_counters_kernel = MMCR0_FCHV; +#endif /* CONFIG_PPC64 */ return 0; } From 7325927e5a20bfe0f006acf92801bf41c537d3d4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:53:51 +1000 Subject: [PATCH 35/49] perf_counter: powerpc: Add processor back-end for MPC7450 family This adds support for the performance monitor hardware on the MPC7450 family of processors (7450, 7451, 7455, 7447/7457, 7447A, 7448), used in the later Apple G4 powermacs/powerbooks and other machines. These machines have 6 hardware counters with a unique set of events which can be counted on each counter, with some events being available on multiple counters. Raw event codes for these processors are (PMC << 8) + PMCSEL. If PMC is non-zero then the event is that selected by the given PMCSEL value for that PMC (hardware counter). 
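For example, PMCSEL 0x20 on PMC 2 encodes as (2 << 8) + 0x20 = 0x220,
which is PM_CQ_EXCEED_CYC in the threshold-event list below.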
If PMC is zero then the event selected is one of the low-numbered ones that are common to several PMCs. In this case PMCSEL must be <= 22 and the event is what that PMCSEL value would select on PMC1 (but it may be placed any other PMC that has the same event for that PMCSEL value). For events that count cycles or occurrences that exceed a threshold, the threshold requested can be specified in the 0x3f000 bits of the raw event codes. If the event uses the threshold multiplier bit and that bit should be set, that is indicated with the 0x40000 bit of the raw event code. This fills in some of the generic cache events. Unfortunately there are quite a few blank spaces in the table, partly because these processors tend to count cache hits rather than cache accesses. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55631.802122.696927@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/Makefile | 2 + arch/powerpc/kernel/mpc7450-pmu.c | 417 +++++++++++++++++++++++++ arch/powerpc/platforms/Kconfig.cputype | 1 + 3 files changed, 420 insertions(+) create mode 100644 arch/powerpc/kernel/mpc7450-pmu.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c5f93f061927..a9f882963379 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -98,6 +98,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o +obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o @@ -106,6 +107,7 @@ obj-y += iomap.o endif obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c new file mode 100644 index 000000000000..75ff47fed7bf --- /dev/null +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -0,0 +1,417 @@ +/* + * Performance counter support for MPC7450-family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + +#define N_COUNTER 6 /* Number of hardware counters */ +#define MAX_ALT 3 /* Maximum number of event alternative codes */ + +/* + * Bits in event code for MPC7450 family + */ +#define PM_THRMULT_MSKS 0x40000 +#define PM_THRESH_SH 12 +#define PM_THRESH_MSK 0x3f +#define PM_PMC_SH 8 +#define PM_PMC_MSK 7 +#define PM_PMCSEL_MSK 0x7f + +/* + * Classify events according to how specific their PMC requirements are. 
+ * Result is: + * 0: can go on any PMC + * 1: can go on PMCs 1-4 + * 2: can go on PMCs 1,2,4 + * 3: can go on PMCs 1 or 2 + * 4: can only go on one PMC + * -1: event code is invalid + */ +#define N_CLASSES 5 + +static int mpc7450_classify_event(u32 event) +{ + int pmc; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > N_COUNTER) + return -1; + return 4; + } + event &= PM_PMCSEL_MSK; + if (event <= 1) + return 0; + if (event <= 7) + return 1; + if (event <= 13) + return 2; + if (event <= 22) + return 3; + return -1; +} + +/* + * Events using threshold and possible threshold scale: + * code scale? name + * 11e N PM_INSTQ_EXCEED_CYC + * 11f N PM_ALTV_IQ_EXCEED_CYC + * 128 Y PM_DTLB_SEARCH_EXCEED_CYC + * 12b Y PM_LD_MISS_EXCEED_L1_CYC + * 220 N PM_CQ_EXCEED_CYC + * 30c N PM_GPR_RB_EXCEED_CYC + * 30d ? PM_FPR_IQ_EXCEED_CYC ? + * 311 Y PM_ITLB_SEARCH_EXCEED + * 410 N PM_GPR_IQ_EXCEED_CYC + */ + +/* + * Return use of threshold and threshold scale bits: + * 0 = uses neither, 1 = uses threshold, 2 = uses both + */ +static int mpc7450_threshold_use(u32 event) +{ + int pmc, sel; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + sel = event & PM_PMCSEL_MSK; + switch (pmc) { + case 1: + if (sel == 0x1e || sel == 0x1f) + return 1; + if (sel == 0x28 || sel == 0x2b) + return 2; + break; + case 2: + if (sel == 0x20) + return 1; + break; + case 3: + if (sel == 0xc || sel == 0xd) + return 1; + if (sel == 0x11) + return 2; + break; + case 4: + if (sel == 0x10) + return 1; + break; + } + return 0; +} + +/* + * Layout of constraint bits: + * 33222222222211111111110000000000 + * 10987654321098765432109876543210 + * |< >< > < > < ><><><><><><> + * TS TV G4 G3 G2P6P5P4P3P2P1 + * + * P1 - P6 + * 0 - 11: Count of events needing PMC1 .. PMC6 + * + * G2 + * 12 - 14: Count of events needing PMC1 or PMC2 + * + * G3 + * 16 - 18: Count of events needing PMC1, PMC2 or PMC4 + * + * G4 + * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4 + * + * TV + * 24 - 29: Threshold value requested + * + * TS + * 30: Threshold scale value requested + */ + +static u32 pmcbits[N_COUNTER][2] = { + { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */ + { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */ + { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */ + { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */ + { 0x00000200, 0x00000100 }, /* PMC5: P5 */ + { 0x00000800, 0x00000400 } /* PMC6: P6 */ +}; + +static u32 classbits[N_CLASSES - 1][2] = { + { 0x00000000, 0x00000000 }, /* class 0: no constraint */ + { 0x00800000, 0x00100000 }, /* class 1: G4 */ + { 0x00040000, 0x00010000 }, /* class 2: G3 */ + { 0x00004000, 0x00001000 }, /* class 3: G2 */ +}; + +static int mpc7450_get_constraint(u64 event, unsigned long *maskp, + unsigned long *valp) +{ + int pmc, class; + u32 mask, value; + int thresh, tuse; + + class = mpc7450_classify_event(event); + if (class < 0) + return -1; + if (class == 4) { + pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK; + mask = pmcbits[pmc - 1][0]; + value = pmcbits[pmc - 1][1]; + } else { + mask = classbits[class][0]; + value = classbits[class][1]; + } + + tuse = mpc7450_threshold_use(event); + if (tuse) { + thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK; + mask |= 0x3f << 24; + value |= thresh << 24; + if (tuse == 2) { + mask |= 0x40000000; + if ((unsigned int)event & PM_THRMULT_MSKS) + value |= 0x40000000; + } + } + + *maskp = mask; + *valp = value; + return 0; +} + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS 
*/ + { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */ + { 0x502, 0x602 }, /* PM_L2_HIT */ + { 0x503, 0x603 }, /* PM_L3_HIT */ + { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */ + { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */ + { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */ + { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */ + { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */ + { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */ + { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */ + { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */ + { 0x512, 0x612 }, /* PM_INT_LOCAL */ + { 0x513, 0x61d }, /* PM_L2_MISS */ + { 0x514, 0x61e }, /* PM_L3_MISS */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(u32 event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int i, j, nalt = 1; + u32 ae; + + alt[0] = event; + nalt = 1; + i = find_alternative((u32)event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != (u32)event) + alt[nalt++] = ae; + } + } + return nalt; +} + +/* + * Bitmaps of which PMCs each class can use for classes 0 - 3. + * Bit i is set if PMC i+1 is usable. + */ +static const u8 classmap[N_CLASSES] = { + 0x3f, 0x0f, 0x0b, 0x03, 0 +}; + +/* Bit position and width of each PMCSEL field */ +static const int pmcsel_shift[N_COUNTER] = { + 6, 0, 27, 22, 17, 11 +}; +static const u32 pmcsel_mask[N_COUNTER] = { + 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f +}; + +/* + * Compute MMCR0/1/2 values for a set of events. + */ +static int mpc7450_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]) +{ + u8 event_index[N_CLASSES][N_COUNTER]; + int n_classevent[N_CLASSES]; + int i, j, class, tuse; + u32 pmc_inuse = 0, pmc_avail; + u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0; + u32 ev, pmc, thresh; + + if (n_ev > N_COUNTER) + return -1; + + /* First pass: count usage in each class */ + for (i = 0; i < N_CLASSES; ++i) + n_classevent[i] = 0; + for (i = 0; i < n_ev; ++i) { + class = mpc7450_classify_event(event[i]); + if (class < 0) + return -1; + j = n_classevent[class]++; + event_index[class][j] = i; + } + + /* Second pass: allocate PMCs from most specific event to least */ + for (class = N_CLASSES - 1; class >= 0; --class) { + for (i = 0; i < n_classevent[class]; ++i) { + ev = event[event_index[class][i]]; + if (class == 4) { + pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + } else { + /* Find a suitable PMC */ + pmc_avail = classmap[class] & ~pmc_inuse; + if (!pmc_avail) + return -1; + pmc = ffs(pmc_avail); + } + pmc_inuse |= 1 << (pmc - 1); + + tuse = mpc7450_threshold_use(ev); + if (tuse) { + thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK; + mmcr0 |= thresh << 16; + if (tuse == 2 && (ev & PM_THRMULT_MSKS)) + mmcr2 = 0x80000000; + } + ev &= pmcsel_mask[pmc - 1]; + ev <<= pmcsel_shift[pmc - 1]; + if (pmc <= 2) + mmcr0 |= ev; + else + mmcr1 |= ev; + hwc[event_index[class][i]] = pmc - 1; + } + } + + if (pmc_inuse & 1) + mmcr0 |= MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr0 |= MMCR0_PMCnCE; + + /* Return MMCRx values */ + mmcr[0] = mmcr0; + mmcr[1] = mmcr1; + mmcr[2] = mmcr2; + return 0; +} + +/* + * Disable counting by a PMC. + * Note that the pmc argument is 0-based here, not 1-based. 
+ */ +static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +{ + if (pmc <= 1) + mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + else + mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); +} + +static int mpc7450_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */ +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x225 }, + [C(OP_WRITE)] = { 0, 0x227 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x129, 0x115 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0x634, 0 }, + }, + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x312 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x223 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x122, 0x41c }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +struct power_pmu mpc7450_pmu = { + .name = "MPC7450 family", + .n_counter = N_COUNTER, + .max_alternatives = MAX_ALT, + .add_fields = 0x00111555ul, + .test_adder = 0x00301000ul, + .compute_mmcr = mpc7450_compute_mmcr, + .get_constraint = mpc7450_get_constraint, + .get_alternatives = mpc7450_get_alternatives, + .disable_pmc = mpc7450_disable_pmc, + .n_generic = ARRAY_SIZE(mpc7450_generic_events), + .generic_events = mpc7450_generic_events, + .cache_events = &mpc7450_cache_events, +}; + +static int init_mpc7450_pmu(void) +{ + if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) + return -ENODEV; + + return register_power_pmu(&mpc7450_pmu); +} + +arch_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index dd9f3ec5ee30..8485c8ca7a06 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -75,6 +75,7 @@ config POWER4_ONLY config 6xx def_bool y depends on PPC32 && PPC_BOOK3S + select PPC_HAVE_PMU_SUPPORT config POWER3 bool From e24a72c4d8f0b2c17783b3ba9c8931b537149423 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:54:26 +1000 Subject: [PATCH 36/49] perf_counter: tools: Makefile tweaks for 64-bit powerpc On 64-bit powerpc, perf needs to be built as a 64-bit executable. This arranges to add the -m64 flag to CFLAGS if we are running on a 64-bit machine, indicated by the result of uname -m ending in "64". This means that we'll use -m64 on x86_64 machines as well. 
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55666.866148.559620@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e8346f95fbb0..714db7327b94 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -157,9 +157,14 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') +# If we're on a 64-bit kernel, use -m64 +ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) + M64 := -m64 +endif + # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 +CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) From d3a9262e59f7fb83c6d44df3b2b1460ed57d3ea1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 12:54:00 +0200 Subject: [PATCH 37/49] fs: Provide empty .set_page_dirty() aop for anon inodes .set_page_dirty() is one of those a_ops that defaults to the buffer implementation when not set. Therefore provide a dummy function to make it do nothing. (Uncovered by perfcounters fd's which can now be writable-mmap-ed.) Signed-off-by: Peter Zijlstra Cc: Al Viro Cc: Davide Libenzi LKML-Reference: Signed-off-by: Ingo Molnar --- fs/anon_inodes.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1dd96d4406c0..47d4a01c5393 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { .d_delete = anon_inodefs_delete_dentry, }; +/* + * nop .set_page_dirty method so that people can use .page_mkwrite on + * anon inodes. + */ +static int anon_set_page_dirty(struct page *page) +{ + return 0; +}; + +static const struct address_space_operations anon_aops = { + .set_page_dirty = anon_set_page_dirty, +}; + /** * anon_inode_getfd - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" @@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void) inode->i_fop = &anon_inode_fops; + inode->i_mapping->a_ops = &anon_aops; + /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty From 43a21ea81a2400992561146327c4785ce7f7be38 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 19:39:37 +0100 Subject: [PATCH 38/49] perf_counter: Add event overlow handling Alternative method of mmap() data output handling that provides better overflow management and a more reliable data stream. Unlike the previous method, that didn't have any user->kernel feedback and relied on userspace keeping up, this method relies on userspace writing its last read position into the control page. It will ensure new output doesn't overwrite not-yet read events, new events for which there is no space left are lost and the overflow counter is incremented, providing exact event loss numbers. 
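For illustration, the overwrite check this introduces (perf_output_space() in the diff below) works on free-running u64 head/tail counters and reduces them modulo the buffer size only at comparison time. A condensed, stand-alone rendering of that test — output_space() and the numbers are made up for the example — sketches why the arithmetic survives counter wrap-around:

#include <assert.h>
#include <stdint.h>

/*
 * Mirrors the test in perf_output_space(): may the producer advance
 * from `offset` to `head` without overwriting unread data at `tail`?
 * All three are free-running counters; `mask` is buffer_size - 1.
 */
static int output_space(uint64_t tail, uint64_t offset, uint64_t head,
                        uint64_t mask)
{
        offset = (offset - tail) & mask;
        head   = (head   - tail) & mask;

        return (int64_t)(head - offset) >= 0;
}

int main(void)
{
        uint64_t mask = 16 * 4096 - 1;          /* 16 data pages */

        /* reader is 100 bytes behind: a 50-byte record still fits */
        assert(output_space(1000, 1100, 1150, mask));

        /* a write that would lap the unread data must be refused */
        assert(!output_space(1000, 1100, 1100 + mask, mask));

        return 0;
}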
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 40 +++++--- kernel/perf_counter.c | 193 ++++++++++++++++++++++++----------- 2 files changed, 162 insertions(+), 71 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a7d3a61a59b7..0765e8e69843 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -236,10 +236,16 @@ struct perf_counter_mmap_page { /* * Control data for the mmap() data buffer. * - * User-space reading this value should issue an rmb(), on SMP capable - * platforms, after reading this value -- see perf_counter_wakeup(). + * User-space reading the @data_head value should issue an rmb(), on + * SMP capable platforms, after reading this value -- see + * perf_counter_wakeup(). + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data. In this case + * the kernel will not over-write unread data. */ __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ }; #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) @@ -273,6 +279,15 @@ enum perf_event_type { */ PERF_EVENT_MMAP = 1, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * }; + */ + PERF_EVENT_LOST = 2, + /* * struct { * struct perf_event_header header; @@ -313,26 +328,26 @@ enum perf_event_type { /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_RECORD_* + * will be PERF_SAMPLE_* * * struct { * struct perf_event_header header; * - * { u64 ip; } && PERF_RECORD_IP - * { u32 pid, tid; } && PERF_RECORD_TID - * { u64 time; } && PERF_RECORD_TIME - * { u64 addr; } && PERF_RECORD_ADDR - * { u64 config; } && PERF_RECORD_CONFIG - * { u32 cpu, res; } && PERF_RECORD_CPU + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 config; } && PERF_SAMPLE_CONFIG + * { u32 cpu, res; } && PERF_SAMPLE_CPU * * { u64 nr; - * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP + * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP * * { u16 nr, * hv, * kernel, * user; - * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ }; @@ -424,6 +439,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; /* nr of data pages */ + int writable; /* are we writable */ int nr_locked; /* nr pages mlocked */ atomic_t poll; /* POLL_ for wakeups */ @@ -433,8 +449,8 @@ struct perf_mmap_data { atomic_long_t done_head; /* completed head */ atomic_t lock; /* concurrent writes */ - atomic_t wakeup; /* needs a wakeup */ + atomic_t lost; /* nr records lost */ struct perf_counter_mmap_page *user_page; void *data_pages[0]; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 109a95723859..7e9108efd305 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1794,6 +1794,12 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct perf_mmap_data *data; int ret = VM_FAULT_SIGBUS; + if (vmf->flags & FAULT_FLAG_MKWRITE) { + if (vmf->pgoff == 0) + ret = 0; + return ret; + } + rcu_read_lock(); data = rcu_dereference(counter->data); if (!data) @@ -1807,9 +1813,16 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if ((unsigned)nr > data->nr_pages) goto unlock; + if (vmf->flags & 
FAULT_FLAG_WRITE) + goto unlock; + vmf->page = virt_to_page(data->data_pages[nr]); } + get_page(vmf->page); + vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->index = vmf->pgoff; + ret = 0; unlock: rcu_read_unlock(); @@ -1862,6 +1875,14 @@ fail: return -ENOMEM; } +static void perf_mmap_free_page(unsigned long addr) +{ + struct page *page = virt_to_page(addr); + + page->mapping = NULL; + __free_page(page); +} + static void __perf_mmap_data_free(struct rcu_head *rcu_head) { struct perf_mmap_data *data; @@ -1869,9 +1890,10 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head) data = container_of(rcu_head, struct perf_mmap_data, rcu_head); - free_page((unsigned long)data->user_page); + perf_mmap_free_page((unsigned long)data->user_page); for (i = 0; i < data->nr_pages; i++) - free_page((unsigned long)data->data_pages[i]); + perf_mmap_free_page((unsigned long)data->data_pages[i]); + kfree(data); } @@ -1908,9 +1930,10 @@ static void perf_mmap_close(struct vm_area_struct *vma) } static struct vm_operations_struct perf_mmap_vmops = { - .open = perf_mmap_open, - .close = perf_mmap_close, - .fault = perf_mmap_fault, + .open = perf_mmap_open, + .close = perf_mmap_close, + .fault = perf_mmap_fault, + .page_mkwrite = perf_mmap_fault, }; static int perf_mmap(struct file *file, struct vm_area_struct *vma) @@ -1924,7 +1947,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) long user_extra, extra; int ret = 0; - if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) + if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; vma_size = vma->vm_end - vma->vm_start; @@ -1983,10 +2006,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_long_add(user_extra, &user->locked_vm); vma->vm_mm->locked_vm += extra; counter->data->nr_locked = extra; + if (vma->vm_flags & VM_WRITE) + counter->data->writable = 1; + unlock: mutex_unlock(&counter->mmap_mutex); - vma->vm_flags &= ~VM_MAYWRITE; vma->vm_flags |= VM_RESERVED; vma->vm_ops = &perf_mmap_vmops; @@ -2163,11 +2188,38 @@ struct perf_output_handle { unsigned long head; unsigned long offset; int nmi; - int overflow; + int sample; int locked; unsigned long flags; }; +static bool perf_output_space(struct perf_mmap_data *data, + unsigned int offset, unsigned int head) +{ + unsigned long tail; + unsigned long mask; + + if (!data->writable) + return true; + + mask = (data->nr_pages << PAGE_SHIFT) - 1; + /* + * Userspace could choose to issue a mb() before updating the tail + * pointer. So that all reads will be completed before the write is + * issued. + */ + tail = ACCESS_ONCE(data->user_page->data_tail); + smp_rmb(); + + offset = (offset - tail) & mask; + head = (head - tail) & mask; + + if ((int)(head - offset) < 0) + return false; + + return true; +} + static void perf_output_wakeup(struct perf_output_handle *handle) { atomic_set(&handle->data->poll, POLL_IN); @@ -2258,55 +2310,6 @@ out: local_irq_restore(handle->flags); } -static int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int overflow) -{ - struct perf_mmap_data *data; - unsigned int offset, head; - - /* - * For inherited counters we send all the output towards the parent. 
- */ - if (counter->parent) - counter = counter->parent; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto out; - - handle->data = data; - handle->counter = counter; - handle->nmi = nmi; - handle->overflow = overflow; - - if (!data->nr_pages) - goto fail; - - perf_output_lock(handle); - - do { - offset = head = atomic_long_read(&data->head); - head += size; - } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); - - handle->offset = offset; - handle->head = head; - - if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) - atomic_set(&data->wakeup, 1); - - return 0; - -fail: - perf_output_wakeup(handle); -out: - rcu_read_unlock(); - - return -ENOSPC; -} - static void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len) { @@ -2346,6 +2349,78 @@ static void perf_output_copy(struct perf_output_handle *handle, #define perf_output_put(handle, x) \ perf_output_copy((handle), &(x), sizeof(x)) +static int perf_output_begin(struct perf_output_handle *handle, + struct perf_counter *counter, unsigned int size, + int nmi, int sample) +{ + struct perf_mmap_data *data; + unsigned int offset, head; + int have_lost; + struct { + struct perf_event_header header; + u64 id; + u64 lost; + } lost_event; + + /* + * For inherited counters we send all the output towards the parent. + */ + if (counter->parent) + counter = counter->parent; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (!data) + goto out; + + handle->data = data; + handle->counter = counter; + handle->nmi = nmi; + handle->sample = sample; + + if (!data->nr_pages) + goto fail; + + have_lost = atomic_read(&data->lost); + if (have_lost) + size += sizeof(lost_event); + + perf_output_lock(handle); + + do { + offset = head = atomic_long_read(&data->head); + head += size; + if (unlikely(!perf_output_space(data, offset, head))) + goto fail; + } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); + + handle->offset = offset; + handle->head = head; + + if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) + atomic_set(&data->wakeup, 1); + + if (have_lost) { + lost_event.header.type = PERF_EVENT_LOST; + lost_event.header.misc = 0; + lost_event.header.size = sizeof(lost_event); + lost_event.id = counter->id; + lost_event.lost = atomic_xchg(&data->lost, 0); + + perf_output_put(handle, lost_event); + } + + return 0; + +fail: + atomic_inc(&data->lost); + perf_output_unlock(handle); +out: + rcu_read_unlock(); + + return -ENOSPC; +} + static void perf_output_end(struct perf_output_handle *handle) { struct perf_counter *counter = handle->counter; @@ -2353,7 +2428,7 @@ static void perf_output_end(struct perf_output_handle *handle) int wakeup_events = counter->attr.wakeup_events; - if (handle->overflow && wakeup_events) { + if (handle->sample && wakeup_events) { int events = atomic_inc_return(&data->events); if (events >= wakeup_events) { atomic_sub(wakeup_events, &data->events); @@ -2958,7 +3033,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) } /* - * Generic counter overflow handling. + * Generic counter overflow handling, sampling. */ int perf_counter_overflow(struct perf_counter *counter, int nmi, From 9d91a6f7a489eb914c16b82d927f9d81d629c259 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 11:40:28 +0200 Subject: [PATCH 39/49] perf_counter tools: Handle lost events Make use of the new ->data_tail mechanism to tell kernel-space about user-space draining the data stream. 
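A minimal sketch of the consumer loop this boils down to, under the following assumptions: struct mmap_page stands in for just the data_head/data_tail fields of the real control page, handle_event() is a hypothetical callback, records never wrap past the buffer end (the real mmap_read() handles that case), and a GCC full barrier is used where the patch leaves mb() as a comment:

#include <stdint.h>

struct event_header { uint32_t type; uint16_t misc; uint16_t size; };
struct mmap_page { volatile uint64_t data_head, data_tail; };

extern void handle_event(struct event_header *hdr);     /* hypothetical */

static void drain(struct mmap_page *pc, unsigned char *data,
                  uint64_t mask, uint64_t *prev)
{
        uint64_t head = pc->data_head;

        __sync_synchronize();           /* rmb(): read head before data */

        while (*prev != head) {
                struct event_header *hdr =
                        (struct event_header *)&data[*prev & mask];

                handle_event(hdr);
                *prev += hdr->size;     /* every record carries its size */
        }

        __sync_synchronize();           /* finish all reads ... */
        pc->data_tail = *prev;          /* ... then let the kernel reuse */
}

The kernel only honours data_tail when the mapping was opened writable, which is why the mmap below switches to PROT_READ|PROT_WRITE.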
Emit lost events (and display them) if they happen. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 20 ++++++++++++++++---- tools/perf/builtin-report.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e1dfef24887f..06fdfb8b4828 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -78,10 +78,10 @@ struct mmap_data { static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; -static unsigned int mmap_read_head(struct mmap_data *md) +static unsigned long mmap_read_head(struct mmap_data *md) { struct perf_counter_mmap_page *pc = md->base; - int head; + long head; head = pc->data_head; rmb(); @@ -89,6 +89,17 @@ static unsigned int mmap_read_head(struct mmap_data *md) return head; } +static void mmap_write_tail(struct mmap_data *md, unsigned long tail) +{ + struct perf_counter_mmap_page *pc = md->base; + + /* + * ensure all reads are done before we write the tail out. + */ + /* mb(); */ + pc->data_tail = tail; +} + static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -109,7 +120,7 @@ static void mmap_read(struct mmap_data *md) * In either case, truncate and restart at head. */ diff = head - old; - if (diff > md->mask / 2 || diff < 0) { + if (diff < 0) { struct timeval iv; unsigned long msecs; @@ -167,6 +178,7 @@ static void mmap_read(struct mmap_data *md) } md->prev = old; + mmap_write_tail(md, old); } static volatile int done = 0; @@ -424,7 +436,7 @@ try_again: mmap_array[nr_cpu][counter].prev = 0; mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { error("failed to mmap with %d (%s)\n", errno, strerror(errno)); exit(-1); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9a3805f0c9f2..fe66895111b1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -83,6 +83,12 @@ struct period_event { __u64 sample_period; }; +struct lost_event { + struct perf_event_header header; + __u64 id; + __u64 lost; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; @@ -90,6 +96,7 @@ typedef union event_union { struct comm_event comm; struct fork_event fork; struct period_event period; + struct lost_event lost; } event_t; static LIST_HEAD(dsos); @@ -1068,7 +1075,8 @@ static unsigned long total = 0, total_mmap = 0, total_comm = 0, total_fork = 0, - total_unknown = 0; + total_unknown = 0, + total_lost = 0; static int validate_chain(struct perf_callchain_entry *chain, event_t *event) { @@ -1260,6 +1268,20 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static int +process_lost_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->lost.id, + event->lost.lost); + + total_lost += event->lost.lost; + + return 0; +} + static void trace_event(event_t *event) { unsigned char *raw_event = (void *)event; @@ -1316,6 +1338,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case 
PERF_EVENT_PERIOD: return process_period_event(event, offset, head); + + case PERF_EVENT_LOST: + return process_lost_event(event, offset, head); + /* * We dont process them right now but they are fine: */ @@ -1444,6 +1470,7 @@ more: dprintf(" mmap events: %10ld\n", total_mmap); dprintf(" comm events: %10ld\n", total_comm); dprintf(" fork events: %10ld\n", total_fork); + dprintf(" lost events: %10ld\n", total_lost); dprintf(" unknown events: %10ld\n", total_unknown); if (dump_trace) From b8e6d829729d1a5991a9f628205b671cac2ec06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 14:32:19 +0200 Subject: [PATCH 40/49] perf report: Filter to parent set by default Make it easier to use parent filtering - default to a filtered output. Also add the parent column so that we get collapsing but dont display it by default. add --no-exclude-other to override this. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- tools/perf/builtin-report.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 714db7327b94..672c5f069c6e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -164,7 +164,7 @@ endif # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 +CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fe66895111b1..86981bd08f65 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -46,9 +46,12 @@ static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; -static char *parent_pattern = "^sys_|^do_page_fault"; +static char default_parent_pattern[] = "^sys_|^do_page_fault"; +static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; +static int exclude_other = 1; + struct ip_event { struct perf_event_header header; __u64 ip; @@ -742,6 +745,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) struct sort_entry *se; size_t ret; + if (exclude_other && !self->parent) + return 0; + if (total_samples) { double percent = self->count * 100.0 / total_samples; char *color = PERF_COLOR_NORMAL; @@ -764,6 +770,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) ret = fprintf(fp, "%12Ld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { + if (exclude_other && (se == &sort_parent)) + continue; + fprintf(fp, " "); ret += se->print(fp, self); } @@ -855,6 +864,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .ip = ip, .level = level, .count = count, + .parent = NULL, }; int cmp; @@ -1029,14 +1039,20 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); - list_for_each_entry(se, &hist_entry__sort_list, list) + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (exclude_other && (se == &sort_parent)) + continue; fprintf(fp, " %s", se->header); + } fprintf(fp, "\n"); fprintf(fp, "# ........"); list_for_each_entry(se, &hist_entry__sort_list, list) 
{ int i; + if (exclude_other && (se == &sort_parent)) + continue; + fprintf(fp, " "); for (i = 0; i < strlen(se->header); i++) fprintf(fp, "."); @@ -1050,7 +1066,8 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) ret += hist_entry__fprintf(fp, pos, total_samples); } - if (!strcmp(sort_order, default_sort_order)) { + if (sort_order == default_sort_order && + parent_pattern == default_parent_pattern) { fprintf(fp, "#\n"); fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); fprintf(fp, "#\n"); @@ -1508,6 +1525,8 @@ static const struct option options[] = { "Don't shorten the pathnames taking into account the cwd"), OPT_STRING('p', "parent", &parent_pattern, "regex", "regex filter to identify parent, see: '--sort parent'"), + OPT_BOOLEAN('x', "exclude-other", &exclude_other, + "Only display entries with parent-match"), OPT_END() }; @@ -1536,6 +1555,11 @@ int cmd_report(int argc, const char **argv, const char *prefix) setup_sorting(); + if (parent_pattern != default_parent_pattern) + sort_dimension__add("parent"); + else + exclude_other = 0; + /* * Any (unrecognized) arguments left? */ From f9188e023c248d73f5b4a589b480e065c1864068 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 22:20:52 +0200 Subject: [PATCH 41/49] perf_counter: Make callchain samples extensible Before exposing upstream tools to a callchain-samples ABI, tidy it up to make it more extensible in the future: Use markers in the IP chain to denote context, use (u64)-1..-4095 range for these context markers because we use them for ERR_PTR(), so these addresses are unlikely to be mapped. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 29 ++++++----------------------- include/linux/perf_counter.h | 28 +++++++++++++++++----------- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ce1ae3f1f86c..76dfef23f789 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1555,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) */ static inline -void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) +void callchain_store(struct perf_callchain_entry *entry, u64 ip) { - if (entry->nr < MAX_STACK_DEPTH) + if (entry->nr < PERF_MAX_STACK_DEPTH) entry->ip[entry->nr++] = ip; } @@ -1602,22 +1602,10 @@ static const struct stacktrace_ops backtrace_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { - unsigned long bp; - char *stack; - int nr = entry->nr; - + callchain_store(entry, PERF_CONTEXT_KERNEL); callchain_store(entry, regs->ip); - stack = ((char *)regs + sizeof(struct pt_regs)); -#ifdef CONFIG_FRAME_POINTER - get_bp(bp); -#else - bp = 0; -#endif - - dump_trace(NULL, regs, (void *)&stack, bp, &backtrace_ops, entry); - - entry->kernel = entry->nr - nr; + dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); } /* @@ -1669,16 +1657,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) { struct stack_frame frame; const void __user *fp; - int nr = entry->nr; if (!user_mode(regs)) regs = task_pt_regs(current); fp = (void __user *)regs->bp; + callchain_store(entry, PERF_CONTEXT_USER); callchain_store(entry, regs->ip); - while (entry->nr < MAX_STACK_DEPTH) { + while (entry->nr < PERF_MAX_STACK_DEPTH) { 
frame.next_frame = NULL; frame.return_address = 0; @@ -1691,8 +1679,6 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, frame.return_address); fp = frame.next_frame; } - - entry->user = entry->nr - nr; } static void @@ -1728,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry = &__get_cpu_var(irq_entry); entry->nr = 0; - entry->hv = 0; - entry->kernel = 0; - entry->user = 0; perf_do_callchain(regs, entry); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0765e8e69843..e7e7e0242767 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -343,23 +343,22 @@ enum perf_event_type { * { u64 nr; * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP * - * { u16 nr, - * hv, - * kernel, - * user; + * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ }; -#define MAX_STACK_DEPTH 255 +enum perf_callchain_context { + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, -struct perf_callchain_entry { - __u16 nr; - __u16 hv; - __u16 kernel; - __u16 user; - __u64 ip[MAX_STACK_DEPTH]; + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, + + PERF_CONTEXT_MAX = (__u64)-4095, }; #ifdef __KERNEL__ @@ -381,6 +380,13 @@ struct perf_callchain_entry { #include #include +#define PERF_MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u64 nr; + __u64 ip[PERF_MAX_STACK_DEPTH]; +}; + struct task_struct; /** From 2a0a50fe9def21835d65035cc8109c0b6dd6099d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 22:20:45 +0200 Subject: [PATCH 42/49] perf_counter: Update userspace callchain sampling uses Update the tools to reflect the new callchain sampling format. 
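The new format interleaves context markers with real addresses, so consumers separate the two by magnitude. A self-contained sketch of that walk, mirroring the updated hist_entry__add() below — count_kernel_ips() is a made-up example function:

#include <stdint.h>

#define PERF_CONTEXT_KERNEL     ((uint64_t)-128)
#define PERF_CONTEXT_MAX        ((uint64_t)-4095)

struct ip_callchain { uint64_t nr; uint64_t ips[]; };

static uint64_t count_kernel_ips(const struct ip_callchain *chain)
{
        uint64_t context = PERF_CONTEXT_MAX;    /* no marker seen yet */
        uint64_t i, n = 0;

        for (i = 0; i < chain->nr; i++) {
                uint64_t ip = chain->ips[i];

                if (ip >= PERF_CONTEXT_MAX) {   /* a marker, not an IP */
                        context = ip;
                        continue;
                }
                if (context == PERF_CONTEXT_KERNEL)
                        n++;            /* would resolve via kernel_dso */
        }

        return n;
}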
LKML-Reference: Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 82 +++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 86981bd08f65..7a6577bf9a41 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,6 +59,11 @@ struct ip_event { unsigned char __more_data[]; }; +struct ip_callchain { + __u64 nr; + __u64 ips[0]; +}; + struct mmap_event { struct perf_event_header header; __u32 pid, tid; @@ -833,15 +838,12 @@ got_dso: return dso->find_symbol(dso, ip); } -static struct symbol *call__match(struct symbol *sym) +static int call__match(struct symbol *sym) { - if (!sym) - return NULL; - if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) - return sym; + return 1; - return NULL; + return 0; } /* @@ -850,7 +852,7 @@ static struct symbol *call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, struct perf_callchain_entry *chain, + struct symbol *sym, __u64 ip, struct ip_callchain *chain, char level, __u64 count) { struct rb_node **p = &hist.rb_node; @@ -869,31 +871,35 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, int cmp; if (sort__has_parent && chain) { - int i, nr = chain->hv; - struct symbol *sym; - struct dso *dso; - __u64 ip; + __u64 context = PERF_CONTEXT_MAX; + int i; + + for (i = 0; i < chain->nr; i++) { + __u64 ip = chain->ips[i]; + struct dso *dso = NULL; + struct symbol *sym; + + if (ip >= PERF_CONTEXT_MAX) { + context = ip; + continue; + } + + switch (context) { + case PERF_CONTEXT_KERNEL: + dso = kernel_dso; + break; + default: + break; + } - for (i = 0; i < chain->kernel; i++) { - ip = chain->ip[nr + i]; - dso = kernel_dso; sym = resolve_symbol(thread, NULL, &dso, &ip); - entry.parent = call__match(sym); - if (entry.parent) - goto got_parent; - } - nr += i; - for (i = 0; i < chain->user; i++) { - ip = chain->ip[nr + i]; - sym = resolve_symbol(thread, NULL, NULL, &ip); - entry.parent = call__match(sym); - if (entry.parent) - goto got_parent; + if (sym && call__match(sym)) { + entry.parent = sym; + break; + } } - nr += i; } -got_parent: while (*p != NULL) { parent = *p; @@ -1095,21 +1101,10 @@ static unsigned long total = 0, total_unknown = 0, total_lost = 0; -static int validate_chain(struct perf_callchain_entry *chain, event_t *event) +static int validate_chain(struct ip_callchain *chain, event_t *event) { unsigned int chain_size; - if (chain->nr > MAX_STACK_DEPTH) - return -1; - if (chain->hv > MAX_STACK_DEPTH) - return -1; - if (chain->kernel > MAX_STACK_DEPTH) - return -1; - if (chain->user > MAX_STACK_DEPTH) - return -1; - if (chain->hv + chain->kernel + chain->user != chain->nr) - return -1; - chain_size = event->header.size; chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; @@ -1130,7 +1125,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; - struct perf_callchain_entry *chain = NULL; + struct ip_callchain *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { period = *(__u64 *)more_data; @@ -1150,10 +1145,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) chain = (void *)more_data; - dprintf("... chain: u:%d, k:%d, nr:%d\n", - chain->user, - chain->kernel, - chain->nr); + dprintf("... 
chain: nr:%Lu\n", chain->nr); if (validate_chain(chain, event) < 0) { eprintf("call-chain problem with event, skipping it.\n"); @@ -1162,7 +1154,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) if (dump_trace) { for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %016Lx\n", i, chain->ip[i]); + dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); } } From f5970550d5ccf90453cbd7d260370ea99d1f6513 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 23:22:55 +0200 Subject: [PATCH 43/49] perf_counter tools: Add a data file header Add a data file header so we can transfer data between record and report. LKML-Reference: Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 94 ++++++++++++++++++++----------------- tools/perf/builtin-report.c | 16 ++++++- tools/perf/perf.h | 6 +++ 3 files changed, 73 insertions(+), 43 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 06fdfb8b4828..28304677c73e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -51,6 +51,9 @@ static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; static int nr_poll; static int nr_cpu; +static int file_new = 1; +static struct perf_file_header file_header; + struct mmap_event { struct perf_event_header header; __u32 pid; @@ -100,6 +103,21 @@ static void mmap_write_tail(struct mmap_data *md, unsigned long tail) pc->data_tail = tail; } +static void write_output(void *buf, size_t size) +{ + while (size) { + int ret = write(output, buf, size); + + if (ret < 0) + die("failed to write"); + + size -= ret; + buf += ret; + + bytes_written += ret; + } +} + static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -148,34 +166,14 @@ static void mmap_read(struct mmap_data *md) size = md->mask + 1 - (old & md->mask); old += size; - while (size) { - int ret = write(output, buf, size); - - if (ret < 0) - die("failed to write"); - - size -= ret; - buf += ret; - - bytes_written += ret; - } + write_output(buf, size); } buf = &data[old & md->mask]; size = head - old; old += size; - while (size) { - int ret = write(output, buf, size); - - if (ret < 0) - die("failed to write"); - - size -= ret; - buf += ret; - - bytes_written += ret; - } + write_output(buf, size); md->prev = old; mmap_write_tail(md, old); @@ -204,7 +202,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) struct comm_event comm_ev; char filename[PATH_MAX]; char bf[BUFSIZ]; - int fd, ret; + int fd; size_t size; char *field, *sep; DIR *tasks; @@ -246,11 +244,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) if (!full) { comm_ev.tid = pid; - ret = write(output, &comm_ev, comm_ev.header.size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } + write_output(&comm_ev, comm_ev.header.size); return; } @@ -265,11 +259,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) comm_ev.tid = pid; - ret = write(output, &comm_ev, comm_ev.header.size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } + write_output(&comm_ev, comm_ev.header.size); } closedir(tasks); return; @@ -332,10 +322,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) mmap_ev.pid = pid; mmap_ev.tid = pid; - if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { - perror("failed to write"); - exit(-1); - } + write_output(&mmap_ev, mmap_ev.header.size); } } @@ -382,6 +369,15 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= 
PERF_SAMPLE_CALLCHAIN; + if (file_new) { + file_header.sample_type = attr->sample_type; + } else { + if (file_header.sample_type != attr->sample_type) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; @@ -461,6 +457,13 @@ static void open_counters(int cpu, pid_t pid) nr_cpu++; } +static void atexit_header(void) +{ + file_header.data_size += bytes_written; + + pwrite(output, &file_header, sizeof(file_header), 0); +} + static int __cmd_record(int argc, const char **argv) { int i, counter; @@ -474,6 +477,10 @@ static int __cmd_record(int argc, const char **argv) assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus >= 0); + atexit(sig_atexit); + signal(SIGCHLD, sig_handler); + signal(SIGINT, sig_handler); + if (!stat(output_name, &st) && !force && !append_file) { fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", output_name); @@ -482,7 +489,7 @@ static int __cmd_record(int argc, const char **argv) flags = O_CREAT|O_RDWR; if (append_file) - flags |= O_APPEND; + file_new = 0; else flags |= O_TRUNC; @@ -492,15 +499,18 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } + if (!file_new) { + read(output, &file_header, sizeof(file_header)); + lseek(output, file_header.data_size, SEEK_CUR); + } + + atexit(atexit_header); + if (!system_wide) { open_counters(-1, target_pid != -1 ? target_pid : getpid()); } else for (i = 0; i < nr_cpus; i++) open_counters(i, target_pid); - atexit(sig_atexit); - signal(SIGCHLD, sig_handler); - signal(SIGINT, sig_handler); - if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7a6577bf9a41..37b26ecb0d0b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1366,11 +1366,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static struct perf_file_header file_header; + static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; unsigned long offset = 0; - unsigned long head = 0; + unsigned long head = sizeof(file_header); struct stat stat; event_t *event; uint32_t size; @@ -1398,6 +1400,14 @@ static int __cmd_report(void) exit(0); } + read(input, &file_header, sizeof(file_header)); + + if (sort__has_parent && + !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { + fprintf(stderr, "selected --sort parent, but no callchain data\n"); + exit(-1); + } + if (load_kernel() < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; @@ -1469,9 +1479,13 @@ more: head += size; + if (offset + head >= sizeof(file_header) + file_header.data_size) + goto done; + if (offset + head < stat.st_size) goto more; +done: rc = EXIT_SUCCESS; close(input); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 87a1aca4a424..55c62f4b990b 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -65,4 +65,10 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 +struct perf_file_header { + __u64 version; + __u64 sample_type; + __u64 data_size; +}; + #endif From e5289d4a181fb6c0b7a7607649af2ffdc491335c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 13:22:51 +0200 Subject: [PATCH 44/49] perf_counter: Simplify and fix task migration counting The task migrations counter was causing rare and hard to decypher memory corruptions under load. 
After a day of debugging and bisection we found that the problem was introduced with: 3f731ca: perf_counter: Fix cpu migration counter Turning them off fixes the crashes. Incidentally, the whole perf_counter_task_migration() logic can be done simpler as well, by injecting a proper sw-counter event. This cleanup also fixed the crashes. The precise failure mode is not completely clear yet, but we are clearly not unhappy about having a fix ;-) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ---- kernel/perf_counter.c | 23 +---------------------- kernel/sched.c | 3 ++- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e7e7e0242767..89698d8aba5c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -682,8 +682,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk); -extern void perf_counter_task_migration(struct task_struct *task, int cpu); - extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_paranoid; @@ -724,8 +722,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { } static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } -static inline void perf_counter_task_migration(struct task_struct *task, - int cpu) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7e9108efd305..8d4f0dd41c22 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -124,7 +124,7 @@ void perf_enable(void) static void get_ctx(struct perf_counter_context *ctx) { - atomic_inc(&ctx->refcount); + WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); } static void free_ctx(struct rcu_head *head) @@ -3467,27 +3467,6 @@ static const struct pmu perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; -/* - * Software counter: cpu migrations - */ -void perf_counter_task_migration(struct task_struct *task, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx; - - perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, - PERF_COUNT_SW_CPU_MIGRATIONS, - 1, 1, NULL, 0); - - ctx = perf_pin_task_context(task); - if (ctx) { - perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, - PERF_COUNT_SW_CPU_MIGRATIONS, - 1, 1, NULL, 0); - perf_unpin_context(ctx); - } -} - #ifdef CONFIG_EVENT_PROFILE void perf_tpcounter_event(int event_id) { diff --git a/kernel/sched.c b/kernel/sched.c index 8fb88a906aaa..f46540b359c0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); #endif - perf_counter_task_migration(p, new_cpu); + perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, + 1, 1, NULL, 0); } p->se.vruntime -= old_cfsrq->min_vruntime - new_cfsrq->min_vruntime; From 0c87197142427063e096f11603543ca874045952 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 11:35:01 +0200 Subject: [PATCH 45/49] perf_counter, x86: Improve interactions with fast-gup Improve a few details in 
perfcounter call-chain recording that makes use of fast-GUP: - Use ACCESS_ONCE() to observe the pte value. ptes are fundamentally racy and can be changed on another CPU, so we have to be careful about how we access them. The PAE branch is already careful with read-barriers - but the non-PAE and 64-bit side needs an ACCESS_ONCE() to make sure the pte value is observed only once. - make the checks a bit stricter so that we can feed it any kind of cra^H^H^H user-space input ;-) Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 7 ++++++- arch/x86/mm/gup.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b685ece89d5c..512ee87062c2 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -25,7 +25,12 @@ #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) #define KERNEL_DS MAKE_MM_SEG(-1UL) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#ifdef CONFIG_X86_32 +# define USER_DS MAKE_MM_SEG(PAGE_OFFSET) +#else +# define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK) +#endif #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 697d5727c119..2d1d784ad3f7 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -14,7 +14,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) { #ifndef CONFIG_X86_PAE - return *ptep; + return ACCESS_ONCE(*ptep); #else /* * With get_user_pages_fast, we walk down the pagetables without taking From b49a9e7e72103ea91946453c19703a4dfa1994fe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 17:39:33 +0200 Subject: [PATCH 46/49] perf_counter: Close race in perf_lock_task_context() perf_lock_task_context() is buggy because it can return a dead context. the RCU read lock in perf_lock_task_context() only guarantees the memory won't get freed, it doesn't guarantee the object is valid (in our case refcount > 0). Therefore we can return a locked object that can get freed the moment we release the rcu read lock. perf_pin_task_context() then increases the refcount and does an unlock on freed memory. That increased refcount will cause a double free, in case it started out with 0. Ammend this by including the get_ctx() functionality in perf_lock_task_context() (all users already did this later anyway), and return a NULL context when the found one is already dead. 
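Reduced to its essentials, the pattern is: an RCU-protected lookup may find an object whose refcount has already hit zero, so the get must be conditional and the lookup must fail rather than resurrect the object. A kernel-style sketch, with a hypothetical struct obj and table_slot:

#include <linux/rcupdate.h>
#include <asm/atomic.h>

struct obj { atomic_t refcount; /* ... */ };

static struct obj *table_slot;          /* hypothetical RCU-managed slot */

static struct obj *obj_lookup(void)
{
        struct obj *o;

        rcu_read_lock();
        o = rcu_dereference(table_slot);
        if (o && !atomic_inc_not_zero(&o->refcount))
                o = NULL;               /* found it, but it was dying */
        rcu_read_unlock();

        return o;                       /* caller now owns a reference */
}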
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8d4f0dd41c22..adb6ae506d5b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -175,6 +175,11 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } + + if (!atomic_inc_not_zero(&ctx->refcount)) { + spin_unlock_irqrestore(&ctx->lock, *flags); + ctx = NULL; + } } rcu_read_unlock(); return ctx; @@ -193,7 +198,6 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta ctx = perf_lock_task_context(task, &flags); if (ctx) { ++ctx->pin_count; - get_ctx(ctx); spin_unlock_irqrestore(&ctx->lock, flags); } return ctx; @@ -1459,11 +1463,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) put_ctx(parent_ctx); ctx->parent_ctx = NULL; /* no longer a clone */ } - /* - * Get an extra reference before dropping the lock so that - * this context won't get freed if the task exits. - */ - get_ctx(ctx); spin_unlock_irqrestore(&ctx->lock, flags); } From 9cffa8d53335d891cc0ecb3824a67118b3ee4b2f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 19 Jun 2009 22:21:42 +1000 Subject: [PATCH 47/49] perf_counter tools: Define and use our own u64, s64 etc. definitions On 64-bit powerpc, __u64 is defined to be unsigned long rather than unsigned long long. This causes compiler warnings every time we print a __u64 value with %Lx. Rather than changing __u64, we define our own u64 to be unsigned long long on all architectures, and similarly s64 as signed long long. For consistency we also define u32, s32, u16, s16, u8 and s8. These definitions are put in a new header, types.h, because these definitions are needed in util/string.h and util/symbol.h. The main change here is the mechanical change of __[us]{64,32,16,8} to remove the "__". The other changes are: * Create types.h * Include types.h in perf.h, util/string.h and util/symbol.h * Add types.h to the LIB_H definition in Makefile * Added (u64) casts in process_overflow_event() and print_sym_table() to kill two remaining warnings. 
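For reference, a header along these lines satisfies the description above (a sketch reconstructed from this commit message — the in-tree tools/perf/types.h may differ in comments or whitespace):

#ifndef _PERF_TYPES_H
#define _PERF_TYPES_H

/*
 * We define u64 as unsigned long long for every architecture
 * so that we can print it with %Lx without getting warnings.
 */
typedef unsigned long long u64;
typedef signed long long s64;
typedef unsigned int u32;
typedef signed int s32;
typedef unsigned short u16;
typedef signed short s16;
typedef unsigned char u8;
typedef signed char s8;

#endif /* _PERF_TYPES_H */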
Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/builtin-annotate.c | 66 +++++++++++++------------- tools/perf/builtin-record.c | 20 ++++---- tools/perf/builtin-report.c | 84 +++++++++++++++++----------------- tools/perf/builtin-stat.c | 62 ++++++++++++------------- tools/perf/builtin-top.c | 24 +++++----- tools/perf/perf.h | 7 +-- tools/perf/types.h | 17 +++++++ tools/perf/util/parse-events.c | 10 ++-- tools/perf/util/string.c | 2 +- tools/perf/util/string.h | 4 +- tools/perf/util/symbol.c | 20 ++++---- tools/perf/util/symbol.h | 15 +++--- 13 files changed, 176 insertions(+), 156 deletions(-) create mode 100644 tools/perf/types.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 672c5f069c6e..36d7eef49913 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -290,6 +290,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += perf.h +LIB_H += types.h LIB_H += util/list.h LIB_H += util/rbtree.h LIB_H += util/levenshtein.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 94cea678fd7e..7e58e3ad1508 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -50,35 +50,35 @@ static unsigned long mmap_window = 32; struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, tid; + u64 ip; + u32 pid, tid; }; struct mmap_event { struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid, tid; + u32 pid, tid; char comm[16]; }; struct fork_event { struct perf_event_header header; - __u32 pid, ppid; + u32 pid, ppid; }; struct period_event { struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; + u64 time; + u64 id; + u64 sample_period; }; typedef union event_union { @@ -158,7 +158,7 @@ static void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) +static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { return dso__find_symbol(kernel_dso, ip); } @@ -191,19 +191,19 @@ static int load_kernel(void) struct map { struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); + u64 start; + u64 end; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); struct dso *dso; }; -static __u64 map__map_ip(struct map *map, __u64 ip) +static u64 map__map_ip(struct map *map, u64 ip) { return ip - map->start + map->pgoff; } -static __u64 vdso__map_ip(struct map *map, __u64 ip) +static u64 vdso__map_ip(struct map *map, u64 ip) { return ip; } @@ -386,7 +386,7 @@ static int thread__fork(struct thread *self, struct thread *parent) return 0; } -static struct map *thread__find_map(struct thread *self, __u64 ip) +static struct map *thread__find_map(struct thread *self, u64 ip) { struct map *pos; @@ -427,7 +427,7 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; - __u64 ip; + u64 ip; char level; uint32_t count; @@ -532,7 +532,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) if (self->dso) return fprintf(fp, "%-25s", self->dso->name); - return fprintf(fp, "%016llx ", (__u64)self->ip); + return fprintf(fp, "%016llx ", (u64)self->ip); } static struct sort_entry sort_dso = { @@ -546,7 +546,7 @@ static 
struct sort_entry sort_dso = { static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { - __u64 ip_l, ip_r; + u64 ip_l, ip_r; if (left->sym == right->sym) return 0; @@ -563,13 +563,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) size_t ret = 0; if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); + ret += fprintf(fp, "%#018llx ", (u64)self->ip); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : '.', self->sym->name); } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); + ret += fprintf(fp, "%#016llx", (u64)self->ip); } return ret; @@ -660,7 +660,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) /* * collect histogram counts */ -static void hist_hit(struct hist_entry *he, __u64 ip) +static void hist_hit(struct hist_entry *he, u64 ip) { unsigned int sym_size, offset; struct symbol *sym = he->sym; @@ -689,7 +689,7 @@ static void hist_hit(struct hist_entry *he, __u64 ip) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level) + struct symbol *sym, u64 ip, char level) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -861,7 +861,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) int show = 0; struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; + u64 ip = event->ip.ip; struct map *map = NULL; dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", @@ -1062,14 +1062,14 @@ static char *get_color(double percent) } static int -parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) +parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) { char *line = NULL, *tmp, *tmp2; static const char *prev_line; static const char *prev_color; unsigned int offset; size_t line_len; - __u64 line_ip; + u64 line_ip; int ret; char *c; @@ -1191,7 +1191,7 @@ static void free_source_line(struct symbol *sym, int len) /* Get the filename:line for the colored entries */ static void -get_source_line(struct symbol *sym, __u64 start, int len, char *filename) +get_source_line(struct symbol *sym, u64 start, int len, char *filename) { int i; char cmd[PATH_MAX * 2]; @@ -1209,7 +1209,7 @@ get_source_line(struct symbol *sym, __u64 start, int len, char *filename) for (i = 0; i < len; i++) { char *path = NULL; size_t line_len; - __u64 offset; + u64 offset; FILE *fp; sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; @@ -1269,7 +1269,7 @@ static void print_summary(char *filename) static void annotate_sym(struct dso *dso, struct symbol *sym) { char *filename = dso->name; - __u64 start, end, len; + u64 start, end, len; char command[PATH_MAX*2]; FILE *file; @@ -1297,7 +1297,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) if (verbose >= 2) printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (u64)start, (u64)end, filename); if (verbose >= 3) printf("doing: %s\n", command); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 28304677c73e..e2cebc053bd7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -44,7 +44,7 @@ static long samples; static struct timeval last_read; static struct timeval this_read; -static __u64 bytes_written; 
+static u64 bytes_written; static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; @@ -56,18 +56,18 @@ static struct perf_file_header file_header; struct mmap_event { struct perf_event_header header; - __u32 pid; - __u32 tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid; + u32 tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid; - __u32 tid; + u32 pid; + u32 tid; char comm[16]; }; @@ -238,7 +238,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) comm_ev.pid = pid; comm_ev.header.type = PERF_EVENT_COMM; - size = ALIGN(size, sizeof(__u64)); + size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); if (!full) { @@ -315,7 +315,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(mmap_ev.filename, execname, size); - size = ALIGN(size, sizeof(__u64)); + size = ALIGN(size, sizeof(u64)); mmap_ev.len -= mmap_ev.start; mmap_ev.header.size = (sizeof(mmap_ev) - (sizeof(mmap_ev.filename) - size)); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 37b26ecb0d0b..de1b97845e9e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -54,47 +54,47 @@ static int exclude_other = 1; struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, tid; + u64 ip; + u32 pid, tid; unsigned char __more_data[]; }; struct ip_callchain { - __u64 nr; - __u64 ips[0]; + u64 nr; + u64 ips[0]; }; struct mmap_event { struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid, tid; + u32 pid, tid; char comm[16]; }; struct fork_event { struct perf_event_header header; - __u32 pid, ppid; + u32 pid, ppid; }; struct period_event { struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; + u64 time; + u64 id; + u64 sample_period; }; struct lost_event { struct perf_event_header header; - __u64 id; - __u64 lost; + u64 id; + u64 lost; }; typedef union event_union { @@ -163,7 +163,7 @@ static void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) +static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { return dso__find_symbol(kernel_dso, ip); } @@ -210,19 +210,19 @@ static int strcommon(const char *pathname) struct map { struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); + u64 start; + u64 end; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); struct dso *dso; }; -static __u64 map__map_ip(struct map *map, __u64 ip) +static u64 map__map_ip(struct map *map, u64 ip) { return ip - map->start + map->pgoff; } -static __u64 vdso__map_ip(struct map *map, __u64 ip) +static u64 vdso__map_ip(struct map *map, u64 ip) { return ip; } @@ -429,7 +429,7 @@ static int thread__fork(struct thread *self, struct thread *parent) return 0; } -static struct map *thread__find_map(struct thread *self, __u64 ip) +static struct map *thread__find_map(struct thread *self, u64 ip) { struct map *pos; @@ -471,10 +471,10 @@ struct hist_entry { struct dso *dso; struct symbol *sym; struct symbol *parent; - __u64 ip; + u64 ip; char level; - __u64 count; + u64 count; }; /* @@ -574,7 +574,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) if 
(self->dso) return fprintf(fp, "%-25s", self->dso->name); - return fprintf(fp, "%016llx ", (__u64)self->ip); + return fprintf(fp, "%016llx ", (u64)self->ip); } static struct sort_entry sort_dso = { @@ -588,7 +588,7 @@ static struct sort_entry sort_dso = { static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { - __u64 ip_l, ip_r; + u64 ip_l, ip_r; if (left->sym == right->sym) return 0; @@ -605,13 +605,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) size_t ret = 0; if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); + ret += fprintf(fp, "%#018llx ", (u64)self->ip); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : '.', self->sym->name); } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); + ret += fprintf(fp, "%#016llx", (u64)self->ip); } return ret; @@ -745,7 +745,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) } static size_t -hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) +hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { struct sort_entry *se; size_t ret; @@ -793,7 +793,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, - struct dso **dsop, __u64 *ipp) + struct dso **dsop, u64 *ipp) { struct dso *dso = dsop ? *dsop : NULL; struct map *map = mapp ? *mapp : NULL; @@ -852,8 +852,8 @@ static int call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, struct ip_callchain *chain, - char level, __u64 count) + struct symbol *sym, u64 ip, struct ip_callchain *chain, + char level, u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -871,11 +871,11 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, int cmp; if (sort__has_parent && chain) { - __u64 context = PERF_CONTEXT_MAX; + u64 context = PERF_CONTEXT_MAX; int i; for (i = 0; i < chain->nr; i++) { - __u64 ip = chain->ips[i]; + u64 ip = chain->ips[i]; struct dso *dso = NULL; struct symbol *sym; @@ -1032,7 +1032,7 @@ static void output__resort(void) } } -static size_t output__fprintf(FILE *fp, __u64 total_samples) +static size_t output__fprintf(FILE *fp, u64 total_samples) { struct hist_entry *pos; struct sort_entry *se; @@ -1041,7 +1041,7 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) fprintf(fp, "\n"); fprintf(fp, "#\n"); - fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); + fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); @@ -1108,7 +1108,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) chain_size = event->header.size; chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; - if (chain->nr*sizeof(__u64) > chain_size) + if (chain->nr*sizeof(u64) > chain_size) return -1; return 0; @@ -1121,15 +1121,15 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) int show = 0; struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; - __u64 period = 1; + u64 ip = event->ip.ip; + u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { - period = *(__u64 *)more_data; - more_data += sizeof(__u64); + period = *(u64 *)more_data; + more_data 
+= sizeof(u64); } dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e5b3c0ff03a9..6d3eeac1ea25 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -85,29 +85,29 @@ static const unsigned int default_count[] = { static int run_count = 1; static int run_idx = 0; -static __u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_RUN][MAX_COUNTERS]; +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; -//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; +//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; -static __u64 runtime_nsecs[MAX_RUN]; -static __u64 walltime_nsecs[MAX_RUN]; -static __u64 runtime_cycles[MAX_RUN]; +static u64 runtime_nsecs[MAX_RUN]; +static u64 walltime_nsecs[MAX_RUN]; +static u64 runtime_cycles[MAX_RUN]; -static __u64 event_res_avg[MAX_COUNTERS][3]; -static __u64 event_res_noise[MAX_COUNTERS][3]; +static u64 event_res_avg[MAX_COUNTERS][3]; +static u64 event_res_noise[MAX_COUNTERS][3]; -static __u64 event_scaled_avg[MAX_COUNTERS]; +static u64 event_scaled_avg[MAX_COUNTERS]; -static __u64 runtime_nsecs_avg; -static __u64 runtime_nsecs_noise; +static u64 runtime_nsecs_avg; +static u64 runtime_nsecs_noise; -static __u64 walltime_nsecs_avg; -static __u64 walltime_nsecs_noise; +static u64 walltime_nsecs_avg; +static u64 walltime_nsecs_noise; -static __u64 runtime_cycles_avg; -static __u64 runtime_cycles_noise; +static u64 runtime_cycles_avg; +static u64 runtime_cycles_noise; static void create_perf_stat_counter(int counter) { @@ -158,7 +158,7 @@ static inline int nsec_counter(int counter) */ static void read_counter(int counter) { - __u64 *count, single_count[3]; + u64 *count, single_count[3]; ssize_t res; int cpu, nv; int scaled; @@ -172,8 +172,8 @@ static void read_counter(int counter) if (fd[cpu][counter] < 0) continue; - res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); - assert(res == nv * sizeof(__u64)); + res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -251,14 +251,14 @@ static int run_perf_stat(int argc, const char **argv) return WEXITSTATUS(status); } -static void print_noise(__u64 *count, __u64 *noise) +static void print_noise(u64 *count, u64 *noise) { if (run_count > 1) fprintf(stderr, " ( +- %7.3f%% )", (double)noise[0]/(count[0]+1)*100.0); } -static void nsec_printout(int counter, __u64 *count, __u64 *noise) +static void nsec_printout(int counter, u64 *count, u64 *noise) { double msecs = (double)count[0] / 1000000; @@ -274,7 +274,7 @@ static void nsec_printout(int counter, __u64 *count, __u64 *noise) print_noise(count, noise); } -static void abs_printout(int counter, __u64 *count, __u64 *noise) +static void abs_printout(int counter, u64 *count, u64 *noise) { fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); @@ -298,7 +298,7 @@ static void abs_printout(int counter, __u64 *count, __u64 *noise) */ static void print_counter(int counter) { - __u64 *count, *noise; + u64 *count, *noise; int scaled; count = event_res_avg[counter]; @@ -326,16 +326,16 @@ static void print_counter(int counter) /* * normalize_noise noise values down to stddev: */ -static void normalize_noise(__u64 *val) +static void normalize_noise(u64 *val) { double res; res = (double)*val / (run_count * sqrt((double)run_count)); - *val = (__u64)res; + *val = (u64)res; } -static void update_avg(const char *name, int 
idx, __u64 *avg, __u64 *val) +static void update_avg(const char *name, int idx, u64 *avg, u64 *val) { *avg += *val; @@ -380,19 +380,19 @@ static void calc_avg(void) for (i = 0; i < run_count; i++) { runtime_nsecs_noise += - abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); walltime_nsecs_noise += - abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); runtime_cycles_noise += - abs((__s64)(runtime_cycles[i] - runtime_cycles_avg)); + abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); for (j = 0; j < nr_counters; j++) { event_res_noise[j][0] += - abs((__s64)(event_res[i][j][0] - event_res_avg[j][0])); + abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); event_res_noise[j][1] += - abs((__s64)(event_res[i][j][1] - event_res_avg[j][1])); + abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); event_res_noise[j][2] += - abs((__s64)(event_res[i][j][2] - event_res_avg[j][2])); + abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); } } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fe338d3c5d7e..5352b5e352ed 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -54,7 +54,7 @@ static int system_wide = 0; static int default_interval = 100000; -static __u64 count_filter = 5; +static u64 count_filter = 5; static int print_entries = 15; static int target_pid = -1; @@ -79,8 +79,8 @@ static int dump_symtab; * Symbols */ -static __u64 min_ip; -static __u64 max_ip = -1ll; +static u64 min_ip; +static u64 max_ip = -1ll; struct sym_entry { struct rb_node rb_node; @@ -194,7 +194,7 @@ static void print_sym_table(void) 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); if (nr_counters == 1) { - printf("%Ld", attrs[0].sample_period); + printf("%Ld", (u64)attrs[0].sample_period); if (freq) printf("Hz "); else @@ -372,7 +372,7 @@ out_delete_dso: /* * Binary search in the histogram table and record the hit: */ -static void record_ip(__u64 ip, int counter) +static void record_ip(u64 ip, int counter) { struct symbol *sym = dso__find_symbol(kernel_dso, ip); @@ -392,7 +392,7 @@ static void record_ip(__u64 ip, int counter) samples--; } -static void process_event(__u64 ip, int counter) +static void process_event(u64 ip, int counter) { samples++; @@ -463,15 +463,15 @@ static void mmap_read_counter(struct mmap_data *md) for (; old != head;) { struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, target_pid; + u64 ip; + u32 pid, target_pid; }; struct mmap_event { struct perf_event_header header; - __u32 pid, target_pid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, target_pid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 55c62f4b990b..bccb529dac08 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,6 +19,7 @@ #include #include "../../include/linux/perf_counter.h" +#include "types.h" /* * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all @@ -66,9 +67,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_NR_CPUS 256 struct perf_file_header { - __u64 version; - __u64 sample_type; - __u64 data_size; + u64 version; + u64 sample_type; + u64 data_size; }; #endif diff --git a/tools/perf/types.h b/tools/perf/types.h new file mode 100644 index 000000000000..5e75f9005940 --- /dev/null +++ b/tools/perf/types.h @@ -0,0 +1,17 @@ +#ifndef _PERF_TYPES_H +#define _PERF_TYPES_H + +/* + * We define u64 as unsigned long long for 
every architecture + * so that we can print it with %Lx without getting warnings. + */ +typedef unsigned long long u64; +typedef signed long long s64; +typedef unsigned int u32; +typedef signed int s32; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned char u8; +typedef signed char s8; + +#endif /* _PERF_TYPES_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f0c9f2627fe1..35d04da38d6a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -13,8 +13,8 @@ int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; struct event_symbol { - __u8 type; - __u64 config; + u8 type; + u64 config; char *symbol; }; @@ -96,7 +96,7 @@ static char *hw_cache_result [][MAX_ALIASES] = { char *event_name(int counter) { - __u64 config = attrs[counter].config; + u64 config = attrs[counter].config; int type = attrs[counter].type; static char buf[32]; @@ -112,7 +112,7 @@ char *event_name(int counter) return "unknown-hardware"; case PERF_TYPE_HW_CACHE: { - __u8 cache_type, cache_op, cache_result; + u8 cache_type, cache_op, cache_result; static char name[100]; cache_type = (config >> 0) & 0xff; @@ -202,7 +202,7 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a */ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) { - __u64 config, id; + u64 config, id; int type; unsigned int i; const char *sep, *pstr; diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index ec33c0c7f4e2..c93eca9a7be3 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -15,7 +15,7 @@ static int hex(char ch) * While we find nice hex chars, build a long_val. * Return number of chars processed. */ -int hex2u64(const char *ptr, __u64 *long_val) +int hex2u64(const char *ptr, u64 *long_val) { const char *p = ptr; *long_val = 0; diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 72812c1c9a7a..37b03255b425 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,8 +1,8 @@ #ifndef _PERF_STRING_H_ #define _PERF_STRING_H_ -#include +#include "../types.h" -int hex2u64(const char *ptr, __u64 *val); +int hex2u64(const char *ptr, u64 *val); #endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 49a55f813712..86e14375e74e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -9,9 +9,9 @@ const char *sym_hist_filter; -static struct symbol *symbol__new(__u64 start, __u64 len, +static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, - __u64 obj_start, int verbose) + u64 obj_start, int verbose) { size_t namelen = strlen(name) + 1; struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); @@ -21,14 +21,14 @@ static struct symbol *symbol__new(__u64 start, __u64 len, if (verbose >= 2) printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", - (__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); + (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); self->obj_start= obj_start; self->hist = NULL; self->hist_sum = 0; if (sym_hist_filter && !strcmp(name, sym_hist_filter)) - self->hist = calloc(sizeof(__u64), len); + self->hist = calloc(sizeof(u64), len); if (priv_size) { memset(self, 0, priv_size); @@ -89,7 +89,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) { struct rb_node **p = &self->syms.rb_node; struct rb_node *parent = NULL; - const __u64 ip = 
sym->start; + const u64 ip = sym->start; struct symbol *s; while (*p != NULL) { @@ -104,7 +104,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) rb_insert_color(&sym->rb_node, &self->syms); } -struct symbol *dso__find_symbol(struct dso *self, __u64 ip) +struct symbol *dso__find_symbol(struct dso *self, u64 ip) { struct rb_node *n; @@ -151,7 +151,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb goto out_failure; while (!feof(file)) { - __u64 start; + u64 start; struct symbol *sym; int line_len, len; char symbol_type; @@ -232,7 +232,7 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verb goto out_failure; while (!feof(file)) { - __u64 start, size; + u64 start, size; struct symbol *sym; int line_len, len; @@ -353,7 +353,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, { uint32_t nr_rel_entries, idx; GElf_Sym sym; - __u64 plt_offset; + u64 plt_offset; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt; @@ -523,7 +523,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; - __u64 obj_start; + u64 obj_start; if (!elf_sym__is_function(&sym)) continue; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5ad9b06c3f6f..ea332e56e458 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -2,16 +2,17 @@ #define _PERF_SYMBOL_ 1 #include +#include "../types.h" #include "list.h" #include "rbtree.h" struct symbol { struct rb_node rb_node; - __u64 start; - __u64 end; - __u64 obj_start; - __u64 hist_sum; - __u64 *hist; + u64 start; + u64 end; + u64 obj_start; + u64 hist_sum; + u64 *hist; void *priv; char name[0]; }; @@ -20,7 +21,7 @@ struct dso { struct list_head node; struct rb_root syms; unsigned int sym_priv_size; - struct symbol *(*find_symbol)(struct dso *, __u64 ip); + struct symbol *(*find_symbol)(struct dso *, u64 ip); char name[0]; }; @@ -36,7 +37,7 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) return ((void *)sym) - self->sym_priv_size; } -struct symbol *dso__find_symbol(struct dso *self, __u64 ip); +struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose); From 92bf309a9cd5fedd6c8eefbce0b9a95ada82d0a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 18:11:53 +0200 Subject: [PATCH 48/49] perf_counter: Push perf_sample_data through the swcounter code Push the perf_sample_data further outwards to the swcounter interface, to abstract it away some more.
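Concretely (condensed from the diff below), the public entry point now builds a struct perf_sample_data once and hands a single pointer down the callchain, instead of threading the bare regs/addr pair through every helper:

    /* before: every level takes pt_regs + addr */
    __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);

    /* after: package the sample data once at the boundary */
    struct perf_sample_data data = {
            .regs = regs,
            .addr = addr,
    };

    do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data);

Only perf_swcounter_overflow() still has to touch the struct, filling in data->period from counter->hw.last_period before passing it on to perf_counter_overflow().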
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 55 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index adb6ae506d5b..1a933a221ea4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3171,20 +3171,15 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) } static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs, u64 addr) + int nmi, struct perf_sample_data *data) { - struct perf_sample_data data = { - .regs = regs, - .addr = addr, - .period = counter->hw.last_period, - }; + data->period = counter->hw.last_period; perf_swcounter_update(counter); perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, &data)) + if (perf_counter_overflow(counter, nmi, data)) /* soft-disable the counter */ ; - } static int perf_swcounter_is_counting(struct perf_counter *counter) @@ -3249,18 +3244,18 @@ static int perf_swcounter_match(struct perf_counter *counter, } static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct pt_regs *regs, u64 addr) + int nmi, struct perf_sample_data *data) { int neg = atomic64_add_negative(nr, &counter->hw.count); - if (counter->hw.sample_period && !neg && regs) - perf_swcounter_overflow(counter, nmi, regs, addr); + if (counter->hw.sample_period && !neg && data->regs) + perf_swcounter_overflow(counter, nmi, data); } static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum perf_type_id type, u32 event, - u64 nr, int nmi, struct pt_regs *regs, - u64 addr) + enum perf_type_id type, + u32 event, u64 nr, int nmi, + struct perf_sample_data *data) { struct perf_counter *counter; @@ -3269,8 +3264,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_swcounter_match(counter, type, event, regs)) - perf_swcounter_add(counter, nr, nmi, regs, addr); + if (perf_swcounter_match(counter, type, event, data->regs)) + perf_swcounter_add(counter, nr, nmi, data); } rcu_read_unlock(); } @@ -3289,9 +3284,9 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) return &cpuctx->recursion[0]; } -static void __perf_swcounter_event(enum perf_type_id type, u32 event, - u64 nr, int nmi, struct pt_regs *regs, - u64 addr) +static void do_perf_swcounter_event(enum perf_type_id type, u32 event, + u64 nr, int nmi, + struct perf_sample_data *data) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); int *recursion = perf_swcounter_recursion_context(cpuctx); @@ -3304,7 +3299,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event, barrier(); perf_swcounter_ctx_event(&cpuctx->ctx, type, event, - nr, nmi, regs, addr); + nr, nmi, data); rcu_read_lock(); /* * doesn't really matter which of the child contexts the @@ -3312,7 +3307,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event, */ ctx = rcu_dereference(current->perf_counter_ctxp); if (ctx) - perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr); + perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data); rcu_read_unlock(); barrier(); @@ -3325,7 +3320,12 @@ out: void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); + struct perf_sample_data data = { + 
.regs = regs, + .addr = addr, + }; + + do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data); } static void perf_swcounter_read(struct perf_counter *counter) @@ -3469,12 +3469,15 @@ static const struct pmu perf_ops_task_clock = { #ifdef CONFIG_EVENT_PROFILE void perf_tpcounter_event(int event_id) { - struct pt_regs *regs = get_irq_regs(); + struct perf_sample_data data = { + .regs = get_irq_regs(), + .addr = 0, + }; - if (!regs) - regs = task_pt_regs(current); + if (!data.regs) + data.regs = task_pt_regs(current); - __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); + do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); } EXPORT_SYMBOL_GPL(perf_tpcounter_event); From eadc84cc01e04f9f74ec2de0c9355be035c7b396 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Jun 2009 02:01:40 +0200 Subject: [PATCH 49/49] perfcounter: Handle some IO return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building perfcounter tools raises the following warnings: builtin-record.c: In function ‘atexit_header’: builtin-record.c:464: error: ignoring return value of ‘pwrite’, declared with attribute warn_unused_result builtin-record.c: In function ‘__cmd_record’: builtin-record.c:503: error: ignoring return value of ‘read’, declared with attribute warn_unused_result builtin-report.c: In function ‘__cmd_report’: builtin-report.c:1403: error: ignoring return value of ‘read’, declared with attribute warn_unused_result This patch handles these IO return values. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1245456100-5477-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 9 +++++++-- tools/perf/builtin-report.c | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e2cebc053bd7..d7ebbd757543 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -461,7 +461,8 @@ static void atexit_header(void) { file_header.data_size += bytes_written; - pwrite(output, &file_header, sizeof(file_header), 0); + if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) + perror("failed to write on file headers"); } static int __cmd_record(int argc, const char **argv) @@ -500,7 +501,11 @@ static int __cmd_record(int argc, const char **argv) } if (!file_new) { - read(output, &file_header, sizeof(file_header)); + if (read(output, &file_header, sizeof(file_header)) == -1) { + perror("failed to read file headers"); + exit(-1); + } + lseek(output, file_header.data_size, SEEK_CUR); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index de1b97845e9e..5eb5566f0c95 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1400,7 +1400,10 @@ static int __cmd_report(void) exit(0); } - read(input, &file_header, sizeof(file_header)); + if (read(input, &file_header, sizeof(file_header)) == -1) { + perror("failed to read file headers"); + exit(-1); + } if (sort__has_parent && !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) {