diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f07b23011b22..ebb628332a6e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -30,6 +30,7 @@
 #include "util/tool.h"
 #include "util/data.h"
 #include "arch/common.h"
+#include "util/block-range.h"
 
 #include <dlfcn.h>
 #include <linux/bitmap.h>
@@ -46,6 +47,103 @@ struct perf_annotate {
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+/*
+ * Given one basic block:
+ *
+ *	from	to		branch_i
+ *	* ----> *
+ *		|
+ *		| block
+ *		v
+ *		* ----> *
+ *		from	to	branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+				struct addr_map_symbol *end,
+				struct branch_flags *flags)
+{
+	struct symbol *sym = start->sym;
+	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+	struct block_range_iter iter;
+	struct block_range *entry;
+
+	/*
+	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
+	 */
+	if (!start->addr || start->addr > end->addr)
+		return;
+
+	iter = block_range__create(start->addr, end->addr);
+	if (!block_range_iter__valid(&iter))
+		return;
+
+	/*
+	 * First block in range is a branch target.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_target);
+	entry->entry++;
+
+	do {
+		entry = block_range_iter(&iter);
+
+		entry->coverage++;
+		entry->sym = sym;
+
+		if (notes)
+			notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+	} while (block_range_iter__next(&iter));
+
+	/*
+	 * Last block in range is a branch.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_branch);
+	entry->taken++;
+	if (flags->predicted)
+		entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+				 struct perf_sample *sample)
+{
+	struct addr_map_symbol *prev = NULL;
+	struct branch_info *bi;
+	int i;
+
+	if (!bs || !bs->nr)
+		return;
+
+	bi = sample__resolve_bstack(sample, al);
+	if (!bi)
+		return;
+
+	for (i = bs->nr - 1; i >= 0; i--) {
+		/*
+		 * XXX filter against symbol
+		 */
+		if (prev)
+			process_basic_block(prev, &bi[i].from, &bi[i].flags);
+		prev = &bi[i].to;
+	}
+
+	free(bi);
+}
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
 				  struct perf_sample *sample,
 				  struct addr_location *al,
@@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 		return 0;
 	}
 
+	/*
+	 * XXX filtered samples can still have branch entries pointing into our
+	 * symbol and are missed.
+	 */
+	process_branch_stack(sample->branch_stack, al, sample);
+
 	sample->period = 1;
 	sample->weight = 1;
 
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index f1a6d17c5a37..96f99d608d00 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,5 +1,6 @@
 libperf-y += alias.o
 libperf-y += annotate.o
+libperf-y += block-range.o
 libperf-y += build-id.o
 libperf-y += config.o
 libperf-y += ctype.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 2ff6bd74e435..7a80c7362a03 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
 #include "debug.h"
 #include "annotate.h"
 #include "evsel.h"
+#include "block-range.h"
 #include <regex.h>
 #include <math.h>
 #include <pthread.h>
@@ -859,6 +860,89 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 	return percent;
 }
 
+static const char *annotate__address_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark red for >75% coverage */
+		if (cov > 0.75)
+			return PERF_COLOR_RED;
+
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+	bool emit_comment = true;
+
+	if (!br)
+		return;
+
+#if 1
+	if (br->is_target && br->start == addr) {
+		struct block_range *branch = br;
+		double p;
+
+		/*
+		 * Find matching branch to our target.
+		 */
+		while (!branch->is_branch)
+			branch = block_range__next(branch);
+
+		p = 100 * (double)br->entry / branch->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage joined at this target in relation
+			 * to the next branch.
+			 */
+			printf(" +%.2f%%", p);
+		}
+	}
+#endif
+	if (br->is_branch && br->end == addr) {
+		double p = 100 * (double)br->taken / br->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage leaving at this branch, and
+			 * its prediction ratio.
+ */ + printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred / br->taken); + } + } +} + + static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, int max_lines, struct disasm_line *queue) @@ -878,6 +962,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st s64 offset = dl->offset; const u64 addr = start + offset; struct disasm_line *next; + struct block_range *br; next = disasm__get_next_ip_line(¬es->src->source, dl); @@ -947,8 +1032,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st } printf(" : "); - color_fprintf(stdout, PERF_COLOR_MAGENTA, " %" PRIx64 ":", addr); - color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line); + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); + annotate__branch_printf(br, addr); + printf("\n"); if (ppercents != &percent) free(ppercents); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e96f4daed9b9..ea44e4ff19c6 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -130,6 +130,7 @@ struct annotated_source { struct annotation { pthread_mutex_t lock; + u64 max_coverage; struct annotated_source *src; }; diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c new file mode 100644 index 000000000000..7b3e1d75d803 --- /dev/null +++ b/tools/perf/util/block-range.c @@ -0,0 +1,328 @@ +#include "block-range.h" +#include "annotate.h" + +struct { + struct rb_root root; + u64 blocks; +} block_ranges; + +static void block_range__debug(void) +{ + /* + * XXX still paranoid for now; see if we can make this depend on + * DEBUG=1 builds. + */ +#if 1 + struct rb_node *rb; + u64 old = 0; /* NULL isn't executable */ + + for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) { + struct block_range *entry = rb_entry(rb, struct block_range, node); + + assert(old < entry->start); + assert(entry->start <= entry->end); /* single instruction block; jump to a jump */ + + old = entry->end; + } +#endif +} + +struct block_range *block_range__find(u64 addr) +{ + struct rb_node **p = &block_ranges.root.rb_node; + struct rb_node *parent = NULL; + struct block_range *entry; + + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct block_range, node); + + if (addr < entry->start) + p = &parent->rb_left; + else if (addr > entry->end) + p = &parent->rb_right; + else + return entry; + } + + return NULL; +} + +static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node) +{ + struct rb_node **p = &node->rb_left; + while (*p) { + node = *p; + p = &node->rb_right; + } + rb_link_node(left, node, p); +} + +static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node) +{ + struct rb_node **p = &node->rb_right; + while (*p) { + node = *p; + p = &node->rb_left; + } + rb_link_node(right, node, p); +} + +/** + * block_range__create + * @start: branch target starting this basic block + * @end: branch ending this basic block + * + * Create all the required block ranges to precisely span the given range. 
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *n, *parent = NULL;
+	struct block_range *next, *entry = NULL;
+	struct block_range_iter iter = { NULL, NULL };
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (start < entry->start)
+			p = &parent->rb_left;
+		else if (start > entry->end)
+			p = &parent->rb_right;
+		else
+			break;
+	}
+
+	/*
+	 * Didn't find anything.. there's a hole at @start, however @end might
+	 * be inside/behind the next range.
+	 */
+	if (!*p) {
+		if (!entry) /* tree empty */
+			goto do_whole;
+
+		/*
+		 * If the last node is before, advance one to find the next.
+		 */
+		n = parent;
+		if (entry->end < start) {
+			n = rb_next(n);
+			if (!n)
+				goto do_whole;
+		}
+		next = rb_entry(n, struct block_range, node);
+
+		if (next->start <= end) { /* add head: [start...][n->start...] */
+			struct block_range *head = malloc(sizeof(struct block_range));
+			if (!head)
+				return iter;
+
+			*head = (struct block_range){
+				.start		= start,
+				.end		= next->start - 1,
+				.is_target	= 1,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&head->node, &next->node);
+			rb_insert_color(&head->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.start = head;
+			goto do_tail;
+		}
+
+do_whole:
+		/*
+		 * The whole [start..end] range is non-overlapping.
+		 */
+		entry = malloc(sizeof(struct block_range));
+		if (!entry)
+			return iter;
+
+		*entry = (struct block_range){
+			.start		= start,
+			.end		= end,
+			.is_target	= 1,
+			.is_branch	= 1,
+		};
+
+		rb_link_node(&entry->node, parent, p);
+		rb_insert_color(&entry->node, &block_ranges.root);
+		block_range__debug();
+
+		iter.start = entry;
+		iter.end   = entry;
+		goto done;
+	}
+
+	/*
+	 * We found a range that overlapped with ours, split if needed.
+	 */
+	if (entry->start < start) { /* split: [e->start...][start...] */
+		struct block_range *head = malloc(sizeof(struct block_range));
+		if (!head)
+			return iter;
+
+		*head = (struct block_range){
+			.start		= entry->start,
+			.end		= start - 1,
+			.is_target	= entry->is_target,
+			.is_branch	= 0,
+
+			.coverage	= entry->coverage,
+			.entry		= entry->entry,
+		};
+
+		entry->start		= start;
+		entry->is_target	= 1;
+		entry->entry		= 0;
+
+		rb_link_left_of_node(&head->node, &entry->node);
+		rb_insert_color(&head->node, &block_ranges.root);
+		block_range__debug();
+
+	} else if (entry->start == start)
+		entry->is_target = 1;
+
+	iter.start = entry;
+
+do_tail:
+	/*
+	 * At this point we've got: @iter.start = [@start...] but @end can still be
+	 * inside or beyond it.
+	 */
+	entry = iter.start;
+	for (;;) {
+		/*
+		 * If @end is inside @entry, split.
+		 */
+		if (end < entry->end) { /* split: [...end][...e->end] */
+			struct block_range *tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= end + 1,
+				.end		= entry->end,
+				.is_target	= 0,
+				.is_branch	= entry->is_branch,
+
+				.coverage	= entry->coverage,
+				.taken		= entry->taken,
+				.pred		= entry->pred,
+			};
+
+			entry->end		= end;
+			entry->is_branch	= 1;
+			entry->taken		= 0;
+			entry->pred		= 0;
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = entry;
+			goto done;
+		}
+
+		/*
+		 * If @end matches @entry, done
+		 */
+		if (end == entry->end) {
+			entry->is_branch = 1;
+			iter.end = entry;
+			goto done;
+		}
+
+		next = block_range__next(entry);
+		if (!next)
+			goto add_tail;
+
+		/*
+		 * If @end is beyond @entry but not inside @next, add tail.
+		 */
+		if (end < next->start) { /* add tail: [...e->end][...end] */
+			struct block_range *tail;
+add_tail:
+			tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= end,
+				.is_target	= 0,
+				.is_branch	= 1,
+			};
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = tail;
+			goto done;
+		}
+
+		/*
+		 * If there is a hole between @entry and @next, fill it.
+		 */
+		if (entry->end + 1 != next->start) {
+			struct block_range *hole = malloc(sizeof(struct block_range));
+			if (!hole)
+				return iter;
+
+			*hole = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= next->start - 1,
+				.is_target	= 0,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&hole->node, &next->node);
+			rb_insert_color(&hole->node, &block_ranges.root);
+			block_range__debug();
+		}
+
+		entry = next;
+	}
+
+done:
+	assert(iter.start->start == start && iter.start->is_target);
+	assert(iter.end->end == end && iter.end->is_branch);
+
+	block_ranges.blocks++;
+
+	return iter;
+}
+
+
+/*
+ * Compute coverage as:
+ *
+ *    br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, to reflect that each symbol has a
+ * most covered section.
+ *
+ * Returns [0-1] for coverage and -1 if we had no data whatsoever or the
+ * symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+	struct symbol *sym;
+
+	if (!br) {
+		if (block_ranges.blocks)
+			return 0;
+
+		return -1;
+	}
+
+	sym = br->sym;
+	if (!sym)
+		return -1;
+
+	return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+}
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
new file mode 100644
index 000000000000..a8c841381131
--- /dev/null
+++ b/tools/perf/util/block-range.h
@@ -0,0 +1,71 @@
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include "symbol.h"
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node:	treenode
+ * @start:	inclusive start of range
+ * @end:	inclusive end of range
+ * @is_target:	@start is a jump target
+ * @is_branch:	@end is a branch instruction
+ * @coverage:	number of blocks that cover this range
+ * @entry:	number of times this range was entered at @start (requires @is_target)
+ * @taken:	number of times the branch is taken (requires @is_branch)
+ * @pred:	number of times the taken branch was predicted
+ */
+struct block_range {
+	struct rb_node node;
+
+	struct symbol *sym;
+
+	u64 start;
+	u64 end;
+
+	int is_target, is_branch;
+
+	u64 coverage;
+	u64 entry;
+	u64 taken;
+	u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+	struct rb_node *n = rb_next(&br->node);
+	if (!n)
+		return NULL;
+	return rb_entry(n, struct block_range, node);
+}
+
+struct block_range_iter {
+	struct block_range *start;
+	struct block_range *end;
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+	return iter->start;
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+	if (iter->start == iter->end)
+		return false;
+
+	iter->start = block_range__next(iter->start);
+	return true;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+	if (!iter->start || !iter->end)
+		return false;
+	return true;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
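
A note on intended usage: the iterator API is consumed exactly the way process_basic_block() above consumes it. A minimal standalone sketch, assuming only the block-range.h API this patch adds (the count_block() helper and its addresses are hypothetical):

	#include <assert.h>
	#include "util/block-range.h"

	/* Hypothetical helper: account one observed basic block [start..end]. */
	static void count_block(u64 start, u64 end)
	{
		struct block_range_iter iter;
		struct block_range *entry;

		iter = block_range__create(start, end);
		if (!block_range_iter__valid(&iter))
			return;		/* allocation failed; skip this block */

		/* First range starts at the branch target. */
		entry = block_range_iter(&iter);
		assert(entry->is_target);
		entry->entry++;

		/* Every range spanning [start..end] was executed once more. */
		do {
			entry = block_range_iter(&iter);
			entry->coverage++;
		} while (block_range_iter__next(&iter));

		/* Last range ends in the branch instruction itself. */
		assert(entry->is_branch);
		entry->taken++;
	}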
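The invariants block_range__create() maintains -- ranges never overlap, and @start/@end always land exactly on range boundaries after the call -- are easiest to see on a concrete sequence. A hypothetical trace against an initially empty tree (addresses made up):

	/*
	 * block_range__create(0x10, 0x20);
	 *	tree: [0x10-0x17] -- not yet; whole range is one node:
	 *	tree: [0x10-0x20] is_target=1 is_branch=1
	 *
	 * block_range__create(0x18, 0x20);
	 *	the existing range is split so the new target lands on a
	 *	boundary; the head keeps the old coverage counts:
	 *	tree: [0x10-0x17] is_target=1 is_branch=0
	 *	      [0x18-0x20] is_target=1 is_branch=1
	 *
	 * block_range__create(0x00, 0x28);
	 *	a head is added for the hole before 0x10 and a tail beyond 0x20:
	 *	tree: [0x00-0x0f] is_target=1 is_branch=0
	 *	      [0x10-0x17] is_target=1 is_branch=0
	 *	      [0x18-0x20] is_target=1 is_branch=1
	 *	      [0x21-0x28] is_target=0 is_branch=1
	 */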
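As for what the annotate side ends up printing: annotate__branch_printf() appends a `#` comment to the disassembly line. A worked example with made-up counts:

	/*
	 * For a range whose end is a branch, with
	 *	coverage = 200, taken = 150, pred = 120
	 * the line gains:	# -75.00% (p:80.00%)
	 *	(100 * 150 / 200 = 75% of covering blocks left here;
	 *	 100 * 120 / 150 = 80% of the taken branches were predicted)
	 *
	 * For a range whose start is a target, entered 50 times, against a
	 * following branch with coverage = 200, the line gains:  # +25.00%
	 *	(100 * 50 / 200 = 25% of that coverage joined at this target)
	 */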