linux-stable/tools/perf/dlfilters/dlfilter-show-cycles.c

145 lines
2.9 KiB
C
Raw Normal View History

perf dlfilter: Add dlfilter-show-cycles Add a new dlfilter to show cycles. Cycle counts are accumulated per CPU (or per thread if CPU is not recorded) from IPC information, and printed together with the change since the last print, at the start of each line. Separate counts are kept for branches, instructions or other events. Note also, the itrace A option can be useful to provide higher granularity cycle information. Example: $ perf record -e intel_pt/cyc/u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.044 MB perf.data ] $ perf script --itrace=A --call-trace --dlfilter dlfilter-show-cycles.so --deltatime | head 0 perf-exec 8509 [001] 0.000000000: psb offs: 0 0 perf-exec 8509 [001] 0.000000000: cbr: 42 freq: 4219 MHz (156%) 833 833 uname 8509 [001] 0.000047689: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _start 833 uname 8509 [001] 0.000003261: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 2015 1182 uname 8509 [001] 0.000000282: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 2676 661 uname 8509 [001] 0.000002629: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 3612 936 uname 8509 [001] 0.000001232: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 4579 967 uname 8509 [001] 0.000002519: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 6145 1566 uname 8509 [001] 0.000001050: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_setup_hash 6239 94 uname 8509 [001] 0.000000023: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_sysdep_start Reviewed-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: https://lore.kernel.org/r/20211027080334.365596-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-10-27 08:03:32 +00:00
// SPDX-License-Identifier: GPL-2.0
/*
* dlfilter-show-cycles.c: Print the number of cycles at the start of each line
* Copyright (c) 2021, Intel Corporation.
*/
#include <perf/perf_dlfilter.h>
#include <string.h>
#include <stdio.h>
/* Number of CPUs for which per-CPU counters are kept; valid indices are 0..MAX_CPU-1 */
#define MAX_CPU 4096
/*
 * Counter categories. event_entry() maps an event name to one of these and
 * the result is used as the second index of the counter arrays below.
 * MAX_ENTRY is the number of categories, not a category itself.
 */
enum {
/* Events whose name starts with "instructions" */
INSTR_CYC,
/* Events whose name starts with "branches" */
BRNCH_CYC,
/* Every other event, including samples with no event name */
OTHER_CYC,
MAX_ENTRY
};
/* Running cycle totals per CPU and category, accumulated in filter_event_early() */
static __u64 cycles[MAX_CPU][MAX_ENTRY];
/* Value of cycles[][] at the last print; filter_event() uses it to compute the delta */
static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];
/*
 * Sizing of the per-thread hash table used when a sample's CPU is not in
 * the range covered by the per-CPU arrays.
 */
#define BITS 16
/* Number of slots in the table (a power of two, so MASK can select a slot) */
#define TABLESZ (1 << BITS)
/* find_entry() stops adding new threads once this many slots are occupied */
#define TABLEMAX (TABLESZ / 2)
/* Applied to a tid to obtain its initial slot index */
#define MASK (TABLESZ - 1)
/* One hash-table slot: the per-thread counterpart of cycles[] / cycles_rpt[] */
static struct entry {
/* Non-zero once the slot has been assigned to a thread */
__u32 used;
/* Thread id that owns this slot */
__s32 tid;
/* Running cycle totals per category */
__u64 cycles[MAX_ENTRY];
/* Totals at the last print, used to compute the delta */
__u64 cycles_rpt[MAX_ENTRY];
} table[TABLESZ];
/* Number of occupied slots in table[] */
static int tid_cnt;
/*
 * Classify an event name into a counter category.
 *
 * Matching is by prefix: a name starting with "instructions" yields
 * INSTR_CYC and a name starting with "branches" yields BRNCH_CYC.
 * A NULL name or any other name yields OTHER_CYC.
 */
static int event_entry(const char *event)
{
	static const struct {
		const char *prefix;
		size_t len;
		int category;
	} kinds[] = {
		{ "instructions", 12, INSTR_CYC },
		{ "branches", 8, BRNCH_CYC },
	};
	size_t i;

	if (event == NULL)
		return OTHER_CYC;

	for (i = 0; i < sizeof(kinds) / sizeof(kinds[0]); i++) {
		if (strncmp(event, kinds[i].prefix, kinds[i].len) == 0)
			return kinds[i].category;
	}

	return OTHER_CYC;
}
/*
 * Look up the hash-table slot for a thread, claiming a free slot if the
 * thread has not been seen before.
 *
 * The search starts at (tid & MASK) and walks forward one slot at a time,
 * wrapping at the end of the table, until it finds the tid or an unused slot.
 *
 * Returns the slot, or NULL (after printing "Too many threads" to stderr)
 * when the tid is not present and TABLEMAX slots are already occupied.
 */
static struct entry *find_entry(__s32 tid)
{
	__u32 idx;

	/* TABLESZ is a power of two, so "& MASK" wraps the index back to 0 */
	for (idx = tid & MASK; table[idx].used; idx = (idx + 1) & MASK) {
		if (table[idx].tid == tid)
			return &table[idx];
	}

	/* idx now names the first free slot on the probe path */
	if (tid_cnt >= TABLEMAX) {
		fprintf(stderr, "Too many threads\n");
		return NULL;
	}

	table[idx].tid = tid;
	table[idx].used = 1;
	tid_cnt++;

	return &table[idx];
}
/*
 * Add cnt cycles to category pos of the given thread's counters.
 * Does nothing if find_entry() cannot supply a slot for the thread.
 */
static void add_entry(__s32 tid, int pos, __u64 cnt)
{
	struct entry *slot = find_entry(tid);

	if (slot == NULL)
		return;

	slot->cycles[pos] += cnt;
}
/*
 * Accumulate the sample's cycle count into the running totals.
 *
 * The count goes to the per-CPU counters when sample->cpu is in
 * [0, MAX_CPU); otherwise to the per-thread counters, provided the tid is
 * not -1. Samples with a zero cycle count are ignored.
 *
 * Always returns 0. data and ctx are unused.
 */
int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
	__s32 cpu_nr = sample->cpu;
	__s32 thread = sample->tid;
	int category;

	if (sample->cyc_cnt == 0)
		return 0;

	category = event_entry(sample->event);

	if (cpu_nr >= 0 && cpu_nr < MAX_CPU) {
		cycles[cpu_nr][category] += sample->cyc_cnt;
		return 0;
	}

	if (thread != -1)
		add_entry(thread, category, sample->cyc_cnt);

	return 0;
}
/*
 * Print the cycle total and the change since the last print as two
 * right-aligned 10-character columns, each followed by a space
 * (22 characters in total for values that fit the column width).
 *
 * @cycles: running cycle total
 * @delta:  change since the last print; when zero the second column is
 *          left blank instead of showing "0"
 */
static void print_vals(__u64 cycles, __u64 delta)
{
	/* Casts keep the arguments matched to %llu whatever __u64 is defined as */
	if (delta)
		printf("%10llu %10llu ", (unsigned long long)cycles, (unsigned long long)delta);
	else
		printf("%10llu %10s ", (unsigned long long)cycles, "");
}
/*
 * Print the cycle columns for this sample and record what was printed.
 *
 * The counters are the per-CPU ones when sample->cpu is in [0, MAX_CPU),
 * otherwise the per-thread ones when the tid is not -1 and a table slot is
 * available. If neither applies, 22 spaces are printed instead.
 *
 * Always returns 0. data and ctx are unused.
 */
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
	__s32 cpu_nr = sample->cpu;
	__s32 thread = sample->tid;
	int category = event_entry(sample->event);
	__u64 *total = NULL;
	__u64 *reported = NULL;

	if (cpu_nr >= 0 && cpu_nr < MAX_CPU) {
		total = &cycles[cpu_nr][category];
		reported = &cycles_rpt[cpu_nr][category];
	} else if (thread != -1) {
		struct entry *slot = find_entry(thread);

		if (slot) {
			total = &slot->cycles[category];
			reported = &slot->cycles_rpt[category];
		}
	}

	if (total == NULL) {
		/* No counters for this sample: emit blank padding instead */
		printf("%22s", "");
		return 0;
	}

	print_vals(*total, *total - *reported);
	/* Remember what was shown so the next delta is relative to it */
	*reported = *total;

	return 0;
}
/*
 * Report the descriptions of this filter.
 *
 * @long_description: if non-NULL, set to point at a static string holding
 *                    the long description. The caller must not modify or
 *                    free it.
 *
 * Returns a static string holding the one-line description.
 */
const char *filter_description(const char **long_description)
{
	/* const array: the text is never modified */
	static const char long_desc[] = "Cycle counts are accumulated per CPU (or "
		"per thread if CPU is not recorded) from IPC information, and "
		"printed together with the change since the last print, at the "
		"start of each line. Separate counts are kept for branches, "
		"instructions or other events.";

	/* Tolerate callers that only want the short description */
	if (long_description)
		*long_description = long_desc;

	return "Print the number of cycles at the start of each line";
}