mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-03 23:58:05 +00:00
perf report: Support Retire Latency
The Retire Latency field is added in var3_w of PERF_SAMPLE_WEIGHT_STRUCT. Retire Latency reports the pipeline stall of this instruction compared to the previous instruction, in cycles. It is quite useful to display this information with perf mem report. The p_stage_cyc for Power also comes from var3_w, so union p_stage_cyc and retire_lat to share the code. Implement x86-specific code to display the x86-specific header. Add a new sort key, retire_lat, for the Retire Latency. Reviewed-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lore.kernel.org/lkml/20230104201349.1451191-8-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
ebab291641
commit
d7d213e04c
5 changed files with 30 additions and 1 deletions
|
@ -115,6 +115,8 @@ OPTIONS
|
|||
- p_stage_cyc: On powerpc, this presents the number of cycles spent in a
|
||||
pipeline stage. Currently supported only on powerpc.
|
||||
- addr: (Full) virtual address of the sampled instruction
|
||||
- retire_lat: On X86, this reports pipeline stall of this instruction compared
|
||||
to the previous instruction in cycles. Currently supported only on X86.
|
||||
|
||||
By default, comm, dso and symbol keys are used.
|
||||
(i.e. --sort comm,dso,symbol)
|
||||
|
|
|
@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
|
|||
else {
|
||||
data->weight = weight.var1_dw;
|
||||
data->ins_lat = weight.var2_w;
|
||||
data->retire_lat = weight.var3_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -102,3 +103,22 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
|
|||
*array |= ((u64)data->ins_lat << 32);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Translate a generic sort-entry column header into its Retire Latency
 * counterpart.
 *
 * "Local Pipeline Stage Cycle" becomes "Local Retire Latency" and
 * "Pipeline Stage Cycle" becomes "Retire Latency". Any other header is
 * returned unchanged, i.e. the very pointer that was passed in.
 */
const char *arch_perf_header_entry(const char *se_header)
{
	/* Default: hand the caller's header straight back. */
	const char *header = se_header;

	if (strcmp(se_header, "Local Pipeline Stage Cycle") == 0)
		header = "Local Retire Latency";
	else if (strcmp(se_header, "Pipeline Stage Cycle") == 0)
		header = "Retire Latency";

	return header;
}
|
||||
|
||||
/*
 * Tell whether the given sort key is accepted by this implementation.
 *
 * Returns 1 when sort_key is exactly "p_stage_cyc" or "local_p_stage_cyc",
 * and 0 for every other string.
 */
int arch_support_sort_key(const char *sort_key)
{
	/* The logical OR already evaluates to exactly 1 or 0. */
	return strcmp(sort_key, "p_stage_cyc") == 0 ||
	       strcmp(sort_key, "local_p_stage_cyc") == 0;
}
|
||||
|
|
|
@ -92,7 +92,10 @@ struct perf_sample {
|
|||
u8 cpumode;
|
||||
u16 misc;
|
||||
u16 ins_lat;
|
||||
u16 p_stage_cyc;
|
||||
union {
|
||||
u16 p_stage_cyc;
|
||||
u16 retire_lat;
|
||||
};
|
||||
bool no_hw_idx; /* No hw_idx collected in branch_stack */
|
||||
char insn[MAX_INSN];
|
||||
void *raw_data;
|
||||
|
|
|
@ -2133,6 +2133,8 @@ static struct sort_dimension common_sort_dimensions[] = {
|
|||
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
|
||||
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
|
||||
DIM(SORT_ADDR, "addr", sort_addr),
|
||||
DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
|
||||
DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
|
||||
};
|
||||
|
||||
#undef DIM
|
||||
|
|
|
@ -237,6 +237,8 @@ enum sort_type {
|
|||
SORT_LOCAL_PIPELINE_STAGE_CYC,
|
||||
SORT_GLOBAL_PIPELINE_STAGE_CYC,
|
||||
SORT_ADDR,
|
||||
SORT_LOCAL_RETIRE_LAT,
|
||||
SORT_GLOBAL_RETIRE_LAT,
|
||||
|
||||
/* branch stack specific sort keys */
|
||||
__SORT_BRANCH_STACK,
|
||||
|
|
Loading…
Reference in a new issue