perf stat: Setup the foundation to allow aggregation based on cache topology

Processors based on chiplet architecture, such as AMD EPYC and Hygon, do
not expose the chiplet details in the sysfs CPU topology information.
However, this information can be derived from the per-CPU cache level
information in sysfs.

'perf stat' already supports aggregation based on topology information
using core ID, socket ID, etc. It would be useful to also aggregate
based on the cache topology to detect problems like imbalance and
cache-to-cache sharing at various cache levels.

This patch lays the foundation for aggregating data in 'perf stat' based
on the processor's cache topology. The cmdline option to aggregate data
based on the cache topology is added in Patch 4 of the series while this
patch sets up all the necessary functions and variables required to
support the new aggregation option.

The patch also adds support to display per-cache aggregation, or save it
as a JSON or CSV, as splitting it into a separate patch would break
builds when compiling with "-Werror=switch-enum" where the compiler will
complain about the lack of handling for the AGGR_CACHE case in the
output functions.

Committer notes:

Don't use perf_stat_config in tools/perf/util/cpumap.c: that would make
code that lives in util/, and thus is not really specific to a single
builtin, use a builtin-specific config structure.

Move the functions introduced in this patch out of
tools/perf/util/cpumap.c, since they need access to the builtin-specific
config and are not strictly required to live in the util/ directory.

With this 'perf test python' is back building.

Suggested-by: Gautham Shenoy <gautham.shenoy@amd.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wen Pu <puwen@hygon.cn>
Link: https://lore.kernel.org/r/20230517172745.5833-3-kprateek.nayak@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
K Prateek Nayak 2023-05-17 22:57:42 +05:30 committed by Arnaldo Carvalho de Melo
parent 2b72cec9ee
commit 995ed074b8
6 changed files with 249 additions and 1 deletions

View file

@ -11,6 +11,11 @@ struct perf_cpu {
int cpu;
};
/* Identifies one cache instance: its level plus its instance ID. */
struct perf_cache {
/* Cache level of this cache instance. */
int cache_lvl;
/* Cache instance ID; -1 when no instance has been identified. */
int cache;
};
struct perf_cpu_map;
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);

View file

@ -142,6 +142,7 @@ struct perf_stat {
struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
enum aggr_mode aggr_mode;
u32 aggr_level;
};
static struct perf_stat perf_stat;
@ -151,6 +152,7 @@ static volatile sig_atomic_t done = 0;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
.aggr_level = MAX_CACHE_LVL + 1,
.scale = true,
.unit_width = 4, /* strlen("unit") */
.run_count = 1,
@ -1249,8 +1251,132 @@ static struct option stat_options[] = {
OPT_END()
};
/**
 * cpu__get_cache_id_from_map - Derive the cache instance ID from a CPU list.
 * @cpu: CPU whose cache instance is being identified.
 * @map: CPU list string, as read from
 *       /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 *
 * The cache instance ID is the first CPU reported in @map. If the map built
 * from @map is empty, @cpu itself is used as the ID.
 *
 * Return: the cache instance ID.
 */
static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
{
	struct perf_cpu_map *shared_cpus = perf_cpu_map__new(map);
	/* Fall back to the CPU itself unless the map says otherwise. */
	int cache_id = cpu.cpu;

	if (!perf_cpu_map__empty(shared_cpus))
		cache_id = perf_cpu_map__cpu(shared_cpus, 0).cpu;

	/* Release the temporary map that was only needed to find the ID. */
	perf_cpu_map__put(shared_cpus);

	return cache_id;
}
/**
 * cpu__get_cache_details - Fill @cache with the cache level and cache
 * instance ID to aggregate @cpu on.
 * @cpu: CPU to look up.
 * @cache: Output; always initialised, even on failure. cache_lvl starts as
 *         the requested level (0 when none was requested) and cache as -1.
 *
 * The requested level is taken from stat_config.aggr_level; a value above
 * MAX_CACHE_LVL means "no level requested", in which case the entry with the
 * highest level is used. The cache level is read from
 * /sys/devices/system/cpu/cpuX/cache/indexY/level, whereas the cache instance
 * ID is the first CPU reported by
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 *
 * If a level was requested but no entry matches it, the function still
 * succeeds and @cache->cache stays -1.
 *
 * Return: 0 on success, the non-zero value from build_caches_for_cpu() if it
 * fails, or -1 if it reported no cache entries.
 */
static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
{
	struct cpu_cache_level caches[MAX_CACHE_LVL];
	u32 wanted_level = stat_config.aggr_level;
	u32 nr_caches = 0;
	u32 idx;
	int err;

	cache->cache_lvl = (wanted_level > MAX_CACHE_LVL) ? 0 : wanted_level;
	cache->cache = -1;

	err = build_caches_for_cpu(cpu.cpu, caches, &nr_caches);
	if (!err) {
		if (!nr_caches)
			return -1;

		if (wanted_level > MAX_CACHE_LVL) {
			/*
			 * No level was specified by the user: report the
			 * entry with the highest level.
			 */
			u32 top = 0;

			for (idx = 1; idx < nr_caches; ++idx) {
				if (caches[idx].level > caches[top].level)
					top = idx;
			}

			cache->cache_lvl = caches[top].level;
			cache->cache = cpu__get_cache_id_from_map(cpu, caches[top].map);
		} else {
			/*
			 * Several entries may share a level; as each match
			 * overwrites the previous one, the last match wins.
			 */
			for (idx = 0; idx < nr_caches; ++idx) {
				if (caches[idx].level != wanted_level)
					continue;

				cache->cache_lvl = wanted_level;
				cache->cache = cpu__get_cache_id_from_map(cpu, caches[idx].map);
			}
		}
	}

	/*
	 * Entries may have been allocated even if building the topology
	 * failed part-way, so free whatever was reported on every path.
	 */
	for (idx = 0; idx < nr_caches; ++idx)
		cpu_cache_level__free(&caches[idx]);

	return err;
}
/**
* aggr_cpu_id__cache - Create an aggr_cpu_id with cache instache ID, cache
* level, die and socket populated with the cache instache ID, cache level,
* die and socket for cpu. The function signature is compatible with
* aggr_cpu_id_get_t.
*/
static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
{
int ret;
struct aggr_cpu_id id;
struct perf_cache cache;
id = aggr_cpu_id__die(cpu, data);
if (aggr_cpu_id__is_empty(&id))
return id;
ret = cpu__get_cache_details(cpu, &cache);
if (ret)
return id;
id.cache_lvl = cache.cache_lvl;
id.cache = cache.cache;
return id;
}
static const char *const aggr_mode__string[] = {
[AGGR_CORE] = "core",
[AGGR_CACHE] = "cache",
[AGGR_DIE] = "die",
[AGGR_GLOBAL] = "global",
[AGGR_NODE] = "node",
@ -1272,6 +1398,12 @@ static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __m
return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
/*
 * Adapter that exposes aggr_cpu_id__cache() through a (config, cpu)
 * signature. @config is unused and no extra data is passed along.
 */
static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
return aggr_cpu_id__cache(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@ -1324,6 +1456,12 @@ static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *con
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
/*
 * Returns the cache aggregation ID for @cpu by handing
 * perf_stat__get_cache_id to perf_stat__get_aggr() as the getter.
 * NOTE(review): perf_stat__get_aggr() is not visible here; going by the
 * "_cached" suffix it presumably memoises the ID per CPU - confirm.
 */
static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config,
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu);
}
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
struct perf_cpu cpu)
{
@ -1355,6 +1493,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
return aggr_cpu_id__socket;
case AGGR_DIE:
return aggr_cpu_id__die;
case AGGR_CACHE:
return aggr_cpu_id__cache;
case AGGR_CORE:
return aggr_cpu_id__core;
case AGGR_NODE:
@ -1378,6 +1518,8 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
return perf_stat__get_socket_cached;
case AGGR_DIE:
return perf_stat__get_die_cached;
case AGGR_CACHE:
return perf_stat__get_cache_id_cached;
case AGGR_CORE:
return perf_stat__get_core_cached;
case AGGR_NODE:
@ -1490,6 +1632,60 @@ static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, voi
return id;
}
/*
 * Fill in the cache level and cache instance ID of @id for @cpu, using the
 * cache entries recorded in @env.
 *
 * @cache_level is the level requested by the user; a value above
 * MAX_CACHE_LVL means no level was requested. @id->cache_lvl is first set to
 * the requested level (0 when none was requested) and @id->cache to -1; both
 * keep those values when no suitable entry contains @cpu.
 */
static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
					   u32 cache_level, struct aggr_cpu_id *id)
{
	struct cpu_cache_level *caches = env->caches;
	int idx;

	id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
	id->cache = -1;

	/* Walk the entries from last to first; no entries means no iterations. */
	for (idx = env->caches_cnt - 1; idx >= 0; --idx) {
		struct cpu_cache_level *entry = &caches[idx];
		struct perf_cpu_map *shared_cpus;
		int pos;

		/*
		 * If the user has not specified a level, find the first level
		 * with the cpu in the map. Since building the map is
		 * expensive, skip entries whose level does not match when a
		 * level was specified.
		 */
		if (!(cache_level > MAX_CACHE_LVL || entry->level == cache_level))
			continue;

		shared_cpus = perf_cpu_map__new(entry->map);
		pos = perf_cpu_map__idx(shared_cpus, cpu);
		perf_cpu_map__put(shared_cpus);

		if (pos == -1)
			continue;

		id->cache_lvl = entry->level;
		id->cache = cpu__get_cache_id_from_map(cpu, entry->map);
		return;
	}
}
/*
 * Build the cache-level aggregation ID for @cpu from the perf_env passed in
 * @data. An empty ID is returned when @cpu is -1. Otherwise the socket and
 * die come from the env's per-CPU data, and the cache fields are resolved
 * for perf_stat.aggr_level, or for stat_config.aggr_level when the former
 * is 0.
 */
static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
							  void *data)
{
	struct aggr_cpu_id id = aggr_cpu_id__empty();
	struct perf_env *env = data;
	u32 cache_level;

	if (cpu.cpu == -1)
		return id;

	cache_level = perf_stat.aggr_level ? perf_stat.aggr_level
					   : stat_config.aggr_level;

	id.socket = env->cpu[cpu.cpu].socket_id;
	id.die = env->cpu[cpu.cpu].die_id;
	perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id);

	return id;
}
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
@ -1558,6 +1754,12 @@ static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *confi
return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
/*
 * Returns the cache aggregation ID for @cpu, resolved from the perf_env in
 * the header of perf_stat.session. @config is unused.
 */
static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
return perf_env__get_cache_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@ -1589,6 +1791,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
return perf_env__get_socket_aggr_by_cpu;
case AGGR_DIE:
return perf_env__get_die_aggr_by_cpu;
case AGGR_CACHE:
return perf_env__get_cache_aggr_by_cpu;
case AGGR_CORE:
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
@ -1612,6 +1816,8 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
return perf_stat__get_socket_file;
case AGGR_DIE:
return perf_stat__get_die_file;
case AGGR_CACHE:
return perf_stat__get_cache_file;
case AGGR_CORE:
return perf_stat__get_core_file;
case AGGR_NODE:
@ -2127,7 +2333,8 @@ static struct perf_stat perf_stat = {
.stat = perf_event__process_stat_event,
.stat_round = process_stat_round_event,
},
.aggr_mode = AGGR_UNSET,
.aggr_mode = AGGR_UNSET,
.aggr_level = 0,
};
static int __cmd_report(int argc, const char **argv)

View file

@ -222,6 +222,10 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
return a->socket - b->socket;
else if (a->die != b->die)
return a->die - b->die;
else if (a->cache_lvl != b->cache_lvl)
return a->cache_lvl - b->cache_lvl;
else if (a->cache != b->cache)
return a->cache - b->cache;
else if (a->core != b->core)
return a->core - b->core;
else
@ -679,6 +683,8 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b
a->node == b->node &&
a->socket == b->socket &&
a->die == b->die &&
a->cache_lvl == b->cache_lvl &&
a->cache == b->cache &&
a->core == b->core &&
a->cpu.cpu == b->cpu.cpu;
}
@ -689,6 +695,8 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
a->node == -1 &&
a->socket == -1 &&
a->die == -1 &&
a->cache_lvl == -1 &&
a->cache == -1 &&
a->core == -1 &&
a->cpu.cpu == -1;
}
@ -700,6 +708,8 @@ struct aggr_cpu_id aggr_cpu_id__empty(void)
.node = -1,
.socket = -1,
.die = -1,
.cache_lvl = -1,
.cache = -1,
.core = -1,
.cpu = (struct perf_cpu){ .cpu = -1 },
};

View file

@ -20,6 +20,13 @@ struct aggr_cpu_id {
int socket;
/** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
int die;
/** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */
int cache_lvl;
/**
* The cache instance ID, which is the first CPU in the
* /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
*/
int cache;
/** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
int core;
/** CPU aggregation, note there is one CPU for each SMT thread. */

View file

@ -36,6 +36,7 @@
static int aggr_header_lens[] = {
[AGGR_CORE] = 18,
[AGGR_CACHE] = 22,
[AGGR_DIE] = 12,
[AGGR_SOCKET] = 6,
[AGGR_NODE] = 6,
@ -46,6 +47,7 @@ static int aggr_header_lens[] = {
static const char *aggr_header_csv[] = {
[AGGR_CORE] = "core,cpus,",
[AGGR_CACHE] = "cache,cpus,",
[AGGR_DIE] = "die,cpus,",
[AGGR_SOCKET] = "socket,cpus,",
[AGGR_NONE] = "cpu,",
@ -56,6 +58,7 @@ static const char *aggr_header_csv[] = {
static const char *aggr_header_std[] = {
[AGGR_CORE] = "core",
[AGGR_CACHE] = "cache",
[AGGR_DIE] = "die",
[AGGR_SOCKET] = "socket",
[AGGR_NONE] = "cpu",
@ -193,6 +196,10 @@ static void print_aggr_id_std(struct perf_stat_config *config,
case AGGR_CORE:
snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core);
break;
case AGGR_CACHE:
snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d",
id.socket, id.die, id.cache_lvl, id.cache);
break;
case AGGR_DIE:
snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
break;
@ -239,6 +246,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config,
fprintf(output, "S%d-D%d-C%d%s%d%s",
id.socket, id.die, id.core, sep, aggr_nr, sep);
break;
case AGGR_CACHE:
fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s",
id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep);
break;
case AGGR_DIE:
fprintf(output, "S%d-D%d%s%d%s",
id.socket, id.die, sep, aggr_nr, sep);
@ -284,6 +295,10 @@ static void print_aggr_id_json(struct perf_stat_config *config,
fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, id.core, aggr_nr);
break;
case AGGR_CACHE:
fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
break;
case AGGR_DIE:
fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, aggr_nr);
@ -1125,6 +1140,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
case AGGR_NODE:
case AGGR_SOCKET:
case AGGR_DIE:
case AGGR_CACHE:
case AGGR_CORE:
fprintf(output, "#%*s %-*s cpus",
INTERVAL_LEN - 1, "time",
@ -1425,6 +1441,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
switch (config->aggr_mode) {
case AGGR_CORE:
case AGGR_CACHE:
case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NODE:

View file

@ -48,6 +48,7 @@ enum aggr_mode {
AGGR_GLOBAL,
AGGR_SOCKET,
AGGR_DIE,
AGGR_CACHE,
AGGR_CORE,
AGGR_THREAD,
AGGR_UNSET,
@ -64,6 +65,7 @@ typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, str
struct perf_stat_config {
enum aggr_mode aggr_mode;
u32 aggr_level;
bool scale;
bool no_inherit;
bool identifier;