perf topology: Add core_wide

It is possible to optimize metrics when all SMT threads (CPUs) on a
core are measuring events in system wide mode. For example, TMA
metrics defines CORE_CLKS for Sandybrdige as:

if SMT is disabled:
  CPU_CLK_UNHALTED.THREAD
if SMT is enabled and recording on all SMT threads:
  CPU_CLK_UNHALTED.THREAD_ANY / 2
if SMT is enabled and not recording on all SMT threads:
  (CPU_CLK_UNHALTED.THREAD/2)*
  (1+CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE/CPU_CLK_UNHALTED.REF_XCLK )

That is two more events are necessary when not gathering counts on all
SMT threads. To distinguish all SMT threads on a core vs system wide
(all CPUs) call the new property core wide.  Add a core wide test that
determines the property from user requested CPUs, the topology and
system wide. System wide is required as other processes running on a
SMT thread will change the counts.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Ahmad Yasin <ahmad.yasin@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Florian Fischer <florian.fischer@muhq.space>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Miaoqian Lin <linmq006@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220831174926.579643-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ian Rogers 2022-08-31 10:49:23 -07:00 committed by Arnaldo Carvalho de Melo
parent 09b73fe9e3
commit cc2c4e26ec
4 changed files with 70 additions and 0 deletions

View file

@ -172,6 +172,52 @@ bool cpu_topology__smt_on(const struct cpu_topology *topology)
return false;
}
bool cpu_topology__core_wide(const struct cpu_topology *topology,
const char *user_requested_cpu_list)
{
struct perf_cpu_map *user_requested_cpus;
/*
* If user_requested_cpu_list is empty then all CPUs are recorded and so
* core_wide is true.
*/
if (!user_requested_cpu_list)
return true;
user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);
/* Check that every user requested CPU is the complete set of SMT threads on a core. */
for (u32 i = 0; i < topology->core_cpus_lists; i++) {
const char *core_cpu_list = topology->core_cpus_list[i];
struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);
struct perf_cpu cpu;
int idx;
bool has_first, first = true;
perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
if (first) {
has_first = perf_cpu_map__has(user_requested_cpus, cpu);
first = false;
} else {
/*
* If the first core CPU is user requested then
* all subsequent CPUs in the core must be user
* requested too. If the first CPU isn't user
* requested then none of the others must be
* too.
*/
if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
perf_cpu_map__put(core_cpus);
perf_cpu_map__put(user_requested_cpus);
return false;
}
}
}
perf_cpu_map__put(core_cpus);
}
perf_cpu_map__put(user_requested_cpus);
return true;
}
static bool has_die_topology(void)
{
char filename[MAXPATHLEN];

View file

@ -60,6 +60,9 @@ struct cpu_topology *cpu_topology__new(void);
void cpu_topology__delete(struct cpu_topology *tp);
/* Determine from the core list whether SMT was enabled. */
bool cpu_topology__smt_on(const struct cpu_topology *topology);
/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
bool cpu_topology__core_wide(const struct cpu_topology *topology,
const char *user_requested_cpu_list);
struct numa_topology *numa_topology__new(void);
void numa_topology__delete(struct numa_topology *tp);

View file

@ -21,3 +21,17 @@ bool smt_on(const struct cpu_topology *topology)
cached = true;
return cached_result;
}
bool core_wide(bool system_wide, const char *user_requested_cpu_list,
const struct cpu_topology *topology)
{
/* If not everything running on a core is being recorded then we can't use core_wide. */
if (!system_wide)
return false;
/* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
if (!smt_on(topology))
return true;
return cpu_topology__core_wide(topology, user_requested_cpu_list);
}

View file

@ -7,4 +7,11 @@ struct cpu_topology;
/* Returns true if SMT (aka hyperthreading) is enabled. */
bool smt_on(const struct cpu_topology *topology);
/*
* Returns true when system wide and all SMT threads for a core are in the
* user_requested_cpus map.
*/
bool core_wide(bool system_wide, const char *user_requested_cpu_list,
const struct cpu_topology *topology);
#endif /* __SMT_H */