perf tools fixes for v6.0: 5th batch

- Fail the 'perf test record' entry on error, fixing a regression where just
   setup stuff like allocating memory and not the actual things being tested failed.
 
 - Fixup disabling of -Wdeprecated-declarations for the python scripting engine, the
   previous attempt had a brown paper bag thinko.
 
 - Fix branch stack sampling test to include sanity check for branch filter on PowerPC.
 
 - Update is_ignored_symbol function to match the kernel ignored list, fixing running
   the 'perf test' entry that compares resolving symbols from kallsyms to resolving from
   vmlinux.
 
 - Augment the data source type with ARM's neoverse_spe list, the previous code
   was limited in its search resolving the data source.
 
 - Fix some clang 5 variable set but unused cases.
 
 - Get a perf cgroup more portably in BPF as the __builtin_preserve_enum_value builtin is
   not available in older versions of clang. In those cases we can forgo BPF's CO-RE (Compile
   Once, Run Everywhere).
 
 - More Fixes for Intel's hybrid CPU model.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYzY3aAAKCRCyPKLppCJ+
 J+3XAQDGalZmlY6Y0aUrCKj6+utDy7bUy+xamDaD+6gjJgkVNAD/R9YrI7roaGb3
 rx3gRJu46CD1abzaI1rMZo35DSYICgE=
 =n5Gs
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-for-v6.0-2022-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fail the 'perf test record' entry on error, fixing a regression where
   just setup stuff like allocating memory and not the actual things
   being tested failed.

 - Fixup disabling of -Wdeprecated-declarations for the python scripting
   engine, the previous attempt had a brown paper bag thinko.

 - Fix branch stack sampling test to include sanity check for branch
   filter on PowerPC.

 - Update is_ignored_symbol function to match the kernel ignored list,
   fixing running the 'perf test' entry that compares resolving symbols
   from kallsyms to resolving from vmlinux.

 - Augment the data source type with ARM's neoverse_spe list, the
   previous code was limited in its search resolving the data source.

 - Fix some clang 5 variable set but unused cases.

 - Get a perf cgroup more portably in BPF as the
   __builtin_preserve_enum_value builtin is not available in older
   versions of clang. In those cases we can forgo BPF's CO-RE (Compile
   Once, Run Everywhere).

 - More Fixes for Intel's hybrid CPU model.

* tag 'perf-tools-fixes-for-v6.0-2022-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf build: Fixup disabling of -Wdeprecated-declarations for the python scripting engine
  perf tests mmap-basic: Remove unused variable to address clang 15 warning
  perf parse-events: Ignore clang 15 warning about variable set but unused in bison produced code
  perf tests record: Fail the test if the 'errs' counter is not zero
  perf test: Fix test case 87 ("perf record tests") for hybrid systems
  perf arm-spe: augment the data source type with neoverse_spe list
  perf tests vmlinux-kallsyms: Update is_ignored_symbol function to match the kernel ignored list
  perf tests powerpc: Fix branch stack sampling test to include sanity check for branch filter
  perf parse-events: Remove "not supported" hybrid cache events
  perf print-events: Fix "perf list" can not display the PMU prefix for some hybrid cache events
  perf tools: Get a perf cgroup more portably in BPF
This commit is contained in:
Linus Torvalds 2022-09-30 16:03:19 -07:00
commit c816f2e981
14 changed files with 92 additions and 58 deletions

View File

@ -114,8 +114,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
for (i = 0; i < nsyscalls; ++i)
for (j = 0; j < expected_nr_events[i]; ++j) {
int foo = syscalls[i]();
++foo;
syscalls[i]();
}
md = &evlist->mmap[0];

View File

@ -332,7 +332,7 @@ out_delete_evlist:
out:
if (err == -EACCES)
return TEST_SKIP;
if (err < 0)
if (err < 0 || errs != 0)
return TEST_FAIL;
return TEST_OK;
}

View File

@ -61,7 +61,7 @@ test_register_capture() {
echo "Register capture test [Skipped missing registers]"
return
fi
if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \
if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
-c 1000 --per-thread true 2> /dev/null \
| perf script -F ip,sym,iregs -i - 2> /dev/null \
| egrep -q "DI:"

View File

@ -12,7 +12,8 @@ if ! [ -x "$(command -v cc)" ]; then
fi
# skip the test if the hardware doesn't support branch stack sampling
perf record -b -o- -B true > /dev/null 2>&1 || exit 2
# and if the architecture doesn't support filter types: any,save_type,u
perf record -b -o- -B --branch-filter any,save_type,u true > /dev/null 2>&1 || exit 2
TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)

View File

@ -43,10 +43,11 @@ static bool is_ignored_symbol(const char *name, char type)
/* Symbol names that begin with the following are ignored.*/
static const char * const ignored_prefixes[] = {
"$", /* local symbols for ARM, MIPS, etc. */
".LASANPC", /* s390 kasan local symbols */
".L", /* local labels, .LBB,.Ltmpxxx,.L__unnamed_xx,.LASANPC, etc. */
"__crc_", /* modversions */
"__efistub_", /* arm64 EFI stub namespace */
"__kvm_nvhe_", /* arm64 non-VHE KVM namespace */
"__kvm_nvhe_$", /* arm64 local symbols in non-VHE KVM namespace */
"__kvm_nvhe_.L", /* arm64 local symbols in non-VHE KVM namespace */
"__AArch64ADRPThunk_", /* arm64 lld */
"__ARMV5PILongThunk_", /* arm lld */
"__ARMV7PILongThunk_",

View File

@ -269,7 +269,7 @@ CFLAGS_expr-flex.o += $(flex_flags)
bison_flags := -DYYENABLE_NLS=0
BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
ifeq ($(BISON_GE_35),1)
bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option
else
bison_flags += -w
endif

View File

@ -498,7 +498,7 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
union perf_mem_data_src data_src = { 0 };
bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;

View File

@ -48,6 +48,7 @@ const volatile __u32 num_cpus = 1;
int enabled = 0;
int use_cgroup_v2 = 0;
int perf_subsys_id = -1;
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
@ -58,7 +59,15 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
int level;
int cnt;
cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup);
if (perf_subsys_id == -1) {
#if __has_builtin(__builtin_preserve_enum_value)
perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
perf_event_cgrp_id);
#else
perf_subsys_id = perf_event_cgrp_id;
#endif
}
cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
level = BPF_CORE_READ(cgrp, level);
for (cnt = 0; i < MAX_LEVELS; i++) {

View File

@ -94,6 +94,8 @@ const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
const volatile bool uses_cgroup_v1 = false;
int perf_subsys_id = -1;
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@ -119,11 +121,19 @@ static inline __u64 get_cgroup_id(struct task_struct *t)
{
struct cgroup *cgrp;
if (uses_cgroup_v1)
cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
else
cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
if (!uses_cgroup_v1)
return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id);
if (perf_subsys_id == -1) {
#if __has_builtin(__builtin_preserve_enum_value)
perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
perf_event_cgrp_id);
#else
perf_subsys_id = perf_event_cgrp_id;
#endif
}
cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup);
return BPF_CORE_READ(cgrp, kn, id);
}

View File

@ -33,7 +33,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr,
* If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
*/
attr->type = type;
attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
attr->config = (attr->config & PERF_HW_EVENT_MASK) |
((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
}
static int create_event_hybrid(__u32 config_type, int *idx,
@ -48,13 +49,25 @@ static int create_event_hybrid(__u32 config_type, int *idx,
__u64 config = attr->config;
config_hybrid_attr(attr, config_type, pmu->type);
/*
* Some hybrid hardware cache events are only available on one CPU
* PMU. For example, the 'L1-dcache-load-misses' is only available
* on cpu_core, while the 'L1-icache-loads' is only available on
* cpu_atom. We need to remove "not supported" hybrid cache events.
*/
if (attr->type == PERF_TYPE_HW_CACHE
&& !is_event_supported(attr->type, attr->config))
return 0;
evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
pmu, config_terms);
if (evsel)
if (evsel) {
evsel->pmu_name = strdup(pmu->name);
else
if (!evsel->pmu_name)
return -ENOMEM;
} else
return -ENOMEM;
attr->type = type;
attr->config = config;
return 0;

View File

@ -28,6 +28,7 @@
#include "util/parse-events-hybrid.h"
#include "util/pmu-hybrid.h"
#include "tracepoint.h"
#include "thread_map.h"
#define MAX_NAME_LEN 100
@ -157,6 +158,44 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
bool is_event_supported(u8 type, u64 config)
{
bool ret = true;
int open_return;
struct evsel *evsel;
struct perf_event_attr attr = {
.type = type,
.config = config,
.disabled = 1,
};
struct perf_thread_map *tmap = thread_map__new_by_tid(0);
if (tmap == NULL)
return false;
evsel = evsel__new(&attr);
if (evsel) {
open_return = evsel__open(evsel, NULL, tmap);
ret = open_return >= 0;
if (open_return == -EACCES) {
/*
* This happens if the paranoid value
* /proc/sys/kernel/perf_event_paranoid is set to 2
* Re-run with exclude_kernel set; we don't do that
* by default as some ARM machines do not support it.
*
*/
evsel->core.attr.exclude_kernel = 1;
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
evsel__delete(evsel);
}
perf_thread_map__put(tmap);
return ret;
}
const char *event_type(int type)
{
switch (type) {

View File

@ -19,6 +19,7 @@ struct option;
struct perf_pmu;
bool have_tracepoints(struct list_head *evlist);
bool is_event_supported(u8 type, u64 config);
const char *event_type(int type);

View File

@ -22,7 +22,6 @@
#include "probe-file.h"
#include "string2.h"
#include "strlist.h"
#include "thread_map.h"
#include "tracepoint.h"
#include "pfm.h"
#include "pmu-hybrid.h"
@ -239,44 +238,6 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob,
strlist__delete(sdtlist);
}
static bool is_event_supported(u8 type, unsigned int config)
{
bool ret = true;
int open_return;
struct evsel *evsel;
struct perf_event_attr attr = {
.type = type,
.config = config,
.disabled = 1,
};
struct perf_thread_map *tmap = thread_map__new_by_tid(0);
if (tmap == NULL)
return false;
evsel = evsel__new(&attr);
if (evsel) {
open_return = evsel__open(evsel, NULL, tmap);
ret = open_return >= 0;
if (open_return == -EACCES) {
/*
* This happens if the paranoid value
* /proc/sys/kernel/perf_event_paranoid is set to 2
* Re-run with exclude_kernel set; we don't do that
* by default as some ARM machines do not support it.
*
*/
evsel->core.attr.exclude_kernel = 1;
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
evsel__delete(evsel);
}
perf_thread_map__put(tmap);
return ret;
}
int print_hwcache_events(const char *event_glob, bool name_only)
{
unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0;

View File

@ -3,4 +3,4 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-error=deprecated-declarations
CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations