perf tools fixes for v5.17: 1st batch

- Fix display of grouped aliased events in 'perf stat'.
 
 - Add missing branch_sample_type to perf_event_attr__fprintf().
 
 - Apply correct label to user/kernel symbols in branch mode.
 
 - Fix 'perf ftrace' system_wide tracing, it has to be set before creating the maps.
 
 - Return error if procfs isn't mounted for PID namespaces when synthesizing records
   for pre-existing processes.
 
 - Set error stream of objdump process for 'perf annotate' TUI, to avoid garbling the
   screen.
 
 - Add missing arm64 support to perf_mmap__read_self(), the kernel part got into 5.17.
 
 - Check for NULL pointer before dereference writing debug info about a sample.
 
 - Update UAPI copies for asound, perf_event, prctl and kvm headers.
 
 - Fix a typo in bpf_counter_cgroup.c.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYf/YRAAKCRCyPKLppCJ+
 JxH/AQC27sHgz79DrCuA83CYEBRtzXuv0AJ/naVnUAOYm8OVKwEA5fxeJQj9Kiiw
 46UIQ45fIxFAT448N9t2g61R6ZQw2g4=
 =DUlE
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-for-v5.17-2022-02-06' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix display of grouped aliased events in 'perf stat'.

 - Add missing branch_sample_type to perf_event_attr__fprintf().

 - Apply correct label to user/kernel symbols in branch mode.

 - Fix 'perf ftrace' system_wide tracing, it has to be set before
   creating the maps.

 - Return error if procfs isn't mounted for PID namespaces when
   synthesizing records for pre-existing processes.

 - Set error stream of objdump process for 'perf annotate' TUI, to avoid
   garbling the screen.

 - Add missing arm64 support to perf_mmap__read_self(), the kernel part
   got into 5.17.

 - Check for NULL pointer before dereference writing debug info about a
   sample.

 - Update UAPI copies for asound, perf_event, prctl and kvm headers.

 - Fix a typo in bpf_counter_cgroup.c.

* tag 'perf-tools-fixes-for-v5.17-2022-02-06' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf ftrace: system_wide collection is not effective by default
  libperf: Add arm64 support to perf_mmap__read_self()
  tools include UAPI: Sync sound/asound.h copy with the kernel sources
  perf stat: Fix display of grouped aliased events
  perf tools: Apply correct label to user/kernel symbols in branch mode
  perf bpf: Fix a typo in bpf_counter_cgroup.c
  perf synthetic-events: Return error if procfs isn't mounted for PID namespaces
  perf session: Check for NULL pointer before dereference
  perf annotate: Set error stream of objdump process for TUI
  perf tools: Add missing branch_sample_type to perf_event_attr__fprintf()
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools headers UAPI: Sync linux/prctl.h with the kernel sources
  perf beauty: Make the prctl arg regexp more strict to cope with PR_SET_VMA
  tools headers cpufeatures: Sync with the kernel sources
  tools headers UAPI: Sync linux/perf_event.h with the kernel sources
  tools include UAPI: Sync sound/asound.h copy with the kernel sources
This commit is contained in:
Linus Torvalds 2022-02-06 10:18:23 -08:00
commit 18118a4298
18 changed files with 186 additions and 45 deletions

View File

@ -299,7 +299,9 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */

View File

@ -1624,9 +1624,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
@ -2048,4 +2045,7 @@ struct kvm_stats_desc {
#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
#endif /* __LINUX_KVM_H */

View File

@ -1332,9 +1332,9 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
#define PERF_MEM_HOPS_3 0x04 /* remote board */
#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
#define PERF_MEM_HOPS_3 0x04 /* remote board */
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43

View File

@ -272,4 +272,7 @@ struct prctl_mm_map {
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
#endif /* _LINUX_PRCTL_H */

View File

@ -56,8 +56,10 @@
* *
****************************************************************************/
#define AES_IEC958_STATUS_SIZE 24
struct snd_aes_iec958 {
unsigned char status[24]; /* AES/IEC958 channel status bits */
unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
unsigned char subcode[147]; /* AES/IEC958 subcode bits */
unsigned char pad; /* nothing */
unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
@ -202,6 +204,11 @@ typedef int __bitwise snd_pcm_format_t;
#define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */
#define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */
#define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */
/*
* For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
* available bit count in most significant bit. It's for the case of so-called 'left-justified' or
* `right-padding` sample which has less width than 32 bit.
*/
#define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10)
#define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11)
#define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12)
@ -300,7 +307,7 @@ typedef int __bitwise snd_pcm_subformat_t;
#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */
#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */
#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */
#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */
#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */

View File

@ -13,6 +13,7 @@
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/stringify.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@ -294,6 +295,103 @@ static u64 read_timestamp(void)
return low | ((u64)high) << 32;
}
#elif defined(__aarch64__)
#define read_sysreg(r) ({ \
u64 __val; \
asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
__val; \
})
static u64 read_pmccntr(void)
{
return read_sysreg(pmccntr_el0);
}
#define PMEVCNTR_READ(idx) \
static u64 read_pmevcntr_##idx(void) { \
return read_sysreg(pmevcntr##idx##_el0); \
}
PMEVCNTR_READ(0);
PMEVCNTR_READ(1);
PMEVCNTR_READ(2);
PMEVCNTR_READ(3);
PMEVCNTR_READ(4);
PMEVCNTR_READ(5);
PMEVCNTR_READ(6);
PMEVCNTR_READ(7);
PMEVCNTR_READ(8);
PMEVCNTR_READ(9);
PMEVCNTR_READ(10);
PMEVCNTR_READ(11);
PMEVCNTR_READ(12);
PMEVCNTR_READ(13);
PMEVCNTR_READ(14);
PMEVCNTR_READ(15);
PMEVCNTR_READ(16);
PMEVCNTR_READ(17);
PMEVCNTR_READ(18);
PMEVCNTR_READ(19);
PMEVCNTR_READ(20);
PMEVCNTR_READ(21);
PMEVCNTR_READ(22);
PMEVCNTR_READ(23);
PMEVCNTR_READ(24);
PMEVCNTR_READ(25);
PMEVCNTR_READ(26);
PMEVCNTR_READ(27);
PMEVCNTR_READ(28);
PMEVCNTR_READ(29);
PMEVCNTR_READ(30);
/*
* Read a value direct from PMEVCNTR<idx>
*/
static u64 read_perf_counter(unsigned int counter)
{
static u64 (* const read_f[])(void) = {
read_pmevcntr_0,
read_pmevcntr_1,
read_pmevcntr_2,
read_pmevcntr_3,
read_pmevcntr_4,
read_pmevcntr_5,
read_pmevcntr_6,
read_pmevcntr_7,
read_pmevcntr_8,
read_pmevcntr_9,
read_pmevcntr_10,
read_pmevcntr_11,
read_pmevcntr_13,
read_pmevcntr_12,
read_pmevcntr_14,
read_pmevcntr_15,
read_pmevcntr_16,
read_pmevcntr_17,
read_pmevcntr_18,
read_pmevcntr_19,
read_pmevcntr_20,
read_pmevcntr_21,
read_pmevcntr_22,
read_pmevcntr_23,
read_pmevcntr_24,
read_pmevcntr_25,
read_pmevcntr_26,
read_pmevcntr_27,
read_pmevcntr_28,
read_pmevcntr_29,
read_pmevcntr_30,
read_pmccntr
};
if (counter < ARRAY_SIZE(read_f))
return (read_f[counter])();
return 0;
}
static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }

View File

@ -130,6 +130,9 @@ static int test_stat_user_read(int event)
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = event,
#ifdef __aarch64__
.config1 = 0x2, /* Request user access */
#endif
};
int err, i;
@ -150,7 +153,7 @@ static int test_stat_user_read(int event)
pc = perf_evsel__mmap_base(evsel, 0, 0);
__T("failed to get mmapped address", pc);
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
__T("userspace counter access not supported", pc->cap_user_rdpmc);
__T("userspace counter access not enabled", pc->index);
__T("userspace counter width not set", pc->pmc_width >= 32);

View File

@ -1115,6 +1115,7 @@ enum perf_ftrace_subcommand {
int cmd_ftrace(int argc, const char **argv)
{
int ret;
int (*cmd_func)(struct perf_ftrace *) = NULL;
struct perf_ftrace ftrace = {
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
@ -1221,6 +1222,28 @@ int cmd_ftrace(int argc, const char **argv)
goto out_delete_filters;
}
switch (subcmd) {
case PERF_FTRACE_TRACE:
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
cmd_func = __cmd_ftrace;
break;
case PERF_FTRACE_LATENCY:
if (list_empty(&ftrace.filters)) {
pr_err("Should provide a function to measure\n");
parse_options_usage(ftrace_usage, options, "T", 1);
ret = -EINVAL;
goto out_delete_filters;
}
cmd_func = __cmd_latency;
break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");
ret = -EINVAL;
goto out_delete_filters;
}
ret = target__validate(&ftrace.target);
if (ret) {
char errbuf[512];
@ -1248,27 +1271,7 @@ int cmd_ftrace(int argc, const char **argv)
goto out_delete_evlist;
}
switch (subcmd) {
case PERF_FTRACE_TRACE:
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
ret = __cmd_ftrace(&ftrace);
break;
case PERF_FTRACE_LATENCY:
if (list_empty(&ftrace.filters)) {
pr_err("Should provide a function to measure\n");
parse_options_usage(ftrace_usage, options, "T", 1);
ret = -EINVAL;
goto out_delete_evlist;
}
ret = __cmd_latency(&ftrace);
break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");
ret = -EINVAL;
break;
}
ret = cmd_func(&ftrace);
out_delete_evlist:
evlist__delete(ftrace.evlist);

View File

@ -4,7 +4,7 @@
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
printf "static const char *prctl_options[] = {\n"
regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*'
regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$'
egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
sed -r "s/$regex/\2 \1/g" | \
sort -n | xargs printf "\t[%s] = \"%s\",\n"

View File

@ -2036,6 +2036,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
memset(&objdump_process, 0, sizeof(objdump_process));
objdump_process.argv = objdump_argv;
objdump_process.out = -1;
objdump_process.err = -1;
if (start_command(&objdump_process)) {
pr_err("Failure starting to run %s\n", command);
err = -1;

View File

@ -266,7 +266,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
idx = evsel->core.idx;
err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
if (err) {
pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n",
pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
idx, evsel__name(evsel), evsel->cgrp->name);
goto out;
}

View File

@ -2073,6 +2073,7 @@ static void ip__resolve_ams(struct thread *thread,
ams->addr = ip;
ams->al_addr = al.addr;
ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
@ -2092,6 +2093,7 @@ static void ip__resolve_data(struct thread *thread,
ams->addr = addr;
ams->al_addr = al.addr;
ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;

View File

@ -18,6 +18,7 @@ struct addr_map_symbol {
struct map_symbol ms;
u64 addr;
u64 al_addr;
char al_level;
u64 phys_addr;
u64 data_page_size;
};

View File

@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
bit_name(HW_INDEX),
bit_name(TYPE_SAVE), bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name

View File

@ -1503,11 +1503,12 @@ static int machines__deliver_event(struct machines *machines,
++evlist->stats.nr_unknown_id;
return 0;
}
dump_sample(evsel, event, sample, perf_env__arch(machine->env));
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
dump_sample(evsel, event, sample, perf_env__arch(NULL));
return 0;
}
dump_sample(evsel, event, sample, perf_env__arch(machine->env));
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);

View File

@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
struct addr_map_symbol *from = &he->branch_info->from;
return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
he->level, bf, size, width);
from->al_level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@ -928,7 +928,7 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
struct addr_map_symbol *to = &he->branch_info->to;
return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
he->level, bf, size, width);
to->al_level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");

View File

@ -585,15 +585,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
if (strcmp(evsel__name(alias), evsel__name(counter)) ||
alias->scale != counter->scale ||
alias->cgrp != counter->cgrp ||
strcmp(alias->unit, counter->unit) ||
evsel__is_clock(alias) != evsel__is_clock(counter) ||
!strcmp(alias->pmu_name, counter->pmu_name))
break;
alias->merged_stat = true;
cb(config, alias, data, false);
/* Merge events with the same name, etc. but on different PMUs. */
if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
alias->scale == counter->scale &&
alias->cgrp == counter->cgrp &&
!strcmp(alias->unit, counter->unit) &&
evsel__is_clock(alias) == evsel__is_clock(counter) &&
strcmp(alias->pmu_name, counter->pmu_name)) {
alias->merged_stat = true;
cb(config, alias, data, false);
}
}
}

View File

@ -1784,6 +1784,25 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
perf_event__handler_t process, bool needs_mmap,
bool data_mmap, unsigned int nr_threads_synthesize)
{
/*
* When perf runs in non-root PID namespace, and the namespace's proc FS
* is not mounted, nsinfo__is_in_root_namespace() returns false.
* In this case, the proc FS is coming for the parent namespace, thus
* perf tool will wrongly gather process info from its parent PID
* namespace.
*
* To avoid the confusion that the perf tool runs in a child PID
* namespace but it synthesizes thread info from its parent PID
* namespace, returns failure with warning.
*/
if (!nsinfo__is_in_root_namespace()) {
pr_err("Perf runs in non-root PID namespace but it tries to ");
pr_err("gather process info from its parent PID namespace.\n");
pr_err("Please mount the proc file system properly, e.g. ");
pr_err("add the option '--mount-proc' for unshare command.\n");
return -EPERM;
}
if (target__has_task(target))
return perf_event__synthesize_thread_map(tool, threads, process, machine,
needs_mmap, data_mmap);