libperf: Add support for user space counter access

x86 and arm64 can both support direct access of event counters in
userspace. The access sequence is less than trivial and currently exists
in perf test code (tools/perf/arch/x86/tests/rdpmc.c) with copies in
projects such as PAPI and libpfm4.

In order to support userspace access, an event must be mmapped first
with perf_evsel__mmap(). Then subsequent calls to perf_evsel__read()
will use the fast path (assuming the arch supports it).

Committer notes:

Added a '__maybe_unused' attribute to the read_perf_counter() argument
to fix the build on arches other than x86 (i386/x86_64), where only the
stub that ignores its argument is compiled.

Committer testing:

  Building and running the libperf tests in verbose mode (V=1) now shows
  extra "loop = N, count = M" lines from test-evsel.c, which exercise user
  space counter access.

  # make V=1 -C tools/lib/perf tests
  make: Entering directory '/home/acme/git/perf/tools/lib/perf'
  make -f /home/acme/git/perf/tools/build/Makefile.build dir=. obj=libperf
  make -C /home/acme/git/perf/tools/lib/api/ O= libapi.a
  make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fd obj=libapi
  make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fs obj=libapi
  make -C tests
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-cpumap-a test-cpumap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-threadmap-a test-threadmap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evlist-a test-evlist.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evsel-a test-evsel.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-cpumap-so test-cpumap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-threadmap-so test-threadmap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evlist-so test-evlist.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf
  gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evsel-so test-evsel.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf
  make -C tests run
  running static:
  - running test-cpumap.c...OK
  - running test-threadmap.c...OK
  - running test-evlist.c...OK
  - running test-evsel.c...
  	loop = 65536, count = 333926
  	loop = 131072, count = 655781
  	loop = 262144, count = 1311141
  	loop = 524288, count = 2630126
  	loop = 1048576, count = 5256955
  	loop = 65536, count = 524594
  	loop = 131072, count = 1058916
  	loop = 262144, count = 2097458
  	loop = 524288, count = 4205429
  	loop = 1048576, count = 8406606
  OK
  running dynamic:
  - running test-cpumap.c...OK
  - running test-threadmap.c...OK
  - running test-evlist.c...OK
  - running test-evsel.c...
  	loop = 65536, count = 328102
  	loop = 131072, count = 655782
  	loop = 262144, count = 1317494
  	loop = 524288, count = 2627851
  	loop = 1048576, count = 5255187
  	loop = 65536, count = 524601
  	loop = 131072, count = 1048923
  	loop = 262144, count = 2107917
  	loop = 524288, count = 4194606
  	loop = 1048576, count = 8409322
  OK
  make: Leaving directory '/home/acme/git/perf/tools/lib/perf'
  #

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Itaru Kitayama <itaru.kitayama@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20210414155412.3697605-4-robh@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Rob Herring 2021-04-14 11:07:39 -05:00 committed by Arnaldo Carvalho de Melo
parent d3003d9e68
commit 47d01e7b99
4 changed files with 161 additions and 0 deletions

View file

@ -267,6 +267,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
if (MMAP(evsel, cpu, thread) &&
!perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
return 0;
if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
return -errno;

View file

@ -11,6 +11,7 @@
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
struct perf_mmap;
struct perf_counts_values;
typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
@ -52,4 +53,6 @@ void perf_mmap__put(struct perf_mmap *map);
u64 perf_mmap__read_head(struct perf_mmap *map);
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count);
#endif /* __LIBPERF_INTERNAL_MMAP_H */

View file

@ -8,9 +8,11 @@
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@ -273,3 +275,89 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
return event;
}
#if defined(__i386__) || defined(__x86_64__)
/*
 * Read performance counter @counter with the RDPMC instruction.
 *
 * The selector is passed in the "c" register and the result comes back
 * as two 32-bit halves in the "a" (low) and "d" (high) registers, which
 * are merged into a single 64-bit value.
 */
static u64 read_perf_counter(unsigned int counter)
{
	unsigned int lo, hi;
	u64 value;

	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));

	value = hi;
	value <<= 32;
	value |= lo;
	return value;
}

/*
 * Read the timestamp counter with the RDTSC instruction, merging the
 * "a" (low) and "d" (high) register halves into one 64-bit value.
 */
static u64 read_timestamp(void)
{
	unsigned int lo, hi;
	u64 value;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));

	value = hi;
	value <<= 32;
	value |= lo;
	return value;
}
#else
/*
 * Fallback for every other architecture: no counter or timestamp read is
 * implemented here, so both helpers simply return 0.
 */
static u64 read_perf_counter(unsigned int counter __maybe_unused)
{
	return 0;
}

static u64 read_timestamp(void)
{
	return 0;
}
#endif
/*
 * perf_mmap__read_self - read an event count directly from user space
 * @map:   mapping whose ->base is used as the event's perf_event_mmap_page
 * @count: output; ->val, ->ena and ->run are written
 *
 * Takes a snapshot of the mmapped control page inside a retry loop keyed on
 * pc->lock, and adds the value returned by read_perf_counter() to pc->offset.
 * When the enabled and running times differ, both are advanced by a time
 * delta and the count is scaled accordingly.
 *
 * Returns 0 on success. Returns -1 when the page is missing, when
 * cap_user_rdpmc is clear, or when pc->index reads as 0; on the -1 return
 * taken inside the loop, count->ena and count->run have already been
 * overwritten.
 */
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
struct perf_event_mmap_page *pc = map->base;
u32 seq, idx, time_mult = 0, time_shift = 0;
/* time_cycles = 0 and time_mask = ~0 make the "short time" fixup below an identity. */
u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
/* No control page mapped, or user space counter reads are not permitted. */
if (!pc || !pc->cap_user_rdpmc)
return -1;
/* Repeat the snapshot until pc->lock reads the same before and after it. */
do {
seq = READ_ONCE(pc->lock);
/* NOTE(review): barrier() is assumed to keep the reads below after the pc->lock read -- confirm against its definition. */
barrier();
count->ena = READ_ONCE(pc->time_enabled);
count->run = READ_ONCE(pc->time_running);
/* The time conversion parameters are only needed when ena and run differ. */
if (pc->cap_user_time && count->ena != count->run) {
cyc = read_timestamp();
time_mult = READ_ONCE(pc->time_mult);
time_shift = READ_ONCE(pc->time_shift);
time_offset = READ_ONCE(pc->time_offset);
if (pc->cap_user_time_short) {
time_cycles = READ_ONCE(pc->time_cycles);
time_mask = READ_ONCE(pc->time_mask);
}
}
idx = READ_ONCE(pc->index);
cnt = READ_ONCE(pc->offset);
/* pc->index is passed to read_perf_counter() minus one; an index of 0 is treated as "cannot read". */
if (pc->cap_user_rdpmc && idx) {
s64 evcnt = read_perf_counter(idx - 1);
u16 width = READ_ONCE(pc->pmc_width);
/* Shift up and back down on a signed value: intended to sign-extend the width-bit raw counter. */
evcnt <<= 64 - width;
evcnt >>= 64 - width;
cnt += evcnt;
} else
return -1;
barrier();
} while (READ_ONCE(pc->lock) != seq);
/* Enabled and running times differ: advance both by a time delta and scale the count. */
if (count->ena != count->run) {
u64 delta;
/* Adjust for cap_usr_time_short, a nop if not */
cyc = time_cycles + ((cyc - time_cycles) & time_mask);
delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
count->ena += delta;
/* idx is always non-zero here: the loop above returns -1 otherwise. */
if (idx)
count->run += delta;
/* presumably cnt * ena / run -- confirm against mul_u64_u64_div64() */
cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
}
count->val = cnt;
return 0;
}

View file

@ -120,6 +120,70 @@ static int test_stat_thread_enable(void)
return 0;
}
/*
 * Test reading a hardware counter through perf_evsel__read() after the
 * event has been mmapped with perf_evsel__mmap().
 *
 * @event: value stored in attr.config for a PERF_TYPE_HARDWARE event
 *         (main() passes PERF_COUNT_HW_INSTRUCTIONS and
 *         PERF_COUNT_HW_CPU_CYCLES).
 *
 * The test opens the event on a single-entry thread map, maps it, and
 * then times five busy loops, each twice as long as the previous one.
 * Every loop must produce a larger counter delta than the loop before it.
 *
 * Returns 0 when the end of the function is reached. Failed checks are
 * reported through __T(), which is defined outside this view.
 */
static int test_stat_user_read(int event)
{
	struct perf_counts_values counts = { .val = 0 };
	struct perf_thread_map *threads;
	struct perf_evsel *evsel;
	struct perf_event_mmap_page *pc;
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= event,
	};
	/*
	 * Delta measured by the previous loop iteration. It must live outside
	 * the loop: when it was re-initialised to 0 on every pass, the
	 * comparison below only ever checked for a non-zero delta.
	 */
	__u64 last = 0;
	int err, i;

	threads = perf_thread_map__new_dummy();
	__T("failed to create threads", threads);

	/* presumably pid 0 selects the calling thread -- confirm */
	perf_thread_map__set_pid(threads, 0, 0);

	evsel = perf_evsel__new(&attr);
	__T("failed to create evsel", evsel);

	err = perf_evsel__open(evsel, NULL, threads);
	__T("failed to open evsel", err == 0);

	err = perf_evsel__mmap(evsel, 0);
	__T("failed to mmap evsel", err == 0);

	/* Check the mapping before it is dereferenced below. */
	pc = perf_evsel__mmap_base(evsel, 0, 0);
	__T("failed to get mmapped address", pc);

#if defined(__i386__) || defined(__x86_64__)
	__T("userspace counter access not supported", pc->cap_user_rdpmc);
	__T("userspace counter access not enabled", pc->index);
	__T("userspace counter width not set", pc->pmc_width >= 32);
#endif

	perf_evsel__read(evsel, 0, 0, &counts);
	__T("failed to read value for evsel", counts.val != 0);

	for (i = 0; i < 5; i++) {
		/* volatile so the empty busy loop below is not optimised away */
		volatile int count = 0x10000 << i;
		__u64 start, end;

		__T_VERBOSE("\tloop = %u, ", count);

		perf_evsel__read(evsel, 0, 0, &counts);
		start = counts.val;

		while (count--) ;

		perf_evsel__read(evsel, 0, 0, &counts);
		end = counts.val;

		/* Twice the work must yield a larger delta than the last loop. */
		__T("invalid counter data", (end - start) > last);
		last = end - start;

		__T_VERBOSE("count = %llu\n", end - start);
	}

	perf_evsel__munmap(evsel);
	perf_evsel__close(evsel);
	perf_evsel__delete(evsel);

	perf_thread_map__put(threads);
	return 0;
}
int main(int argc, char **argv)
{
__T_START;
@ -129,6 +193,8 @@ int main(int argc, char **argv)
test_stat_cpu();
test_stat_thread();
test_stat_thread_enable();
test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
__T_END;
return tests_failed == 0 ? 0 : -1;