bpftool: profile online CPUs instead of possible

The number of online CPUs may not be equal to the number of possible CPUs.
"bpftool prog profile" cannot create a PMU event on every possible CPU,
only on CPUs that are online.

$ dmidecode -s system-product-name
PowerEdge R620
$ cat /sys/devices/system/cpu/possible
0-47
$ cat /sys/devices/system/cpu/online
0-31

Disable cpu dynamically:
$ echo 0 > /sys/devices/system/cpu/cpuX/online

If a CPU is offline, perf_event_open will fail with ENODEV.
To fix this issue:
* check the returned value and skip offline CPUs.
* close pmu_fd immediately on the error path to avoid leaking the fd.

Fixes: 47c09d6a9f ("bpftool: Introduce "prog profile" command")
Signed-off-by: Tonghao Zhang <tong@infragraf.org>
Cc: Quentin Monnet <quentin@isovalent.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20230202131701.29519-1-tong@infragraf.org
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Tonghao Zhang 2023-02-02 21:17:01 +08:00 committed by Martin KaFai Lau
parent 0a312cf8db
commit 377c16fa3f

View file

@ -2233,10 +2233,38 @@ static void profile_close_perf_events(struct profiler_bpf *obj)
profile_perf_event_cnt = 0;
}
/*
 * Open the perf event described by metrics[mid].attr on @cpu (all pids),
 * store the resulting fd in the BPF map @map_fd at index
 * profile_perf_event_cnt, enable the event, and record the fd in
 * profile_perf_events[].
 *
 * If perf_event_open fails with ENODEV the CPU is assumed to be offline:
 * an informational message is printed, the slot is consumed (so that the
 * index of later events is unchanged) and 0 is returned.
 *
 * Returns 0 on success or when the CPU is skipped, -1 on any other failure.
 * On failure after the event was opened, the fd is closed before returning.
 */
static int profile_open_perf_event(int mid, int cpu, int map_fd)
{
	int pmu_fd;

	pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr,
			 -1 /*pid*/, cpu, -1 /*group_fd*/, 0);
	if (pmu_fd < 0) {
		if (errno == ENODEV) {
			p_info("cpu %d may be offline, skip %s profiling.",
				cpu, metrics[mid].name);
			/*
			 * Mark the skipped slot as "no fd". The array is
			 * zero-initialized, so leaving it untouched would
			 * make the slot look like fd 0 (stdin) to any code
			 * that later closes the recorded descriptors.
			 */
			profile_perf_events[profile_perf_event_cnt++] = -1;
			return 0;
		}
		return -1;
	}

	if (bpf_map_update_elem(map_fd,
				&profile_perf_event_cnt,
				&pmu_fd, BPF_ANY) ||
	    ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
		/* The fd was never recorded, so release it here. */
		close(pmu_fd);
		return -1;
	}

	profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
	return 0;
}
static int profile_open_perf_events(struct profiler_bpf *obj)
{
unsigned int cpu, m;
int map_fd, pmu_fd;
int map_fd;
profile_perf_events = calloc(
sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
@ -2255,17 +2283,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
if (!metrics[m].selected)
continue;
for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
-1/*pid*/, cpu, -1/*group_fd*/, 0);
if (pmu_fd < 0 ||
bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
&pmu_fd, BPF_ANY) ||
ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
if (profile_open_perf_event(m, cpu, map_fd)) {
p_err("failed to create event %s on cpu %d",
metrics[m].name, cpu);
return -1;
}
profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
}
}
return 0;