From e6f2dd0f80674e9d5960337b3e9c2a242441b326 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 29 Nov 2021 19:06:19 -0800 Subject: [PATCH 1/4] bpf: Add bpf_loop helper This patch adds the kernel-side and API changes for a new helper function, bpf_loop: long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags); where long (*callback_fn)(u32 index, void *ctx); bpf_loop invokes the "callback_fn" **nr_loops** times or until the callback_fn returns 1. The callback_fn can only return 0 or 1, and this is enforced by the verifier. The callback_fn index is zero-indexed. A few things to please note: ~ The "u64 flags" parameter is currently unused but is included in case a future use case for it arises. ~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c), bpf_callback_t is used as the callback function cast. ~ A program can have nested bpf_loop calls but the program must still adhere to the verifier constraint of its stack depth (the stack depth cannot exceed MAX_BPF_STACK)) ~ Recursive callback_fns do not pass the verifier, due to the call stack for these being too deep. ~ The next patch will include the tests and benchmark Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211130030622.4131246-2-joannekoong@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 25 ++++++++++ kernel/bpf/bpf_iter.c | 35 ++++++++++++++ kernel/bpf/helpers.c | 2 + kernel/bpf/verifier.c | 88 +++++++++++++++++++++------------- tools/include/uapi/linux/bpf.h | 25 ++++++++++ 6 files changed, 142 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc7a0c36e7df..cad0829710be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; +extern const struct bpf_func_proto bpf_loop_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a69e4b04ffeb..211b43afd0fb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4957,6 +4957,30 @@ union bpf_attr { * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. * **-EBUSY** if failed to try lock mmap_lock. * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5140,6 +5164,7 @@ union bpf_attr { FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ FN(find_vma), \ + FN(loop), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b2ee45064e06..b7aef5b3416d 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +/* maximum number of loops */ +#define MAX_LOOPS BIT(23) + +BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, + u64, flags) +{ + bpf_callback_t callback = (bpf_callback_t)callback_fn; + u64 ret; + u32 i; + + if (flags) + return -EINVAL; + if (nr_loops > MAX_LOOPS) + return -E2BIG; + + for (i = 0; i < nr_loops; i++) { + ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); + /* return value: 0 - continue, 1 - stop and return */ + if (ret) + return i + 1; + } + + return i; +} + +const struct bpf_func_proto bpf_loop_proto = { + .func = bpf_loop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ffd469c217f..52188004a9c3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_query_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; + case BPF_FUNC_loop: + return &bpf_loop_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0763cca139a7..d7678d8a925c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_loop_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, + * u64 flags); + * callback_fn(u32 index, void *callback_ctx); + */ + callee->regs[BPF_REG_1].type = SCALAR_VALUE; + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; + return 0; +} + static int set_timer_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } - if (func_id == BPF_FUNC_tail_call) { - err = check_reference_leak(env); - if (err) { - verbose(env, "tail_call would lead to reference leak\n"); - return err; - } - } else if (is_release_function(func_id)) { + if (is_release_function(func_id)) { err = release_reference(env, meta.ref_obj_id); if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", @@ -6475,41 +6490,46 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); - /* check that flags argument in get_local_storage(map, flags) is 0, - * this is required because get_local_storage() can't return an error. - */ - if (func_id == BPF_FUNC_get_local_storage && - !register_is_null(®s[BPF_REG_2])) { - verbose(env, "get_local_storage() doesn't support non-zero flags\n"); - return -EINVAL; - } - - if (func_id == BPF_FUNC_for_each_map_elem) { + switch (func_id) { + case BPF_FUNC_tail_call: + err = check_reference_leak(env); + if (err) { + verbose(env, "tail_call would lead to reference leak\n"); + return err; + } + break; + case BPF_FUNC_get_local_storage: + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (!register_is_null(®s[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; + } + break; + case BPF_FUNC_for_each_map_elem: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_map_elem_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_timer_set_callback) { + break; + case BPF_FUNC_timer_set_callback: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_timer_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_find_vma) { + break; + case BPF_FUNC_find_vma: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_find_vma_callback_state); - if (err < 0) - return -EINVAL; + break; + case BPF_FUNC_snprintf: + err = check_bpf_snprintf_call(env, regs); + break; + case BPF_FUNC_loop: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_loop_callback_state); + break; } - if (func_id == BPF_FUNC_snprintf) { - err = check_bpf_snprintf_call(env, regs); - if (err < 0) - return err; - } + if (err) + return err; /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a69e4b04ffeb..211b43afd0fb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4957,6 +4957,30 @@ union bpf_attr { * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. * **-EBUSY** if failed to try lock mmap_lock. * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5140,6 +5164,7 @@ union bpf_attr { FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ FN(find_vma), \ + FN(loop), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper From 4e5070b64b375a9c1f570893cfceeba108382bef Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 29 Nov 2021 19:06:20 -0800 Subject: [PATCH 2/4] selftests/bpf: Add bpf_loop test Add test for bpf_loop testing a variety of cases: various nr_loops, null callback ctx, invalid flags, nested callbacks. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211130030622.4131246-3-joannekoong@fb.com --- .../selftests/bpf/prog_tests/bpf_loop.c | 145 ++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_loop.c | 112 ++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_loop.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_loop.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c new file mode 100644 index 000000000000..380d7a2072e3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include +#include "bpf_loop.skel.h" + +static void check_nr_loops(struct bpf_loop *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.test_prog); + if (!ASSERT_OK_PTR(link, "link")) + return; + + /* test 0 loops */ + skel->bss->nr_loops = 0; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, + "0 loops"); + + /* test 500 loops */ + skel->bss->nr_loops = 500; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, + "500 loops"); + ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output"); + + /* test exceeding the max limit */ + skel->bss->nr_loops = -1; + + usleep(1); + + ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit"); + + bpf_link__destroy(link); +} + +static void check_callback_fn_stop(struct bpf_loop *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.test_prog); + if (!ASSERT_OK_PTR(link, "link")) + return; + + /* testing that loop is stopped when callback_fn returns 1 */ + skel->bss->nr_loops = 400; + skel->data->stop_index = 50; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1, + "nr_loops_returned"); + ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2, + "g_output"); + + bpf_link__destroy(link); +} + +static void check_null_callback_ctx(struct bpf_loop *skel) +{ + struct bpf_link *link; + + /* check that user is able to pass in a null callback_ctx */ + link = bpf_program__attach(skel->progs.prog_null_ctx); + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->nr_loops = 10; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, + "nr_loops_returned"); + + bpf_link__destroy(link); +} + +static void check_invalid_flags(struct bpf_loop *skel) +{ + struct bpf_link *link; + + /* check that passing in non-zero flags returns -EINVAL */ + link = bpf_program__attach(skel->progs.prog_invalid_flags); + if (!ASSERT_OK_PTR(link, "link")) + return; + + usleep(1); + + ASSERT_EQ(skel->bss->err, -EINVAL, "err"); + + bpf_link__destroy(link); +} + +static void check_nested_calls(struct bpf_loop *skel) +{ + __u32 nr_loops = 100, nested_callback_nr_loops = 4; + struct bpf_link *link; + + /* check that nested calls are supported */ + link = bpf_program__attach(skel->progs.prog_nested_calls); + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->nr_loops = nr_loops; + skel->bss->nested_callback_nr_loops = nested_callback_nr_loops; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops + * nested_callback_nr_loops, "nr_loops_returned"); + ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops + * nr_loops, "g_output"); + + bpf_link__destroy(link); +} + +void test_bpf_loop(void) +{ + struct bpf_loop *skel; + + skel = bpf_loop__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load")) + return; + + skel->bss->pid = getpid(); + + if (test__start_subtest("check_nr_loops")) + check_nr_loops(skel); + if (test__start_subtest("check_callback_fn_stop")) + check_callback_fn_stop(skel); + if (test__start_subtest("check_null_callback_ctx")) + check_null_callback_ctx(skel); + if (test__start_subtest("check_invalid_flags")) + check_invalid_flags(skel); + if (test__start_subtest("check_nested_calls")) + check_nested_calls(skel); + + bpf_loop__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c new file mode 100644 index 000000000000..12349e4601e8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct callback_ctx { + int output; +}; + +/* These should be set by the user program */ +u32 nested_callback_nr_loops; +u32 stop_index = -1; +u32 nr_loops; +int pid; + +/* Making these global variables so that the userspace program + * can verify the output through the skeleton + */ +int nr_loops_returned; +int g_output; +int err; + +static int callback(__u32 index, void *data) +{ + struct callback_ctx *ctx = data; + + if (index >= stop_index) + return 1; + + ctx->output += index; + + return 0; +} + +static int empty_callback(__u32 index, void *data) +{ + return 0; +} + +static int nested_callback2(__u32 index, void *data) +{ + nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0); + + return 0; +} + +static int nested_callback1(__u32 index, void *data) +{ + bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0); + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int test_prog(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0); + + if (nr_loops_returned < 0) + err = nr_loops_returned; + else + g_output = data.output; + + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int prog_null_ctx(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0); + + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int prog_invalid_flags(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + err = bpf_loop(nr_loops, callback, &data, 1); + + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int prog_nested_calls(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + nr_loops_returned = 0; + bpf_loop(nr_loops, nested_callback1, &data, 0); + + g_output = data.output; + + return 0; +} From f6e659b7f97c76d0471d12bf274ea2a097cf3c5c Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 29 Nov 2021 19:06:21 -0800 Subject: [PATCH 3/4] selftests/bpf: Measure bpf_loop verifier performance This patch tests bpf_loop in pyperf and strobemeta, and measures the verifier performance of replacing the traditional for loop with bpf_loop. The results are as follows: ~strobemeta~ Baseline verification time 6808200 usec stack depth 496 processed 554252 insns (limit 1000000) max_states_per_insn 16 total_states 15878 peak_states 13489 mark_read 3110 #192 verif_scale_strobemeta:OK (unrolled loop) Using bpf_loop verification time 31589 usec stack depth 96+400 processed 1513 insns (limit 1000000) max_states_per_insn 2 total_states 106 peak_states 106 mark_read 60 #193 verif_scale_strobemeta_bpf_loop:OK ~pyperf600~ Baseline verification time 29702486 usec stack depth 368 processed 626838 insns (limit 1000000) max_states_per_insn 7 total_states 30368 peak_states 30279 mark_read 748 #182 verif_scale_pyperf600:OK (unrolled loop) Using bpf_loop verification time 148488 usec stack depth 320+40 processed 10518 insns (limit 1000000) max_states_per_insn 10 total_states 705 peak_states 517 mark_read 38 #183 verif_scale_pyperf600_bpf_loop:OK Using the bpf_loop helper led to approximately a 99% decrease in the verification time and in the number of instructions. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com --- .../bpf/prog_tests/bpf_verif_scale.c | 12 +++ tools/testing/selftests/bpf/progs/pyperf.h | 71 +++++++++++++++++- .../selftests/bpf/progs/pyperf600_bpf_loop.c | 6 ++ .../testing/selftests/bpf/progs/strobemeta.h | 75 ++++++++++++++++++- .../selftests/bpf/progs/strobemeta_bpf_loop.c | 9 +++ 5 files changed, 169 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c create mode 100644 tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 27f5d8ea7964..1fb16f8dad56 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -115,6 +115,12 @@ void test_verif_scale_pyperf600() scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); } +void test_verif_scale_pyperf600_bpf_loop(void) +{ + /* use the bpf_loop helper*/ + scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + void test_verif_scale_pyperf600_nounroll() { /* no unroll at all. @@ -165,6 +171,12 @@ void test_verif_scale_strobemeta() scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); } +void test_verif_scale_strobemeta_bpf_loop(void) +{ + /* use the bpf_loop helper*/ + scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + void test_verif_scale_strobemeta_nounroll1() { /* no unroll, tiny loops */ diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 2fb7adafb6b6..1ed28882daf3 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -159,6 +159,59 @@ struct { __uint(value_size, sizeof(long long) * 127); } stackmap SEC(".maps"); +#ifdef USE_BPF_LOOP +struct process_frame_ctx { + int cur_cpu; + int32_t *symbol_counter; + void *frame_ptr; + FrameData *frame; + PidData *pidData; + Symbol *sym; + Event *event; + bool done; +}; + +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) + +static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) +{ + int zero = 0; + void *frame_ptr = ctx->frame_ptr; + PidData *pidData = ctx->pidData; + FrameData *frame = ctx->frame; + int32_t *symbol_counter = ctx->symbol_counter; + int cur_cpu = ctx->cur_cpu; + Event *event = ctx->event; + Symbol *sym = ctx->sym; + + if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { + int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; + int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); + + if (!symbol_id) { + bpf_map_update_elem(&symbolmap, sym, &zero, 0); + symbol_id = bpf_map_lookup_elem(&symbolmap, sym); + if (!symbol_id) { + ctx->done = true; + return 1; + } + } + if (*symbol_id == new_symbol_id) + (*symbol_counter)++; + + barrier_var(i); + if (i >= STACK_MAX_LEN) + return 1; + + event->stack[i] = *symbol_id; + + event->stack_len = i + 1; + frame_ptr = frame->f_back; + } + return 0; +} +#endif /* USE_BPF_LOOP */ + #ifdef GLOBAL_FUNC __noinline #elif defined(SUBPROGS) @@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); if (symbol_counter == NULL) return 0; +#ifdef USE_BPF_LOOP + struct process_frame_ctx ctx = { + .cur_cpu = cur_cpu, + .symbol_counter = symbol_counter, + .frame_ptr = frame_ptr, + .frame = &frame, + .pidData = pidData, + .sym = &sym, + .event = event, + }; + + bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); + if (ctx.done) + return 0; +#else #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else #pragma clang loop unroll(full) -#endif +#endif /* NO_UNROLL */ /* Unwind python stack */ for (int i = 0; i < STACK_MAX_LEN; ++i) { if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { @@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) frame_ptr = frame.f_back; } } +#endif /* USE_BPF_LOOP */ event->stack_complete = frame_ptr == NULL; } else { event->stack_complete = 1; diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c new file mode 100644 index 000000000000..5c2059dc01af --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#define STACK_MAX_LEN 600 +#define USE_BPF_LOOP +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 60c93aee2f4a..753718595c26 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, return payload; } +#ifdef USE_BPF_LOOP +enum read_type { + READ_INT_VAR, + READ_MAP_VAR, + READ_STR_VAR, +}; + +struct read_var_ctx { + struct strobemeta_payload *data; + void *tls_base; + struct strobemeta_cfg *cfg; + void *payload; + /* value gets mutated */ + struct strobe_value_generic *value; + enum read_type type; +}; + +static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +{ + switch (ctx->type) { + case READ_INT_VAR: + if (index >= STROBE_MAX_INTS) + return 1; + read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data); + break; + case READ_MAP_VAR: + if (index >= STROBE_MAX_MAPS) + return 1; + ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload); + break; + case READ_STR_VAR: + if (index >= STROBE_MAX_STRS) + return 1; + ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload); + break; + } + return 0; +} +#endif /* USE_BPF_LOOP */ + /* * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to *right after* payload ends @@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task, */ tls_base = (void *)task; +#ifdef USE_BPF_LOOP + struct read_var_ctx ctx = { + .cfg = cfg, + .tls_base = tls_base, + .value = &value, + .data = data, + .payload = payload, + }; + int err; + + ctx.type = READ_INT_VAR; + err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_INTS) + return NULL; + + ctx.type = READ_STR_VAR; + err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_STRS) + return NULL; + + ctx.type = READ_MAP_VAR; + err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_MAPS) + return NULL; +#else #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_INTS; ++i) { read_int_var(cfg, i, tls_base, &value, data); } @@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { payload += read_str_var(cfg, i, tls_base, &value, data, payload); } @@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task, #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { payload = read_map_var(cfg, i, tls_base, &value, data, payload); } +#endif /* USE_BPF_LOOP */ + /* * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c new file mode 100644 index 000000000000..d18b992f0165 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 100 +#define STROBE_MAX_MAP_ENTRIES 20 +#define USE_BPF_LOOP +#include "strobemeta.h" From ec151037af4f56065d5b258af82f13dbbf279ebd Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 29 Nov 2021 19:06:22 -0800 Subject: [PATCH 4/4] selftest/bpf/benchs: Add bpf_loop benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add benchmark to measure the throughput and latency of the bpf_loop call. Testing this on my dev machine on 1 thread, the data is as follows: nr_loops: 10 bpf_loop - throughput: 198.519 ± 0.155 M ops/s, latency: 5.037 ns/op nr_loops: 100 bpf_loop - throughput: 247.448 ± 0.305 M ops/s, latency: 4.041 ns/op nr_loops: 500 bpf_loop - throughput: 260.839 ± 0.380 M ops/s, latency: 3.834 ns/op nr_loops: 1000 bpf_loop - throughput: 262.806 ± 0.629 M ops/s, latency: 3.805 ns/op nr_loops: 5000 bpf_loop - throughput: 264.211 ± 1.508 M ops/s, latency: 3.785 ns/op nr_loops: 10000 bpf_loop - throughput: 265.366 ± 3.054 M ops/s, latency: 3.768 ns/op nr_loops: 50000 bpf_loop - throughput: 235.986 ± 20.205 M ops/s, latency: 4.238 ns/op nr_loops: 100000 bpf_loop - throughput: 264.482 ± 0.279 M ops/s, latency: 3.781 ns/op nr_loops: 500000 bpf_loop - throughput: 309.773 ± 87.713 M ops/s, latency: 3.228 ns/op nr_loops: 1000000 bpf_loop - throughput: 262.818 ± 4.143 M ops/s, latency: 3.805 ns/op >From this data, we can see that the latency per loop decreases as the number of loops increases. On this particular machine, each loop had an overhead of about ~4 ns, and we were able to run ~250 million loops per second. Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211130030622.4131246-5-joannekoong@fb.com --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/bench.c | 37 ++++++ tools/testing/selftests/bpf/bench.h | 2 + .../selftests/bpf/benchs/bench_bpf_loop.c | 105 ++++++++++++++++++ .../bpf/benchs/run_bench_bpf_loop.sh | 15 +++ .../selftests/bpf/benchs/run_common.sh | 15 +++ .../selftests/bpf/progs/bpf_loop_bench.c | 26 +++++ 7 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_loop.c create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh create mode 100644 tools/testing/selftests/bpf/progs/bpf_loop_bench.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 35684d61aaeb..a6c0e92c86a1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -531,6 +531,7 @@ $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h +$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -540,7 +541,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o \ - $(OUTPUT)/bench_bloom_filter_map.o + $(OUTPUT)/bench_bloom_filter_map.o \ + $(OUTPUT)/bench_bpf_loop.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index c75e7ee28746..3d6082b97a56 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -134,6 +134,39 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) total_ops_mean, total_ops_stddev); } +void ops_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double hits_per_sec, hits_per_prod; + + hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0); + hits_per_prod = hits_per_sec / env.producer_cnt; + + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod); +} + +void ops_report_final(struct bench_res res[], int res_cnt) +{ + double hits_mean = 0.0, hits_stddev = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + + hits_stddev = sqrt(hits_stddev); + } + printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ", + hits_mean, hits_stddev, hits_mean / env.producer_cnt); + printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); +} + const char *argp_program_version = "benchmark"; const char *argp_program_bug_address = ""; const char argp_program_doc[] = @@ -171,10 +204,12 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; +extern struct argp bench_bpf_loop_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, + { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, {}, }; @@ -373,6 +408,7 @@ extern const struct bench bench_bloom_update; extern const struct bench bench_bloom_false_positive; extern const struct bench bench_hashmap_without_bloom; extern const struct bench bench_hashmap_with_bloom; +extern const struct bench bench_bpf_loop; static const struct bench *benchs[] = { &bench_count_global, @@ -404,6 +440,7 @@ static const struct bench *benchs[] = { &bench_bloom_false_positive, &bench_hashmap_without_bloom, &bench_hashmap_with_bloom, + &bench_bpf_loop, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 624c6b11501f..50785503756b 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -59,6 +59,8 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); void false_hits_report_final(struct bench_res res[], int res_cnt); +void ops_report_progress(int iter, struct bench_res *res, long delta_ns); +void ops_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns() { struct timespec t; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c new file mode 100644 index 000000000000..d0a6572bfab6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include "bench.h" +#include "bpf_loop_bench.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_loop_bench *skel; +} ctx; + +static struct { + __u32 nr_loops; +} args = { + .nr_loops = 10, +}; + +enum { + ARG_NR_LOOPS = 4000, +}; + +static const struct argp_option opts[] = { + { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0, + "Set number of loops for the bpf_loop helper"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_LOOPS: + args.nr_loops = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bpf_loop_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) + /* trigger the bpf program */ + syscall(__NR_getpgid); + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup(void) +{ + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = bpf_loop_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = args.nr_loops; +} + +const struct bench bench_bpf_loop = { + .name = "bpf-loop", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh new file mode 100755 index 000000000000..d4f5f73b356b --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for t in 1 4 8 12 16; do +for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do +subtitle "nr_loops: $i, nr_threads: $t" + summarize_ops "bpf_loop: " \ + "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)" + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 9a16be78b180..6c5e6023a69f 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -33,6 +33,14 @@ function percentage() echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" } +function ops() +{ + echo -n "throughput: " + echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", latency: " + echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" +} + function total() { echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" @@ -52,6 +60,13 @@ function summarize_percentage() printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" } +function summarize_ops() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(ops $summary)" +} + function summarize_total() { bench="$1" diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c new file mode 100644 index 000000000000..9dafdc244462 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +u32 nr_loops; +long hits; + +static int empty_callback(__u32 index, void *data) +{ + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int benchmark(void *ctx) +{ + for (int i = 0; i < 1000; i++) { + bpf_loop(nr_loops, empty_callback, NULL, 0); + + __sync_add_and_fetch(&hits, nr_loops); + } + return 0; +}