diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d3e734bf8056..624527401d4d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -649,6 +649,11 @@ static int bpf_iter_init_array_map(void *priv_data,
 		seq_info->percpu_value_buf = value_buf;
 	}
 
+	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
+	 * released before or in the middle of iterating map elements, so
+	 * acquire an extra map uref for the iterator.
+	 */
+	bpf_map_inc_with_uref(map);
 	seq_info->map = map;
 	return 0;
 }
@@ -657,6 +662,7 @@ static void bpf_iter_fini_array_map(void *priv_data)
 {
 	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
 
+	bpf_map_put_with_uref(seq_info->map);
 	kfree(seq_info->percpu_value_buf);
 }
 
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 2726a5950cfa..24b755eca0b3 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -68,13 +68,18 @@ static void bpf_iter_done_stop(struct seq_file *seq)
 	iter_priv->done_stop = true;
 }
 
+static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo)
+{
+	return tinfo->reg_info->feature & BPF_ITER_RESCHED;
+}
+
 static bool bpf_iter_support_resched(struct seq_file *seq)
 {
 	struct bpf_iter_priv_data *iter_priv;
 
 	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
 				 target_private);
-	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
+	return bpf_iter_target_support_resched(iter_priv->tinfo);
 }
 
 /* maximum visited objects before bailing out */
@@ -537,6 +542,10 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
 	if (!tinfo)
 		return -ENOENT;
 
+	/* Only allow a sleepable program for a resched-able iterator */
+	if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo))
+		return -EINVAL;
+
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 	if (!link)
 		return -ENOMEM;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4d793a92301b..6c530a5e560a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -2060,6 +2060,7 @@ static int bpf_iter_init_hash_map(void *priv_data,
 		seq_info->percpu_value_buf = value_buf;
 	}
 
+	bpf_map_inc_with_uref(map);
 	seq_info->map = map;
 	seq_info->htab = container_of(map, struct bpf_htab, map);
 	return 0;
@@ -2069,6 +2070,7 @@ static void bpf_iter_fini_hash_map(void *priv_data)
 {
 	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
 
+	bpf_map_put_with_uref(seq_info->map);
 	kfree(seq_info->percpu_value_buf);
 }
 
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index a25ec93729b9..1b7f385643b4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -875,10 +875,18 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
 {
 	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
 
+	bpf_map_inc_with_uref(aux->map);
 	seq_info->map = aux->map;
 	return 0;
 }
 
+static void bpf_iter_fini_sk_storage_map(void *priv_data)
+{
+	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
+	bpf_map_put_with_uref(seq_info->map);
+}
+
 static int bpf_iter_attach_map(struct bpf_prog *prog,
 			       union bpf_iter_link_info *linfo,
 			       struct bpf_iter_aux_info *aux)
@@ -896,7 +904,7 @@ static int bpf_iter_attach_map(struct bpf_prog *prog,
 	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 		goto put_map;
 
-	if (prog->aux->max_rdonly_access > map->value_size) {
+	if (prog->aux->max_rdwr_access > map->value_size) {
 		err = -EACCES;
 		goto put_map;
 	}
@@ -924,7 +932,7 @@ static const struct seq_operations bpf_sk_storage_map_seq_ops = {
 static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_ops		= &bpf_sk_storage_map_seq_ops,
 	.init_seq_private	= bpf_iter_init_sk_storage_map,
-	.fini_seq_private	= NULL,
+	.fini_seq_private	= bpf_iter_fini_sk_storage_map,
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_sk_storage_map_info),
 };
 
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 028813dfecb0..9a9fb9487d63 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -783,13 +783,22 @@ static int sock_map_init_seq_private(void *priv_data,
 {
 	struct sock_map_seq_info *info = priv_data;
 
+	bpf_map_inc_with_uref(aux->map);
 	info->map = aux->map;
 	return 0;
 }
 
+static void sock_map_fini_seq_private(void *priv_data)
+{
+	struct sock_map_seq_info *info = priv_data;
+
+	bpf_map_put_with_uref(info->map);
+}
+
 static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
 	.seq_ops		= &sock_map_seq_ops,
 	.init_seq_private	= sock_map_init_seq_private,
+	.fini_seq_private	= sock_map_fini_seq_private,
 	.seq_priv_size		= sizeof(struct sock_map_seq_info),
 };
 
@@ -1369,18 +1378,27 @@ static const struct seq_operations sock_hash_seq_ops = {
 };
 
 static int sock_hash_init_seq_private(void *priv_data,
-				     struct bpf_iter_aux_info *aux)
+				      struct bpf_iter_aux_info *aux)
 {
 	struct sock_hash_seq_info *info = priv_data;
 
+	bpf_map_inc_with_uref(aux->map);
 	info->map = aux->map;
 	info->htab = container_of(aux->map, struct bpf_shtab, map);
 	return 0;
 }
 
+static void sock_hash_fini_seq_private(void *priv_data)
+{
+	struct sock_hash_seq_info *info = priv_data;
+
+	bpf_map_put_with_uref(info->map);
+}
+
 static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
 	.seq_ops		= &sock_hash_seq_ops,
 	.init_seq_private	= sock_hash_init_seq_private,
+	.fini_seq_private	= sock_hash_fini_seq_private,
 	.seq_priv_size		= sizeof(struct sock_hash_seq_info),
 };
 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index a33874b081b6..e89685bd587c 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -28,6 +28,7 @@
 #include "bpf_iter_test_kern6.skel.h"
 #include "bpf_iter_bpf_link.skel.h"
 #include "bpf_iter_ksym.skel.h"
+#include "bpf_iter_sockmap.skel.h"
 
 static int duration;
 
@@ -67,6 +68,50 @@ free_link:
 	bpf_link__destroy(link);
 }
 
+static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog,
+				struct bpf_map *map)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct bpf_link *link;
+	char buf[16] = {};
+	int iter_fd, len;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(map);
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(prog, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_map_iter"))
+		return;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_GE(iter_fd, 0, "create_map_iter")) {
+		bpf_link__destroy(link);
+		return;
+	}
+
+	/* Close link and map fd prematurely */
+	bpf_link__destroy(link);
+	bpf_object__destroy_skeleton(*skel);
+	*skel = NULL;
+
+	/* Try to let the map-free work run first if the map is freed */
+	usleep(100);
+	/* Memory used by both the sock map and the sock local storage map is
+	 * freed after two synchronize_rcu() calls, so wait for both
+	 */
+	kern_sync_rcu();
+	kern_sync_rcu();
+
+	/* Read after both map fd and link fd are closed */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	ASSERT_GE(len, 0, "read_iterator");
+
+	close(iter_fd);
+}
+
 static int read_fd_into_buffer(int fd, char *buf, int size)
 {
 	int bufleft = size;
@@ -634,6 +679,12 @@ static void test_bpf_hash_map(void)
 		goto out;
 	}
 
+	/* A sleepable program is prohibited for a hash map iterator */
+	linfo.map.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.sleepable_dummy_dump, &opts);
+	if (!ASSERT_ERR_PTR(link, "attach_sleepable_prog_to_iter"))
+		goto out;
+
 	linfo.map.map_fd = map_fd;
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (!ASSERT_OK_PTR(link, "attach_iter"))
@@ -827,6 +878,20 @@ out:
 	bpf_iter_bpf_array_map__destroy(skel);
 }
 
+static void test_bpf_array_map_iter_fd(void)
+{
+	struct bpf_iter_bpf_array_map *skel;
+
+	skel = bpf_iter_bpf_array_map__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
+		return;
+
+	do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_array_map,
+			    skel->maps.arraymap1);
+
+	bpf_iter_bpf_array_map__destroy(skel);
+}
+
 static void test_bpf_percpu_array_map(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1009,6 +1074,20 @@ out:
 	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
 }
 
+static void test_bpf_sk_storage_map_iter_fd(void)
+{
+	struct bpf_iter_bpf_sk_storage_map *skel;
+
+	skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
+		return;
+
+	do_read_map_iter_fd(&skel->skeleton, skel->progs.rw_bpf_sk_storage_map,
+			    skel->maps.sk_stg_map);
+
+	bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
 static void test_bpf_sk_storage_map(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1044,7 +1123,15 @@ static void test_bpf_sk_storage_map(void)
 	linfo.map.map_fd = map_fd;
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
-	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
+	link = bpf_program__attach_iter(skel->progs.oob_write_bpf_sk_storage_map, &opts);
+	err = libbpf_get_error(link);
+	if (!ASSERT_EQ(err, -EACCES, "attach_oob_write_iter")) {
+		if (!err)
+			bpf_link__destroy(link);
+		goto out;
+	}
+
+	link = bpf_program__attach_iter(skel->progs.rw_bpf_sk_storage_map, &opts);
 	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
@@ -1052,6 +1139,7 @@ static void test_bpf_sk_storage_map(void)
 	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
+	skel->bss->to_add_val = time(NULL);
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
@@ -1065,6 +1153,13 @@ static void test_bpf_sk_storage_map(void)
 	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
+	for (i = 0; i < num_sockets; i++) {
+		err = bpf_map_lookup_elem(map_fd, &sock_fd[i], &val);
+		if (!ASSERT_OK(err, "map_lookup") ||
+		    !ASSERT_EQ(val, i + 1 + skel->bss->to_add_val, "check_map_value"))
+			break;
+	}
+
 close_iter:
 	close(iter_fd);
 free_link:
@@ -1217,6 +1312,19 @@ out:
 	bpf_iter_task_vma__destroy(skel);
 }
 
+void test_bpf_sockmap_map_iter_fd(void)
+{
+	struct bpf_iter_sockmap *skel;
+
+	skel = bpf_iter_sockmap__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
+		return;
+
+	do_read_map_iter_fd(&skel->skeleton, skel->progs.copy, skel->maps.sockmap);
+
+	bpf_iter_sockmap__destroy(skel);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -1267,10 +1375,14 @@ void test_bpf_iter(void)
 		test_bpf_percpu_hash_map();
 	if (test__start_subtest("bpf_array_map"))
 		test_bpf_array_map();
+	if (test__start_subtest("bpf_array_map_iter_fd"))
+		test_bpf_array_map_iter_fd();
 	if (test__start_subtest("bpf_percpu_array_map"))
 		test_bpf_percpu_array_map();
 	if (test__start_subtest("bpf_sk_storage_map"))
 		test_bpf_sk_storage_map();
+	if (test__start_subtest("bpf_sk_storage_map_iter_fd"))
+		test_bpf_sk_storage_map_iter_fd();
 	if (test__start_subtest("bpf_sk_storage_delete"))
 		test_bpf_sk_storage_delete();
 	if (test__start_subtest("bpf_sk_storage_get"))
@@ -1283,4 +1395,6 @@ void test_bpf_iter(void)
 		test_link_iter();
 	if (test__start_subtest("ksym"))
 		test_ksym_iter();
+	if (test__start_subtest("bpf_sockmap_map_iter_fd"))
+		test_bpf_sockmap_map_iter_fd();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 0aa3cd34cbe3..d7a69217fb68 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -112,3 +112,12 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
 
 	return 0;
 }
+
+SEC("iter.s/bpf_map_elem")
+int sleepable_dummy_dump(struct bpf_iter__bpf_map_elem *ctx)
+{
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(ctx->meta->seq, "map dump starts\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
index 6b70ccaba301..c7b8e006b171 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -16,19 +16,37 @@ struct {
 
 __u32 val_sum = 0;
 __u32 ipv6_sk_count = 0;
+__u32 to_add_val = 0;
 
 SEC("iter/bpf_sk_storage_map")
-int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+int rw_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
 {
 	struct sock *sk = ctx->sk;
 	__u32 *val = ctx->value;
 
-	if (sk == (void *)0 || val == (void *)0)
+	if (sk == NULL || val == NULL)
 		return 0;
 
 	if (sk->sk_family == AF_INET6)
 		ipv6_sk_count++;
 
 	val_sum += *val;
+
+	*val += to_add_val;
+
+	return 0;
+}
+
+SEC("iter/bpf_sk_storage_map")
+int oob_write_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+	struct sock *sk = ctx->sk;
+	__u32 *val = ctx->value;
+
+	if (sk == NULL || val == NULL)
+		return 0;
+
+	*(val + 1) = 0xdeadbeef;
+
 	return 0;
 }