Merge branch 'bpf-fix-the-release-of-inner-map'

Hou Tao says:

====================
bpf: Fix the release of inner map

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to fix the release of inner maps in a map array or a
map htab. The release of an inner map differs from that of a normal
map. A normal map is released after the bpf program which uses it is
destroyed, because the bpf program tracks the maps it uses. However, a
bpf program cannot track the inner maps it uses, because these inner
maps may be updated or deleted dynamically, and for now the ref-counter
of an inner map is decreased right after the inner map is removed from
the outer map. Therefore, the inner map may be freed before the bpf
program which is accessing it exits, and a use-after-free results, as
demonstrated by patch #6.
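
To make the window concrete, the user-space half of the race looks
roughly like the sketch below (libbpf-based, condensed from the
selftest added in patch #6; the helper name replace_inner_map() and
the single-slot outer map are illustrative):

/* Sketch only: mirrors the update_map_fn() thread of the selftest in
 * patch #6. A bpf program attached to sys_getpgid is concurrently
 * looking up and updating the old inner map through the outer map.
 */
#include <unistd.h>
#include <bpf/bpf.h>

static void replace_inner_map(int outer_map_fd)
{
	int zero = 0;
	int new_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);

	if (new_fd < 0)
		return;
	/* Overwriting slot 0 drops the last reference to the old inner
	 * map (its own fd was already closed). Before this series the
	 * old inner map could therefore be freed while a bpf program
	 * that had already looked it up from the outer map was still
	 * running, hence the use-after-free.
	 */
	bpf_map_update_elem(outer_map_fd, &zero, &new_fd, 0);
	close(new_fd);
}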

The patchset fixes the problem by deferring the release of the inner
map: the freeing of the inner map is deferred according to the
sleepable attribute of the bpf programs which own the outer map.
Patch #1 fixes a warning when running the newly-added selftest under
interpreter mode. Patch #2 adds more parameters to .map_fd_put_ptr()
to prepare for the fix. Patch #3 fixes the incorrect value of
need_defer when freeing the fd array. Patch #4 fixes the potential
use-after-free problem by using call_rcu_tasks_trace() and call_rcu()
to wait for one tasks trace RCU GP and one RCU GP unconditionally.
Patch #5 optimizes the freeing of the inner map by removing the
unnecessary RCU GP waiting. Patch #6 adds a selftest to demonstrate
the potential use-after-free problem. Patch #7 updates a selftest to
update the outer map in a syscall bpf program.
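
Condensed from the kernel diffs below, the deferral logic introduced
by patches #4 and #5 amounts to the following sketch (the helper names
put_inner_map() and schedule_map_free() are illustrative; refcounting,
locking and unrelated fields are omitted):

/* On removal from the outer map (see bpf_map_fd_put_ptr() below): */
static void put_inner_map(struct bpf_map *outer_map,
			  struct bpf_map *inner_map, bool need_defer)
{
	if (need_defer) {
		/* The outer map is owned by a sleepable program: the
		 * inner map must survive both a tasks trace RCU GP and
		 * a regular RCU GP before ops->map_free() may run.
		 */
		if (atomic64_read(&outer_map->sleepable_refcnt))
			WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
		else
			/* Only non-sleepable programs: one RCU GP suffices. */
			WRITE_ONCE(inner_map->free_after_rcu_gp, true);
	}
	bpf_map_put(inner_map);
}

/* And in bpf_map_put(), once the refcount drops to zero (see below): */
static void schedule_map_free(struct bpf_map *map)
{
	if (READ_ONCE(map->free_after_mult_rcu_gp))
		call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
	else if (READ_ONCE(map->free_after_rcu_gp))
		call_rcu(&map->rcu, bpf_map_free_rcu_gp);
	else
		bpf_map_free_in_work(map);
}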

Please see the individual patches for more details. Comments are
always welcome.

Change Log:
v5:
 * patch #3: rename fd_array_map_delete_elem_with_deferred_free() to
             __fd_array_map_delete_elem() (Alexei)
 * patch #5: use atomic64_t instead of atomic_t to prevent potential
             overflow (Alexei)
 * patch #7: use ptr_to_u64() helper instead of force casting to initialize
             pointers in bpf_attr (Alexei)

v4: https://lore.kernel.org/bpf/20231130140120.1736235-1-houtao@huaweicloud.com
  * patch #2: don't use "deferred", use "need_defer" uniformly
  * patch #3: newly-added, fix the incorrect value of need_defer during
              fd array free.
  * patch #4: doesn't consider the case in which bpf map is not used by
              any bpf program and only use sleepable_refcnt to remove
              unnecessary tasks trace RCU GP (Alexei)
  * patch #4: remove memory barriers added due to cautiousness (Alexei)

v3: https://lore.kernel.org/bpf/20231124113033.503338-1-houtao@huaweicloud.com
  * multiple variable renamings (Martin)
  * define BPF_MAP_RCU_GP/BPF_MAP_RCU_TT_GP as bit (Martin)
  * use call_rcu() and its variants instead of synchronize_rcu() (Martin)
  * remove unnecessary mask in bpf_map_free_deferred() (Martin)
  * place atomic_or() and the related smp_mb() together (Martin)
  * add patch #6 to demonstrate that updating outer map in syscall
    program is dead-lock free (Alexei)
  * update comments about the memory barrier in bpf_map_fd_put_ptr()
  * update commit message for patch #3 and #4 to describe more details

v2: https://lore.kernel.org/bpf/20231113123324.3914612-1-houtao@huaweicloud.com
  * defer the invocation of ops->map_free() instead of bpf_map_put() (Martin)
  * update selftest to make it being reproducible under JIT mode (Martin)
  * remove unnecessary preparatory patches

v1: https://lore.kernel.org/bpf/20231107140702.1891778-1-houtao@huaweicloud.com
====================

Link: https://lore.kernel.org/r/20231204140425.1480317-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Alexei Starovoitov 2023-12-04 17:50:27 -08:00
commit ce3c49da11
13 changed files with 453 additions and 41 deletions

View file

@ -106,7 +106,11 @@ struct bpf_map_ops {
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
void (*map_fd_put_ptr)(void *ptr);
/* If need_defer is true, the implementation should guarantee that
* the to-be-put element is still alive before the bpf program, which
* may manipulate it, exits.
*/
void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer);
int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
@ -272,7 +276,11 @@ struct bpf_map {
*/
atomic64_t refcnt ____cacheline_aligned;
atomic64_t usercnt;
struct work_struct work;
/* rcu is used before freeing and work is only used during freeing */
union {
struct work_struct work;
struct rcu_head rcu;
};
struct mutex freeze_mutex;
atomic64_t writecnt;
/* 'Ownership' of program-containing map is claimed by the first program
@ -288,6 +296,9 @@ struct bpf_map {
} owner;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
bool free_after_mult_rcu_gp;
bool free_after_rcu_gp;
atomic64_t sleepable_refcnt;
s64 __percpu *elem_count;
};

View file

@ -867,11 +867,11 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
}
if (old_ptr)
map->ops->map_fd_put_ptr(old_ptr);
map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
}
static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
@ -890,13 +890,18 @@ static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
if (old_ptr) {
map->ops->map_fd_put_ptr(old_ptr);
map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
return 0;
} else {
return -ENOENT;
}
}
static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
return __fd_array_map_delete_elem(map, key, true);
}
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
@ -913,8 +918,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
return prog;
}
static void prog_fd_array_put_ptr(void *ptr)
static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* bpf_prog is freed after one RCU or tasks trace grace period */
bpf_prog_put(ptr);
}
@ -924,13 +930,13 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
for (i = 0; i < array->map.max_entries; i++)
fd_array_map_delete_elem(map, &i);
__fd_array_map_delete_elem(map, &i, need_defer);
}
static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
@ -1109,7 +1115,7 @@ static void prog_array_map_clear_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_array_aux,
work)->map;
bpf_fd_array_map_clear(map);
bpf_fd_array_map_clear(map, true);
bpf_map_put(map);
}
@ -1239,8 +1245,9 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
return ee;
}
static void perf_event_fd_array_put_ptr(void *ptr)
static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
@ -1258,7 +1265,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
fd_array_map_delete_elem(map, &i);
__fd_array_map_delete_elem(map, &i, true);
}
rcu_read_unlock();
}
@ -1266,7 +1273,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
static void perf_event_fd_array_map_free(struct bpf_map *map)
{
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
bpf_fd_array_map_clear(map);
bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@ -1294,7 +1301,7 @@ static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
return cgroup_get_from_fd(fd);
}
static void cgroup_fd_array_put_ptr(void *ptr)
static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
@ -1302,7 +1309,7 @@ static void cgroup_fd_array_put_ptr(void *ptr)
static void cgroup_fd_array_free(struct bpf_map *map)
{
bpf_fd_array_map_clear(map);
bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@ -1347,7 +1354,7 @@ static void array_of_map_free(struct bpf_map *map)
* is protected by fdget/fdput.
*/
bpf_map_meta_free(map->inner_map_meta);
bpf_fd_array_map_clear(map);
bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}

View file

@ -2664,12 +2664,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len)
{
struct bpf_map *map;
bool sleepable;
u32 i;
sleepable = aux->sleepable;
for (i = 0; i < len; i++) {
map = used_maps[i];
if (map->ops->map_poke_untrack)
map->ops->map_poke_untrack(map, aux);
if (sleepable)
atomic64_dec(&map->sleepable_refcnt);
bpf_map_put(map);
}
}

View file

@ -897,7 +897,7 @@ static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
if (map->ops->map_fd_put_ptr) {
ptr = fd_htab_map_get_ptr(map, l);
map->ops->map_fd_put_ptr(ptr);
map->ops->map_fd_put_ptr(map, ptr, true);
}
}
@ -2484,7 +2484,7 @@ static void fd_htab_map_free(struct bpf_map *map)
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
void *ptr = fd_htab_map_get_ptr(map, l);
map->ops->map_fd_put_ptr(ptr);
map->ops->map_fd_put_ptr(map, ptr, false);
}
}
@ -2525,7 +2525,7 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
ret = htab_map_update_elem(map, key, &ptr, map_flags);
if (ret)
map->ops->map_fd_put_ptr(ptr);
map->ops->map_fd_put_ptr(map, ptr, false);
return ret;
}

View file

@ -32,12 +32,13 @@
*
* Different map implementations will rely on rcu in map methods
* lookup/update/delete, therefore eBPF programs must run under rcu lock
* if program is allowed to access maps, so check rcu_read_lock_held in
* all three functions.
* if program is allowed to access maps, so check rcu_read_lock_held() or
* rcu_read_lock_trace_held() in all three functions.
*/
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());
return (unsigned long) map->ops->map_lookup_elem(map, key);
}
@ -53,7 +54,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
void *, value, u64, flags)
{
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());
return map->ops->map_update_elem(map, key, value, flags);
}
@ -70,7 +72,8 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());
return map->ops->map_delete_elem(map, key);
}

View file

@ -127,12 +127,21 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map,
return inner_map;
}
void bpf_map_fd_put_ptr(void *ptr)
void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* ptr->ops->map_free() has to go through one
* rcu grace period by itself.
struct bpf_map *inner_map = ptr;
/* Defer the freeing of inner map according to the sleepable attribute
* of bpf program which owns the outer map, so unnecessary waiting for
* RCU tasks trace grace period can be avoided.
*/
bpf_map_put(ptr);
if (need_defer) {
if (atomic64_read(&map->sleepable_refcnt))
WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
else
WRITE_ONCE(inner_map->free_after_rcu_gp, true);
}
bpf_map_put(inner_map);
}
u32 bpf_map_fd_sys_lookup_elem(void *ptr)

View file

@ -13,7 +13,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd);
void bpf_map_meta_free(struct bpf_map *map_meta);
void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
int ufd);
void bpf_map_fd_put_ptr(void *ptr);
void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer);
u32 bpf_map_fd_sys_lookup_elem(void *ptr);
#endif

View file

@ -719,6 +719,28 @@ static void bpf_map_put_uref(struct bpf_map *map)
}
}
static void bpf_map_free_in_work(struct bpf_map *map)
{
INIT_WORK(&map->work, bpf_map_free_deferred);
/* Avoid spawning kworkers, since they all might contend
* for the same mutex like slab_mutex.
*/
queue_work(system_unbound_wq, &map->work);
}
static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
{
bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
}
static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
{
if (rcu_trace_implies_rcu_gp())
bpf_map_free_rcu_gp(rcu);
else
call_rcu(rcu, bpf_map_free_rcu_gp);
}
/* decrement map refcnt and schedule it for freeing via workqueue
* (underlying map implementation ops->map_free() might sleep)
*/
@ -728,11 +750,14 @@ void bpf_map_put(struct bpf_map *map)
/* bpf_map_free_id() must be called first */
bpf_map_free_id(map);
btf_put(map->btf);
INIT_WORK(&map->work, bpf_map_free_deferred);
/* Avoid spawning kworkers, since they all might contend
* for the same mutex like slab_mutex.
*/
queue_work(system_unbound_wq, &map->work);
WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
if (READ_ONCE(map->free_after_mult_rcu_gp))
call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
else if (READ_ONCE(map->free_after_rcu_gp))
call_rcu(&map->rcu, bpf_map_free_rcu_gp);
else
bpf_map_free_in_work(map);
}
}
EXPORT_SYMBOL_GPL(bpf_map_put);
@ -5323,6 +5348,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
goto out_unlock;
}
/* The bpf program will not access the bpf map, but for the sake of
* simplicity, increase sleepable_refcnt for sleepable program as well.
*/
if (prog->aux->sleepable)
atomic64_inc(&map->sleepable_refcnt);
memcpy(used_maps_new, used_maps_old,
sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
used_maps_new[prog->aux->used_map_cnt] = map;

View file

@ -17889,10 +17889,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return -E2BIG;
}
if (env->prog->aux->sleepable)
atomic64_inc(&map->sleepable_refcnt);
/* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it
* will be used by the valid program until it's unloaded
* and all maps are released in free_used_maps()
* and all maps are released in bpf_free_used_maps()
*/
bpf_map_inc(map);

View file

@ -0,0 +1,141 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <test_progs.h>
#include <bpf/btf.h>
#include "access_map_in_map.skel.h"
struct thread_ctx {
pthread_barrier_t barrier;
int outer_map_fd;
int start, abort;
int loop, err;
};
static int wait_for_start_or_abort(struct thread_ctx *ctx)
{
while (!ctx->start && !ctx->abort)
usleep(1);
return ctx->abort ? -1 : 0;
}
static void *update_map_fn(void *data)
{
struct thread_ctx *ctx = data;
int loop = ctx->loop, err = 0;
if (wait_for_start_or_abort(ctx) < 0)
return NULL;
pthread_barrier_wait(&ctx->barrier);
while (loop-- > 0) {
int fd, zero = 0;
fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
if (fd < 0) {
err |= 1;
pthread_barrier_wait(&ctx->barrier);
continue;
}
/* Remove the old inner map */
if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0)
err |= 2;
close(fd);
pthread_barrier_wait(&ctx->barrier);
}
ctx->err = err;
return NULL;
}
static void *access_map_fn(void *data)
{
struct thread_ctx *ctx = data;
int loop = ctx->loop;
if (wait_for_start_or_abort(ctx) < 0)
return NULL;
pthread_barrier_wait(&ctx->barrier);
while (loop-- > 0) {
/* Access the old inner map */
syscall(SYS_getpgid);
pthread_barrier_wait(&ctx->barrier);
}
return NULL;
}
static void test_map_in_map_access(const char *prog_name, const char *map_name)
{
struct access_map_in_map *skel;
struct bpf_map *outer_map;
struct bpf_program *prog;
struct thread_ctx ctx;
pthread_t tid[2];
int err;
skel = access_map_in_map__open();
if (!ASSERT_OK_PTR(skel, "access_map_in_map open"))
return;
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
if (!ASSERT_OK_PTR(prog, "find program"))
goto out;
bpf_program__set_autoload(prog, true);
outer_map = bpf_object__find_map_by_name(skel->obj, map_name);
if (!ASSERT_OK_PTR(outer_map, "find map"))
goto out;
err = access_map_in_map__load(skel);
if (!ASSERT_OK(err, "access_map_in_map load"))
goto out;
err = access_map_in_map__attach(skel);
if (!ASSERT_OK(err, "access_map_in_map attach"))
goto out;
skel->bss->tgid = getpid();
memset(&ctx, 0, sizeof(ctx));
pthread_barrier_init(&ctx.barrier, NULL, 2);
ctx.outer_map_fd = bpf_map__fd(outer_map);
ctx.loop = 4;
err = pthread_create(&tid[0], NULL, update_map_fn, &ctx);
if (!ASSERT_OK(err, "close_thread"))
goto out;
err = pthread_create(&tid[1], NULL, access_map_fn, &ctx);
if (!ASSERT_OK(err, "read_thread")) {
ctx.abort = 1;
pthread_join(tid[0], NULL);
goto out;
}
ctx.start = 1;
pthread_join(tid[0], NULL);
pthread_join(tid[1], NULL);
ASSERT_OK(ctx.err, "err");
out:
access_map_in_map__destroy(skel);
}
void test_map_in_map(void)
{
if (test__start_subtest("acc_map_in_array"))
test_map_in_map_access("access_map_in_array", "outer_array_map");
if (test__start_subtest("sleepable_acc_map_in_array"))
test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map");
if (test__start_subtest("acc_map_in_htab"))
test_map_in_map_access("access_map_in_htab", "outer_htab_map");
if (test__start_subtest("sleepable_acc_map_in_htab"))
test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map");
}

View file

@ -12,7 +12,7 @@ struct args {
int btf_fd;
};
void test_syscall(void)
static void test_syscall_load_prog(void)
{
static char verifier_log[8192];
struct args ctx = {
@ -32,7 +32,7 @@ void test_syscall(void)
if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
prog_fd = bpf_program__fd(skel->progs.bpf_prog);
prog_fd = bpf_program__fd(skel->progs.load_prog);
err = bpf_prog_test_run_opts(prog_fd, &tattr);
ASSERT_EQ(err, 0, "err");
ASSERT_EQ(tattr.retval, 1, "retval");
@ -53,3 +53,29 @@ void test_syscall(void)
if (ctx.btf_fd > 0)
close(ctx.btf_fd);
}
static void test_syscall_update_outer_map(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct syscall *skel;
int err, prog_fd;
skel = syscall__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
prog_fd = bpf_program__fd(skel->progs.update_outer_map);
err = bpf_prog_test_run_opts(prog_fd, &opts);
ASSERT_EQ(err, 0, "err");
ASSERT_EQ(opts.retval, 1, "retval");
cleanup:
syscall__destroy(skel);
}
void test_syscall(void)
{
if (test__start_subtest("load_prog"))
test_syscall_load_prog();
if (test__start_subtest("update_outer_map"))
test_syscall_update_outer_map();
}

View file

@ -0,0 +1,93 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
#include <linux/bpf.h>
#include <time.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
struct inner_map_type {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(key_size, 4);
__uint(value_size, 4);
__uint(max_entries, 1);
} inner_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__type(key, int);
__type(value, int);
__uint(max_entries, 1);
__array(values, struct inner_map_type);
} outer_array_map SEC(".maps") = {
.values = {
[0] = &inner_map,
},
};
struct {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__type(key, int);
__type(value, int);
__uint(max_entries, 1);
__array(values, struct inner_map_type);
} outer_htab_map SEC(".maps") = {
.values = {
[0] = &inner_map,
},
};
char _license[] SEC("license") = "GPL";
int tgid = 0;
static int acc_map_in_map(void *outer_map)
{
int i, key, value = 0xdeadbeef;
void *inner_map;
if ((bpf_get_current_pid_tgid() >> 32) != tgid)
return 0;
/* Find nonexistent inner map */
key = 1;
inner_map = bpf_map_lookup_elem(outer_map, &key);
if (inner_map)
return 0;
/* Find the old inner map */
key = 0;
inner_map = bpf_map_lookup_elem(outer_map, &key);
if (!inner_map)
return 0;
/* Wait for the old inner map to be replaced */
for (i = 0; i < 2048; i++)
bpf_map_update_elem(inner_map, &key, &value, 0);
return 0;
}
SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
int access_map_in_array(void *ctx)
{
return acc_map_in_map(&outer_array_map);
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int sleepable_access_map_in_array(void *ctx)
{
return acc_map_in_map(&outer_array_map);
}
SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
int access_map_in_htab(void *ctx)
{
return acc_map_in_map(&outer_htab_map);
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int sleepable_access_map_in_htab(void *ctx)
{
return acc_map_in_map(&outer_htab_map);
}

View file

@ -6,9 +6,15 @@
#include <bpf/bpf_tracing.h>
#include <../../../tools/include/linux/filter.h>
#include <linux/btf.h>
#include <string.h>
#include <errno.h>
char _license[] SEC("license") = "GPL";
struct bpf_map {
int id;
} __attribute__((preserve_access_index));
struct args {
__u64 log_buf;
__u32 log_size;
@ -27,6 +33,37 @@ struct args {
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
BTF_INT_ENC(encoding, bits_offset, bits)
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
__type(value, union bpf_attr);
__uint(max_entries, 1);
} bpf_attr_array SEC(".maps");
struct inner_map_type {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(key_size, 4);
__uint(value_size, 4);
__uint(max_entries, 1);
} inner_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__type(key, int);
__type(value, int);
__uint(max_entries, 1);
__array(values, struct inner_map_type);
} outer_array_map SEC(".maps") = {
.values = {
[0] = &inner_map,
},
};
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
static int btf_load(void)
{
struct btf_blob {
@ -58,7 +95,7 @@ static int btf_load(void)
}
SEC("syscall")
int bpf_prog(struct args *ctx)
int load_prog(struct args *ctx)
{
static char license[] = "GPL";
static struct bpf_insn insns[] = {
@ -94,8 +131,8 @@ int bpf_prog(struct args *ctx)
map_create_attr.max_entries = ctx->max_entries;
map_create_attr.btf_fd = ret;
prog_load_attr.license = (long) license;
prog_load_attr.insns = (long) insns;
prog_load_attr.license = ptr_to_u64(license);
prog_load_attr.insns = ptr_to_u64(insns);
prog_load_attr.log_buf = ctx->log_buf;
prog_load_attr.log_size = ctx->log_size;
prog_load_attr.log_level = 1;
@ -107,8 +144,8 @@ int bpf_prog(struct args *ctx)
insns[3].imm = ret;
map_update_attr.map_fd = ret;
map_update_attr.key = (long) &key;
map_update_attr.value = (long) &value;
map_update_attr.key = ptr_to_u64(&key);
map_update_attr.value = ptr_to_u64(&value);
ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
if (ret < 0)
return ret;
@ -119,3 +156,52 @@ int bpf_prog(struct args *ctx)
ctx->prog_fd = ret;
return 1;
}
SEC("syscall")
int update_outer_map(void *ctx)
{
int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err;
const int attr_sz = sizeof(union bpf_attr);
union bpf_attr *attr;
attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero);
if (!attr)
goto out;
memset(attr, 0, attr_sz);
attr->map_id = ((struct bpf_map *)&outer_array_map)->id;
outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz);
if (outer_fd < 0)
goto out;
memset(attr, 0, attr_sz);
attr->map_type = BPF_MAP_TYPE_ARRAY;
attr->key_size = 4;
attr->value_size = 4;
attr->max_entries = 1;
inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz);
if (inner_fd < 0)
goto out;
memset(attr, 0, attr_sz);
attr->map_fd = outer_fd;
attr->key = ptr_to_u64(&zero);
attr->value = ptr_to_u64(&inner_fd);
err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz);
if (err)
goto out;
memset(attr, 0, attr_sz);
attr->map_fd = outer_fd;
attr->key = ptr_to_u64(&zero);
err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz);
if (err)
goto out;
ret = 1;
out:
if (inner_fd >= 0)
bpf_sys_close(inner_fd);
if (outer_fd >= 0)
bpf_sys_close(outer_fd);
return ret;
}