Merge branch 'bpf: Expand bpf_cgrp_storage to support cgroup1 non-attach case'

Yafang Shao says:

====================
In the current cgroup1 environment, associating a cgroup with an
application in a BPF program requires keeping a cgroup_id-to-application
mapping, either in a hash map or in userspace.
However, by enabling bpf_cgrp_storage for cgroup1, it becomes possible to
conveniently store application-specific information in cgroup-local storage
and utilize it within BPF programs. Furthermore, enabling this feature for
cgroup1 involves minor modifications for the non-attach case, streamlining
the process.

Enabling this functionality for the cgroup1 attach case, however, presents
challenges. Therefore, the decision is to focus on enabling it solely for
the cgroup1 non-attach case at present. An attempt to attach to a cgroup1
fd will simply fail with the error code -EBADF.

Changes:
- RFC -> v1:
  - Collect acked-by
  - Avoid unnecessary is_cgroup1 check (Yonghong)
  - Keep the code patterns consistent (Yonghong)
====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Martin KaFai Lau 2023-12-08 16:26:26 -08:00
commit 09115c33e6
7 changed files with 311 additions and 63 deletions

View file

@ -82,7 +82,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
int fd;
fd = *(int *)key;
cgroup = cgroup_get_from_fd(fd);
cgroup = cgroup_v1v2_get_from_fd(fd);
if (IS_ERR(cgroup))
return ERR_CAST(cgroup);
@ -101,7 +101,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
int fd;
fd = *(int *)key;
cgroup = cgroup_get_from_fd(fd);
cgroup = cgroup_v1v2_get_from_fd(fd);
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
@ -131,7 +131,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
int err, fd;
fd = *(int *)key;
cgroup = cgroup_get_from_fd(fd);
cgroup = cgroup_v1v2_get_from_fd(fd);
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);

View file

@ -689,3 +689,19 @@ int get_cgroup1_hierarchy_id(const char *subsys_name)
fclose(file);
return found ? id : -1;
}
/**
 * open_classid() - Get a file descriptor for the cgroupv1 net_cls classid dir
 *
 * The cgroup work dir must already exist: this function only opens it
 * (read-only) and never creates it.
 *
 * Return: the file descriptor on success, -1 on failure.
 */
int open_classid(void)
{
	char classid_path[PATH_MAX + 1];
	int fd;

	format_classid_path(classid_path);
	fd = open(classid_path, O_RDONLY);

	return fd;
}

View file

@ -33,6 +33,7 @@ void cleanup_cgroup_environment(void);
int set_classid(void);
int join_classid(void);
unsigned long long get_classid_cgroup_id(void);
int open_classid(void);
int setup_classid_environment(void);
void cleanup_classid_environment(void);

View file

@ -19,6 +19,21 @@ struct socket_cookie {
__u64 cookie_value;
};
/* Cgroup mode shared by the subtests: whether the cgroup under test is a
 * cgroup1 one, and the hierarchy id the BPF programs should use for it.
 */
static bool is_cgroup1;
static int target_hid;

/* Copy the current cgroup mode into the .bss of skeleton @skel.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays valid in an unbraced if/else body; @skel is parenthesized so any
 * pointer expression can be passed.
 */
#define CGROUP_MODE_SET(skel)					\
	do {							\
		(skel)->bss->is_cgroup1 = is_cgroup1;		\
		(skel)->bss->target_hid = target_hid;		\
	} while (0)

/* Record the cgroup mode that later CGROUP_MODE_SET() calls will apply.
 *
 * @cgroup: true when the subtests run against a cgroup1 cgroup
 * @hid:    hierarchy id to hand to the BPF programs
 */
static void cgroup_mode_value_init(bool cgroup, int hid)
{
	is_cgroup1 = cgroup;
	target_hid = hid;
}
static void test_tp_btf(int cgroup_fd)
{
struct cgrp_ls_tp_btf *skel;
@ -29,6 +44,8 @@ static void test_tp_btf(int cgroup_fd)
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
CGROUP_MODE_SET(skel);
/* populate a value in map_b */
err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY);
if (!ASSERT_OK(err, "map_update_elem"))
@ -130,6 +147,8 @@ static void test_recursion(int cgroup_fd)
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
CGROUP_MODE_SET(skel);
err = cgrp_ls_recursion__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
@ -165,6 +184,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
CGROUP_MODE_SET(skel);
bpf_program__set_autoload(skel->progs.cgroup_iter, true);
err = cgrp_ls_sleepable__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@ -202,6 +223,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
CGROUP_MODE_SET(skel);
skel->bss->target_pid = syscall(SYS_gettid);
bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
@ -229,6 +251,8 @@ static void test_no_rcu_lock(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
CGROUP_MODE_SET(skel);
bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
err = cgrp_ls_sleepable__load(skel);
ASSERT_ERR(err, "skel_load");
@ -236,7 +260,25 @@ static void test_no_rcu_lock(void)
cgrp_ls_sleepable__destroy(skel);
}
void test_cgrp_local_storage(void)
/* Check that the cgrp1_no_rcu_lock program loads successfully once its
 * autoload has been switched on. The program is only loaded here; it is
 * not attached or run.
 */
static void test_cgrp1_no_rcu_lock(void)
{
	struct cgrp_ls_sleepable *obj = cgrp_ls_sleepable__open();
	int err;

	if (!ASSERT_OK_PTR(obj, "skel_open"))
		return;

	CGROUP_MODE_SET(obj);

	/* Only this one program is enabled for the load below. */
	bpf_program__set_autoload(obj->progs.cgrp1_no_rcu_lock, true);

	err = cgrp_ls_sleepable__load(obj);
	ASSERT_OK(err, "skel_load");

	cgrp_ls_sleepable__destroy(obj);
}
static void cgrp2_local_storage(void)
{
__u64 cgroup_id;
int cgroup_fd;
@ -245,6 +287,8 @@ void test_cgrp_local_storage(void)
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage"))
return;
cgroup_mode_value_init(0, -1);
cgroup_id = get_cgroup_id("/cgrp_local_storage");
if (test__start_subtest("tp_btf"))
test_tp_btf(cgroup_fd);
@ -263,3 +307,55 @@ void test_cgrp_local_storage(void)
close(cgroup_fd);
}
static void cgrp1_local_storage(void)
{
int cgrp1_fd, cgrp1_hid, cgrp1_id, err;
/* Setup cgroup1 hierarchy */
err = setup_classid_environment();
if (!ASSERT_OK(err, "setup_classid_environment"))
return;
err = join_classid();
if (!ASSERT_OK(err, "join_cgroup1"))
goto cleanup;
cgrp1_fd = open_classid();
if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd"))
goto cleanup;
cgrp1_id = get_classid_cgroup_id();
if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id"))
goto close_fd;
cgrp1_hid = get_cgroup1_hierarchy_id("net_cls");
if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid"))
goto close_fd;
cgroup_mode_value_init(1, cgrp1_hid);
if (test__start_subtest("cgrp1_tp_btf"))
test_tp_btf(cgrp1_fd);
if (test__start_subtest("cgrp1_recursion"))
test_recursion(cgrp1_fd);
if (test__start_subtest("cgrp1_negative"))
test_negative();
if (test__start_subtest("cgrp1_iter_sleepable"))
test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id);
if (test__start_subtest("cgrp1_yes_rcu_lock"))
test_yes_rcu_lock(cgrp1_id);
if (test__start_subtest("cgrp1_no_rcu_lock"))
test_cgrp1_no_rcu_lock();
close_fd:
close(cgrp1_fd);
cleanup:
cleanup_classid_environment();
}
/* Test entry point: run the cgroup2 subtests first, then the cgroup1 ones. */
void test_cgrp_local_storage(void)
{
cgrp2_local_storage();
cgrp1_local_storage();
}

View file

@ -21,50 +21,100 @@ struct {
__type(value, long);
} map_b SEC(".maps");
/* Cgroup mode, written by the userspace test through the skeleton's .bss:
 * is_cgroup1 selects the cgroup1 path, target_hid is the hierarchy id
 * passed to bpf_task_get_cgroup1().
 */
int target_hid = 0;
bool is_cgroup1 = 0;
/* Kernel functions resolved via __ksym. Callers in this file check the
 * result of bpf_task_get_cgroup1() for NULL and release a non-NULL result
 * with bpf_cgroup_release().
 */
struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
/* Delete the local storage of @cgrp from both map_a and map_b; the return
 * values of the deletes are ignored.
 */
static void __on_lookup(struct cgroup *cgrp)
{
bpf_cgrp_storage_delete(&map_a, cgrp);
bpf_cgrp_storage_delete(&map_b, cgrp);
}
SEC("fentry/bpf_local_storage_lookup")
int BPF_PROG(on_lookup)
{
struct task_struct *task = bpf_get_current_task_btf();
struct cgroup *cgrp;
bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp);
bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp);
if (is_cgroup1) {
cgrp = bpf_task_get_cgroup1(task, target_hid);
if (!cgrp)
return 0;
__on_lookup(cgrp);
bpf_cgroup_release(cgrp);
return 0;
}
__on_lookup(task->cgroups->dfl_cgrp);
return 0;
}
/* Get (creating if needed) the storage of @cgrp in map_a and then map_b,
 * and add one to each value that could be obtained.
 */
static void __on_update(struct cgroup *cgrp)
{
	long *val_a, *val_b;

	val_a = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (val_a)
		*val_a += 1;

	val_b = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (val_b)
		*val_b += 1;
}
/* fentry hook on bpf_local_storage_update(): run __on_update() on the
 * current task's cgroup. In cgroup1 mode that is the cgroup returned by
 * bpf_task_get_cgroup1() for target_hid; otherwise it is
 * task->cgroups->dfl_cgrp.
 */
SEC("fentry/bpf_local_storage_update")
int BPF_PROG(on_update)
{
	struct task_struct *cur = bpf_get_current_task_btf();
	struct cgroup *cgrp1;

	if (!is_cgroup1) {
		__on_update(cur->cgroups->dfl_cgrp);
		return 0;
	}

	cgrp1 = bpf_task_get_cgroup1(cur, target_hid);
	if (cgrp1) {
		__on_update(cgrp1);
		/* Drop the cgroup obtained from bpf_task_get_cgroup1(). */
		bpf_cgroup_release(cgrp1);
	}
	return 0;
}
static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
{
long *ptr;
ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
*ptr = 200;
ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
return 0;
*ptr = 100;
}
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
struct task_struct *task = bpf_get_current_task_btf();
struct cgroup *cgrp;
task = bpf_get_current_task_btf();
ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 200;
if (is_cgroup1) {
cgrp = bpf_task_get_cgroup1(task, target_hid);
if (!cgrp)
return 0;
ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 100;
__on_enter(regs, id, cgrp);
bpf_cgroup_release(cgrp);
return 0;
}
__on_enter(regs, id, task->cgroups->dfl_cgrp);
return 0;
}

View file

@ -17,7 +17,11 @@ struct {
__u32 target_pid;
__u64 cgroup_id;
/* Cgroup mode, written by the userspace test through the skeleton's .bss:
 * is_cgroup1 selects the cgroup1 path, target_hid is the hierarchy id
 * passed to bpf_task_get_cgroup1().
 */
int target_hid;
bool is_cgroup1;
/* Kernel functions resolved via __ksym. Callers in this file check the
 * result of bpf_task_get_cgroup1() for NULL and release a non-NULL result
 * with bpf_cgroup_release().
 */
struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@ -37,23 +41,50 @@ int cgroup_iter(struct bpf_iter__cgroup *ctx)
return 0;
}
/* Get (creating if needed) the map_a storage of @cgrp and, when that
 * succeeds, publish the cgroup's kernfs node id in cgroup_id.
 */
static void __no_rcu_lock(struct cgroup *cgrp)
{
	long *val;

	/* Note that trace rcu is held in sleepable prog, so we can use
	 * bpf_cgrp_storage_get() in sleepable prog.
	 */
	val = bpf_cgrp_storage_get(&map_a, cgrp, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!val)
		return;

	cgroup_id = cgrp->kn->id;
}
/* Sleepable fentry hook on sys_getpgid, cgroup1 flavour: for the task whose
 * pid equals target_pid, look up its cgroup in hierarchy target_hid and run
 * __no_rcu_lock() on it. No explicit bpf_rcu_read_lock() is taken.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int cgrp1_no_rcu_lock(void *ctx)
{
	struct task_struct *cur = bpf_get_current_task_btf();
	struct cgroup *cgrp1;

	if (cur->pid != target_pid)
		return 0;

	/* bpf_task_get_cgroup1 can work in sleepable prog */
	cgrp1 = bpf_task_get_cgroup1(cur, target_hid);
	if (cgrp1) {
		__no_rcu_lock(cgrp1);
		/* Drop the cgroup obtained from bpf_task_get_cgroup1(). */
		bpf_cgroup_release(cgrp1);
	}
	return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int no_rcu_lock(void *ctx)
{
struct task_struct *task;
struct cgroup *cgrp;
long *ptr;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
/* task->cgroups is untrusted in sleepable prog outside of RCU CS */
cgrp = task->cgroups->dfl_cgrp;
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
cgroup_id = cgrp->kn->id;
__no_rcu_lock(task->cgroups->dfl_cgrp);
return 0;
}
@ -68,6 +99,22 @@ int yes_rcu_lock(void *ctx)
if (task->pid != target_pid)
return 0;
if (is_cgroup1) {
bpf_rcu_read_lock();
cgrp = bpf_task_get_cgroup1(task, target_hid);
if (!cgrp) {
bpf_rcu_read_unlock();
return 0;
}
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
cgroup_id = cgrp->kn->id;
bpf_cgroup_release(cgrp);
bpf_rcu_read_unlock();
return 0;
}
bpf_rcu_read_lock();
cgrp = task->cgroups->dfl_cgrp;
/* cgrp is trusted under RCU CS */

View file

@ -27,62 +27,100 @@ pid_t target_pid = 0;
int mismatch_cnt = 0;
int enter_cnt = 0;
int exit_cnt = 0;
/* Cgroup mode, written by the userspace test through the skeleton's .bss:
 * is_cgroup1 selects the cgroup1 path, target_hid is the hierarchy id
 * passed to bpf_task_get_cgroup1().
 */
int target_hid = 0;
bool is_cgroup1 = 0;
/* Kernel functions resolved via __ksym. Callers in this file check the
 * result of bpf_task_get_cgroup1() for NULL and release a non-NULL result
 * with bpf_cgroup_release().
 */
struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
/* Walk map_a through create -> delete -> failed lookup -> re-create for
 * @cgrp. If every step behaves as expected, bump enter_cnt and store
 * MAGIC_VALUE + enter_cnt in the fresh value; otherwise return early
 * without touching the counter. @regs and @id are not used.
 */
static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
{
	long *val;
	int rc;

	/* populate value 0 */
	val = bpf_cgrp_storage_get(&map_a, cgrp, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!val)
		return;

	/* delete value 0 */
	rc = bpf_cgrp_storage_delete(&map_a, cgrp);
	if (rc)
		return;

	/* value is not available */
	if (bpf_cgrp_storage_get(&map_a, cgrp, 0, 0))
		return;

	/* re-populate the value */
	val = bpf_cgrp_storage_get(&map_a, cgrp, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!val)
		return;

	__sync_fetch_and_add(&enter_cnt, 1);
	*val = MAGIC_VALUE + enter_cnt;
}
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
int err;
struct cgroup *cgrp;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
/* populate value 0 */
ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
if (is_cgroup1) {
cgrp = bpf_task_get_cgroup1(task, target_hid);
if (!cgrp)
return 0;
/* delete value 0 */
err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp);
if (err)
__on_enter(regs, id, cgrp);
bpf_cgroup_release(cgrp);
return 0;
}
/* value is not available */
ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0);
if (ptr)
return 0;
/* re-populate the value */
ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
__sync_fetch_and_add(&enter_cnt, 1);
*ptr = MAGIC_VALUE + enter_cnt;
__on_enter(regs, id, task->cgroups->dfl_cgrp);
return 0;
}
/* Get (creating if needed) the map_a value of @cgrp, bump exit_cnt, and
 * count a mismatch when the stored value is not MAGIC_VALUE + exit_cnt.
 * Does nothing when the storage cannot be obtained. @regs and @id are not
 * used.
 */
static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp)
{
	long *val = bpf_cgrp_storage_get(&map_a, cgrp, 0,
					 BPF_LOCAL_STORAGE_GET_F_CREATE);

	if (val) {
		__sync_fetch_and_add(&exit_cnt, 1);
		if (*val != MAGIC_VALUE + exit_cnt)
			__sync_fetch_and_add(&mismatch_cnt, 1);
	}
}
SEC("tp_btf/sys_exit")
int BPF_PROG(on_exit, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
struct cgroup *cgrp;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
if (is_cgroup1) {
cgrp = bpf_task_get_cgroup1(task, target_hid);
if (!cgrp)
return 0;
__sync_fetch_and_add(&exit_cnt, 1);
if (*ptr != MAGIC_VALUE + exit_cnt)
__sync_fetch_and_add(&mismatch_cnt, 1);
__on_exit(regs, id, cgrp);
bpf_cgroup_release(cgrp);
return 0;
}
__on_exit(regs, id, task->cgroups->dfl_cgrp);
return 0;
}