Merge branch 'cgroup-auto-detach'

Roman Gushchin says:

====================
This patchset implements cgroup bpf auto-detachment:
bpf programs are detached as soon as possible after removal of the
cgroup, without waiting for the release of all associated resources.

Patches 2 and 3 are required to implement a corresponding kselftest
in patch 4.

v5:
  1) rebase

v4:
  1) release cgroup bpf data using a workqueue
  2) add test_cgroup_attach to .gitignore

v3:
  1) some minor changes and typo fixes

v2:
  1) removed a bogus check in patch 4
  2) moved buf[len] = 0 in patch 2
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2019-05-28 09:30:03 -07:00
commit d0a3a4b218
9 changed files with 263 additions and 31 deletions

View file

@ -6,6 +6,7 @@
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/jump_label.h> #include <linux/jump_label.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/percpu-refcount.h>
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <uapi/linux/bpf.h> #include <uapi/linux/bpf.h>
@ -72,10 +73,16 @@ struct cgroup_bpf {
/* temp storage for effective prog array used by prog_attach/detach */ /* temp storage for effective prog array used by prog_attach/detach */
struct bpf_prog_array __rcu *inactive; struct bpf_prog_array __rcu *inactive;
/* reference counter used to detach bpf programs after cgroup removal */
struct percpu_ref refcnt;
/* cgroup_bpf is released using a work queue */
struct work_struct release_work;
}; };
void cgroup_bpf_put(struct cgroup *cgrp);
int cgroup_bpf_inherit(struct cgroup *cgrp); int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags); enum bpf_attach_type type, u32 flags);
@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
struct bpf_prog; struct bpf_prog;
struct cgroup_bpf {}; struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, enum bpf_prog_type ptype,

View file

@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
#endif /* !CONFIG_CGROUPS */ #endif /* !CONFIG_CGROUPS */
/*
 * Helpers for the per-cgroup bpf reference counter (cgrp->bpf.refcnt).
 * With CONFIG_CGROUP_BPF enabled they forward to the percpu_ref API;
 * otherwise they compile to empty stubs so callers need no #ifdefs.
 */
#ifdef CONFIG_CGROUP_BPF
/* Take one reference on @cgrp's bpf percpu refcount. */
static inline void cgroup_bpf_get(struct cgroup *cgrp)
{
percpu_ref_get(&cgrp->bpf.refcnt);
}
/* Drop one reference on @cgrp's bpf percpu refcount. */
static inline void cgroup_bpf_put(struct cgroup *cgrp)
{
percpu_ref_put(&cgrp->bpf.refcnt);
}
#else /* CONFIG_CGROUP_BPF */
/* No bpf state exists in this configuration: both helpers are no-ops. */
static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
#endif /* CONFIG_CGROUP_BPF */
#endif /* _LINUX_CGROUP_H */ #endif /* _LINUX_CGROUP_H */

View file

@ -22,12 +22,21 @@
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key); EXPORT_SYMBOL(cgroup_bpf_enabled_key);
/** void cgroup_bpf_offline(struct cgroup *cgrp)
* cgroup_bpf_put() - put references of all bpf programs
* @cgrp: the cgroup to modify
*/
void cgroup_bpf_put(struct cgroup *cgrp)
{ {
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
}
/**
* cgroup_bpf_release() - put references of all bpf programs and
* release all cgroup bpf data
* @work: work structure embedded into the cgroup to modify
*/
static void cgroup_bpf_release(struct work_struct *work)
{
struct cgroup *cgrp = container_of(work, struct cgroup,
bpf.release_work);
enum bpf_cgroup_storage_type stype; enum bpf_cgroup_storage_type stype;
unsigned int type; unsigned int type;
@ -47,6 +56,22 @@ void cgroup_bpf_put(struct cgroup *cgrp)
} }
bpf_prog_array_free(cgrp->bpf.effective[type]); bpf_prog_array_free(cgrp->bpf.effective[type]);
} }
percpu_ref_exit(&cgrp->bpf.refcnt);
cgroup_put(cgrp);
}
/**
* cgroup_bpf_release_fn() - callback used to schedule releasing
* of bpf cgroup data
* @ref: percpu ref counter structure
*/
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
queue_work(system_wq, &cgrp->bpf.release_work);
} }
/* count number of elements in the list. /* count number of elements in the list.
@ -167,7 +192,12 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
*/ */
#define NR ARRAY_SIZE(cgrp->bpf.effective) #define NR ARRAY_SIZE(cgrp->bpf.effective)
struct bpf_prog_array __rcu *arrays[NR] = {}; struct bpf_prog_array __rcu *arrays[NR] = {};
int i; int ret, i;
ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
GFP_KERNEL);
if (ret)
return ret;
for (i = 0; i < NR; i++) for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]); INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@ -183,6 +213,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
cleanup: cleanup:
for (i = 0; i < NR; i++) for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]); bpf_prog_array_free(arrays[i]);
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM; return -ENOMEM;
} }

View file

@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work)
if (cgrp->kn) if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL); NULL);
cgroup_bpf_put(cgrp);
} }
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
cgroup1_check_for_release(parent); cgroup1_check_for_release(parent);
cgroup_bpf_offline(cgrp);
/* put the base reference */ /* put the base reference */
percpu_ref_kill(&cgrp->self.refcnt); percpu_ref_kill(&cgrp->self.refcnt);
@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
* Don't use cgroup_get_live(). * Don't use cgroup_get_live().
*/ */
cgroup_get(sock_cgroup_ptr(skcd)); cgroup_get(sock_cgroup_ptr(skcd));
cgroup_bpf_get(sock_cgroup_ptr(skcd));
return; return;
} }
@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
cset = task_css_set(current); cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) { if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp; skcd->val = (unsigned long)cset->dfl_cgrp;
cgroup_bpf_get(cset->dfl_cgrp);
break; break;
} }
cpu_relax(); cpu_relax();
@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
void cgroup_sk_free(struct sock_cgroup_data *skcd) void cgroup_sk_free(struct sock_cgroup_data *skcd)
{ {
cgroup_put(sock_cgroup_ptr(skcd)); struct cgroup *cgrp = sock_cgroup_ptr(skcd);
cgroup_bpf_put(cgrp);
cgroup_put(cgrp);
} }
#endif /* CONFIG_SOCK_CGROUP_DATA */ #endif /* CONFIG_SOCK_CGROUP_DATA */

View file

@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
hostprogs-y += test_overhead hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin hostprogs-y += test_cgrp2_array_pin
hostprogs-y += test_cgrp2_attach hostprogs-y += test_cgrp2_attach
hostprogs-y += test_cgrp2_attach2
hostprogs-y += test_cgrp2_sock hostprogs-y += test_cgrp2_sock
hostprogs-y += test_cgrp2_sock2 hostprogs-y += test_cgrp2_sock2
hostprogs-y += xdp1 hostprogs-y += xdp1
@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o test_overhead-objs := bpf_load.o test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o test_cgrp2_attach-objs := test_cgrp2_attach.o
test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := test_cgrp2_sock.o test_cgrp2_sock-objs := test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o xdp1-objs := xdp1_user.o

View file

@ -22,6 +22,7 @@ test_lirc_mode2_user
get_cgroup_id_user get_cgroup_id_user
test_skb_cgroup_id_user test_skb_cgroup_id_user
test_socket_cookie test_socket_cookie
test_cgroup_attach
test_cgroup_storage test_cgroup_storage
test_select_reuseport test_select_reuseport
test_flow_dissector test_flow_dissector

View file

@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_btf_dump test_btf_dump test_cgroup_attach
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES) TEST_GEN_FILES = $(BPF_OBJ_FILES)
@ -99,6 +99,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c
$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
.PHONY: force .PHONY: force

View file

@ -33,6 +33,60 @@
snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
CGROUP_WORK_DIR, path) CGROUP_WORK_DIR, path)
/**
 * enable_all_controllers() - Enable all available cgroup v2 controllers
 * @cgroup_path: path of the cgroup directory to operate on
 *
 * Read the controller list from <cgroup_path>/cgroup.controllers and
 * write "+<controller>" for every listed entry to
 * <cgroup_path>/cgroup.subtree_control, in order to increase the
 * code coverage.
 *
 * If successful (including the case where no controllers are listed),
 * 0 is returned. If opening, reading or writing either file fails,
 * the error is logged and 1 is returned.
 */
int enable_all_controllers(char *cgroup_path)
{
	char path[PATH_MAX + 1];
	char buf[PATH_MAX];
	char *c, *c2;
	int fd, cfd;
	/*
	 * Must be signed: read() reports failure as -1. With an unsigned
	 * type the error check below could never fire and buf[len] would
	 * be written far out of bounds.
	 */
	ssize_t len;

	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		log_err("Opening cgroup.controllers: %s", path);
		return 1;
	}

	/* Leave room for the terminating NUL added below. */
	len = read(fd, buf, sizeof(buf) - 1);
	if (len < 0) {
		close(fd);
		log_err("Reading cgroup.controllers: %s", path);
		return 1;
	}
	buf[len] = 0;
	close(fd);

	/* No controllers available? We're probably on cgroup v1. */
	if (len == 0)
		return 0;

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/* Enable the controllers one at a time: "+<name>\n" per write. */
	for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}

	close(cfd);
	return 0;
}
/** /**
* setup_cgroup_environment() - Setup the cgroup environment * setup_cgroup_environment() - Setup the cgroup environment
* *
@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
return 1; return 1;
} }
if (enable_all_controllers(cgroup_workdir))
return 1;
return 0; return 0;
} }

View file

@ -1,3 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/* eBPF example program: /* eBPF example program:
* *
* - Creates arraymap in kernel with 4 bytes keys and 8 byte values * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
@ -25,20 +27,27 @@
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include <linux/filter.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include "bpf_insn.h" #include "bpf_util.h"
#include "bpf_rlimit.h" #include "bpf_rlimit.h"
#include "cgroup_helpers.h" #include "cgroup_helpers.h"
#define FOO "/foo" #define FOO "/foo"
#define BAR "/foo/bar/" #define BAR "/foo/bar/"
#define PING_CMD "ping -c1 -w1 127.0.0.1 > /dev/null" #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE]; char bpf_log_buf[BPF_LOG_BUF_SIZE];
#ifdef DEBUG
#define debug(args...) printf(args)
#else
#define debug(args...)
#endif
static int prog_load(int verdict) static int prog_load(int verdict)
{ {
int ret; int ret;
@ -89,7 +98,7 @@ static int test_foo_bar(void)
goto err; goto err;
} }
printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
assert(system(PING_CMD) != 0); assert(system(PING_CMD) != 0);
/* Create cgroup /foo/bar, get fd, and join it */ /* Create cgroup /foo/bar, get fd, and join it */
@ -100,7 +109,7 @@ static int test_foo_bar(void)
if (join_cgroup(BAR)) if (join_cgroup(BAR))
goto err; goto err;
printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0); assert(system(PING_CMD) != 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
@ -109,7 +118,7 @@ static int test_foo_bar(void)
goto err; goto err;
} }
printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0); assert(system(PING_CMD) == 0);
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
@ -117,7 +126,7 @@ static int test_foo_bar(void)
goto err; goto err;
} }
printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
"This ping in cgroup /foo/bar should fail...\n"); "This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0); assert(system(PING_CMD) != 0);
@ -132,7 +141,7 @@ static int test_foo_bar(void)
goto err; goto err;
} }
printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
"This ping in cgroup /foo/bar should pass...\n"); "This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0); assert(system(PING_CMD) == 0);
@ -199,9 +208,9 @@ static int test_foo_bar(void)
close(bar); close(bar);
cleanup_cgroup_environment(); cleanup_cgroup_environment();
if (!rc) if (!rc)
printf("### override:PASS\n"); printf("#override:PASS\n");
else else
printf("### override:FAIL\n"); printf("#override:FAIL\n");
return rc; return rc;
} }
@ -441,19 +450,122 @@ static int test_multiprog(void)
close(cg5); close(cg5);
cleanup_cgroup_environment(); cleanup_cgroup_environment();
if (!rc) if (!rc)
printf("### multi:PASS\n"); printf("#multi:PASS\n");
else else
printf("### multi:FAIL\n"); printf("#multi:FAIL\n");
return rc; return rc;
} }
int main(int argc, char **argv) static int test_autodetach(void)
{ {
int rc = 0; __u32 prog_cnt = 4, attach_flags;
int allow_prog[2] = {0};
__u32 prog_ids[2] = {0};
int cg = 0, i, rc = -1;
void *ptr = NULL;
int attempts;
rc = test_foo_bar(); for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
if (rc) allow_prog[i] = prog_load_cnt(1, 1 << i);
return rc; if (!allow_prog[i])
goto err;
}
return test_multiprog(); if (setup_cgroup_environment())
goto err;
/* create a cgroup, attach two programs and remember their ids */
cg = create_and_get_cgroup("/cg_autodetach");
if (cg < 0)
goto err;
if (join_cgroup("/cg_autodetach"))
goto err;
for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog[%d] to cg:egress", i);
goto err;
}
}
/* make sure that programs are attached and run some traffic */
assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
prog_ids, &prog_cnt) == 0);
assert(system(PING_CMD) == 0);
/* allocate some memory (4MB) to pin the original cgroup */
ptr = malloc(4 * (1 << 20));
if (!ptr)
goto err;
/* close programs and cgroup fd */
for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
close(allow_prog[i]);
allow_prog[i] = 0;
}
close(cg);
cg = 0;
/* leave the cgroup and remove it. don't detach programs */
cleanup_cgroup_environment();
/* wait for the asynchronous auto-detachment.
* wait for no more than 5 sec and give up.
*/
for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
for (attempts = 5; attempts >= 0; attempts--) {
int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
if (fd < 0)
break;
/* don't leave the fd open */
close(fd);
if (!attempts)
goto err;
sleep(1);
}
}
rc = 0;
err:
for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
if (allow_prog[i] > 0)
close(allow_prog[i]);
if (cg)
close(cg);
free(ptr);
cleanup_cgroup_environment();
if (!rc)
printf("#autodetach:PASS\n");
else
printf("#autodetach:FAIL\n");
return rc;
}
int main(void)
{
int (*tests[])(void) = {
test_foo_bar,
test_multiprog,
test_autodetach,
};
int errors = 0;
int i;
for (i = 0; i < ARRAY_SIZE(tests); i++)
if (tests[i]())
errors++;
if (errors)
printf("test_cgroup_attach:FAIL\n");
else
printf("test_cgroup_attach:PASS\n");
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
} }