Merge branch 'cgroup-auto-detach'

Roman Gushchin says:

====================
This patchset implements a cgroup bpf auto-detachment functionality:
bpf programs are detached as soon as possible after removal of the
cgroup, without waiting for the release of all associated resources.

Patches 2 and 3 are required to implement a corresponding kselftest
in patch 4.

v5:
  1) rebase

v4:
  1) release cgroup bpf data using a workqueue
  2) add test_cgroup_attach to .gitignore

v3:
  1) some minor changes and typo fixes

v2:
  1) removed a bogus check in patch 4
  2) moved buf[len] = 0 in patch 2
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-10-06 16:49:22 +00:00 · 2019-05-28 09:30:03 -07:00 · 2019-05-28 09:30:03 -07:00 · d0a3a4b218
commit d0a3a4b218
parent 37b54aed12 d5506591d5
9 changed files with 263 additions and 31 deletions
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@ -6,6 +6,7 @@
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
 #include <linux/percpu-refcount.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
@ -72,10 +73,16 @@ struct cgroup_bpf {
 	/* temp storage for effective prog array used by prog_attach/detach */
 	struct bpf_prog_array __rcu *inactive;
 	/* reference counter used to detach bpf programs after cgroup removal */
 	struct percpu_ref refcnt;
 	/* cgroup_bpf is released using a work queue */
 	struct work_struct release_work;
 };
 void cgroup_bpf_put(struct cgroup *cgrp);
 int cgroup_bpf_inherit(struct cgroup *cgrp);
 void cgroup_bpf_offline(struct cgroup *cgrp);
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type, u32 flags);
@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 struct bpf_prog;
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 					 enum bpf_prog_type ptype,
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
 #endif /* !CONFIG_CGROUPS */
 #ifdef CONFIG_CGROUP_BPF
 /* Take one reference on the bpf reference counter embedded in @cgrp. */
 static inline void cgroup_bpf_get(struct cgroup *cgrp)
 {
 	percpu_ref_get(&cgrp->bpf.refcnt);
 }
 /* Drop one reference on the bpf reference counter embedded in @cgrp. */
 static inline void cgroup_bpf_put(struct cgroup *cgrp)
 {
 	percpu_ref_put(&cgrp->bpf.refcnt);
 }
 #else /* CONFIG_CGROUP_BPF */
 /* Without CONFIG_CGROUP_BPF both helpers compile to empty no-op stubs. */
 static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 #endif /* CONFIG_CGROUP_BPF */
 #endif /* _LINUX_CGROUP_H */
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@ -22,12 +22,21 @@
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
-/**
+void cgroup_bpf_offline(struct cgroup *cgrp)
 * cgroup_bpf_put() - put references of all bpf programs
 * @cgrp: the cgroup to modify
 */
 void cgroup_bpf_put(struct cgroup *cgrp)
 {
 	cgroup_get(cgrp);
 	percpu_ref_kill(&cgrp->bpf.refcnt);
 }
 /**
 * cgroup_bpf_release() - put references of all bpf programs and
 *                        release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
 static void cgroup_bpf_release(struct work_struct *work)
 {
 	struct cgroup *cgrp = container_of(work, struct cgroup,
 					   bpf.release_work);
 	enum bpf_cgroup_storage_type stype;
 	unsigned int type;
@ -47,6 +56,22 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 		}
 		bpf_prog_array_free(cgrp->bpf.effective[type]);
 	}
 	percpu_ref_exit(&cgrp->bpf.refcnt);
 	cgroup_put(cgrp);
 }
 /**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 *                           of bpf cgroup data
 * @ref: percpu ref counter structure
 */
 static void cgroup_bpf_release_fn(struct percpu_ref *ref)
 {
 	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
 	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
 	queue_work(system_wq, &cgrp->bpf.release_work);
 }
 /* count number of elements in the list.
@ -167,7 +192,12 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 */
 #define	NR ARRAY_SIZE(cgrp->bpf.effective)
 	struct bpf_prog_array __rcu *arrays[NR] = {};
-	int i;
+	int ret, i;
 	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
 			      GFP_KERNEL);
 	if (ret)
 		return ret;
 	for (i = 0; i < NR; i++)
 		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@ -183,6 +213,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 cleanup:
 	for (i = 0; i < NR; i++)
 		bpf_prog_array_free(arrays[i]);
 	percpu_ref_exit(&cgrp->bpf.refcnt);
 	return -ENOMEM;
 }
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work)
 		if (cgrp->kn)
 			RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
 					 NULL);
 		cgroup_bpf_put(cgrp);
 	}
 	mutex_unlock(&cgroup_mutex);
@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	cgroup1_check_for_release(parent);
 	cgroup_bpf_offline(cgrp);
 	/* put the base reference */
 	percpu_ref_kill(&cgrp->self.refcnt);
@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 		 * Don't use cgroup_get_live().
 		 */
 		cgroup_get(sock_cgroup_ptr(skcd));
 		cgroup_bpf_get(sock_cgroup_ptr(skcd));
 		return;
 	}
@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 		cset = task_css_set(current);
 		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
 			skcd->val = (unsigned long)cset->dfl_cgrp;
 			cgroup_bpf_get(cset->dfl_cgrp);
 			break;
 		}
 		cpu_relax();
@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 void cgroup_sk_free(struct sock_cgroup_data *skcd)
 {
-	cgroup_put(sock_cgroup_ptr(skcd));
+	struct cgroup *cgrp = sock_cgroup_ptr(skcd);
 	cgroup_bpf_put(cgrp);
 	cgroup_put(cgrp);
 }
 #endif	/* CONFIG_SOCK_CGROUP_DATA */
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += test_cgrp2_attach
 hostprogs-y += test_cgrp2_attach2
 hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o test_overhead_user.o
 test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
 test_cgrp2_attach-objs := test_cgrp2_attach.o
 test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
 test_cgrp2_sock-objs := test_cgrp2_sock.o
 test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
 xdp1-objs := xdp1_user.o
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@ -22,6 +22,7 @@ test_lirc_mode2_user
 get_cgroup_id_user
 test_skb_cgroup_id_user
 test_socket_cookie
 test_cgroup_attach
 test_cgroup_storage
 test_select_reuseport
 test_flow_dissector
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_btf_dump
+	test_btf_dump test_cgroup_attach
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@ -99,6 +99,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
 $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
 .PHONY: force
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@ -33,6 +33,60 @@
 	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
 		 CGROUP_WORK_DIR, path)
 /**
 * enable_all_controllers() - Enable all available cgroup v2 controllers
 * @cgroup_path: path of the cgroup directory to operate on
 *
 * Read the controller names listed in <cgroup_path>/cgroup.controllers and
 * write "+<name>" for each of them to <cgroup_path>/cgroup.subtree_control,
 * in order to increase the code coverage.
 *
 * If successful (including the case of an empty controller list), 0 is
 * returned. 1 is returned if a file cannot be opened, read or written.
 */
 int enable_all_controllers(char *cgroup_path)
 {
 	char path[PATH_MAX + 1];
 	char buf[PATH_MAX];
 	char *c, *c2;
 	int fd, cfd;
 	/* read() returns ssize_t: with an unsigned type the error check below
 	 * could never fire and buf[len] would be written out of bounds.
 	 */
 	ssize_t len;
 	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
 		log_err("Opening cgroup.controllers: %s", path);
 		return 1;
 	}
 	/* leave room for the terminating NUL */
 	len = read(fd, buf, sizeof(buf) - 1);
 	if (len < 0) {
 		/* log first so close() cannot disturb errno; assumes log_err
 		 * reports errno - TODO confirm
 		 */
 		log_err("Reading cgroup.controllers: %s", path);
 		close(fd);
 		return 1;
 	}
 	buf[len] = 0;
 	close(fd);
 	/* No controllers available? We're probably on cgroup v1. */
 	if (len == 0)
 		return 0;
 	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
 	cfd = open(path, O_RDWR);
 	if (cfd < 0) {
 		log_err("Opening cgroup.subtree_control: %s", path);
 		return 1;
 	}
 	/* enable the space-separated controllers one write at a time */
 	for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
 		if (dprintf(cfd, "+%s\n", c) <= 0) {
 			log_err("Enabling controller %s: %s", c, path);
 			close(cfd);
 			return 1;
 		}
 	}
 	close(cfd);
 	return 0;
 }
 /**
 * setup_cgroup_environment() - Setup the cgroup environment
 *
@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
 		return 1;
 	}
 	if (enable_all_controllers(cgroup_workdir))
 		return 1;
 	return 0;
 }
--- a/tools/testing/selftests/bpf/test_cgroup_attach.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@ -1,3 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /* eBPF example program:
 *
 * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
@ -25,20 +27,27 @@
 #include <sys/resource.h>
 #include <sys/time.h>
 #include <unistd.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
-#include "bpf_insn.h"
+#include "bpf_util.h"
 #include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 #define FOO		"/foo"
 #define BAR		"/foo/bar/"
-#define PING_CMD	"ping -c1 -w1 127.0.0.1 > /dev/null"
+#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
 #ifdef DEBUG
 #define debug(args...) printf(args)
 #else
 #define debug(args...)
 #endif
 static int prog_load(int verdict)
 {
 	int ret;
@ -89,7 +98,7 @@ static int test_foo_bar(void)
 		goto err;
 	}
-	printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
+	debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
 	assert(system(PING_CMD) != 0);
 	/* Create cgroup /foo/bar, get fd, and join it */
@ -100,7 +109,7 @@ static int test_foo_bar(void)
 	if (join_cgroup(BAR))
 		goto err;
-	printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
+	debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
 	assert(system(PING_CMD) != 0);
 	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
@ -109,7 +118,7 @@ static int test_foo_bar(void)
 		goto err;
 	}
-	printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
+	debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
 	assert(system(PING_CMD) == 0);
 	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
@ -117,7 +126,7 @@ static int test_foo_bar(void)
 		goto err;
 	}
-	printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
+	debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
 	       "This ping in cgroup /foo/bar should fail...\n");
 	assert(system(PING_CMD) != 0);
@ -132,7 +141,7 @@ static int test_foo_bar(void)
 		goto err;
 	}
-	printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
+	debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
 	       "This ping in cgroup /foo/bar should pass...\n");
 	assert(system(PING_CMD) == 0);
@ -199,9 +208,9 @@ static int test_foo_bar(void)
 	close(bar);
 	cleanup_cgroup_environment();
 	if (!rc)
-		printf("### override:PASS\n");
+		printf("#override:PASS\n");
 	else
-		printf("### override:FAIL\n");
+		printf("#override:FAIL\n");
 	return rc;
 }
@ -441,19 +450,122 @@ static int test_multiprog(void)
 	close(cg5);
 	cleanup_cgroup_environment();
 	if (!rc)
-		printf("### multi:PASS\n");
+		printf("#multi:PASS\n");
 	else
-		printf("### multi:FAIL\n");
+		printf("#multi:FAIL\n");
 	return rc;
 }
-int main(int argc, char **argv)
+static int test_autodetach(void)
 {
-	int rc = 0;
+	__u32 prog_cnt = 4, attach_flags;
 	int allow_prog[2] = {0};
 	__u32 prog_ids[2] = {0};
 	int cg = 0, i, rc = -1;
 	void *ptr = NULL;
 	int attempts;
-	rc = test_foo_bar();
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-	if (rc)
+		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		return rc;
+		if (!allow_prog[i])
 			goto err;
 	}
-	return test_multiprog();
+	if (setup_cgroup_environment())
 		goto err;
 	/* create a cgroup, attach two programs and remember their ids */
 	cg = create_and_get_cgroup("/cg_autodetach");
 	if (cg < 0)
 		goto err;
 	if (join_cgroup("/cg_autodetach"))
 		goto err;
 	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
 		if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
 				    BPF_F_ALLOW_MULTI)) {
 			log_err("Attaching prog[%d] to cg:egress", i);
 			goto err;
 		}
 	}
 	/* make sure that programs are attached and run some traffic */
 	assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
 			      prog_ids, &prog_cnt) == 0);
 	assert(system(PING_CMD) == 0);
 	/* allocate some memory (4Mb) to pin the original cgroup */
 	ptr = malloc(4 * (1 << 20));
 	if (!ptr)
 		goto err;
 	/* close programs and cgroup fd */
 	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
 		close(allow_prog[i]);
 		allow_prog[i] = 0;
 	}
 	close(cg);
 	cg = 0;
 	/* leave the cgroup and remove it. don't detach programs */
 	cleanup_cgroup_environment();
 	/* wait for the asynchronous auto-detachment.
 	 * wait for no more than 5 sec and give up.
 	 */
 	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
 		for (attempts = 5; attempts >= 0; attempts--) {
 			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
 			if (fd < 0)
 				break;
 			/* don't leave the fd open */
 			close(fd);
 			if (!attempts)
 				goto err;
 			sleep(1);
 		}
 	}
 	rc = 0;
 err:
 	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
 		if (allow_prog[i] > 0)
 			close(allow_prog[i]);
 	if (cg)
 		close(cg);
 	free(ptr);
 	cleanup_cgroup_environment();
 	if (!rc)
 		printf("#autodetach:PASS\n");
 	else
 		printf("#autodetach:FAIL\n");
 	return rc;
 }
 /* Run every test case in order and report one overall verdict. */
 int main(void)
 {
 	int (*cases[])(void) = {
 		test_foo_bar,
 		test_multiprog,
 		test_autodetach,
 	};
 	int failed = 0;
 	int idx = 0;
 	/* keep going after a failure so that every case gets to run */
 	while (idx < ARRAY_SIZE(cases)) {
 		if (cases[idx]() != 0)
 			failed++;
 		idx++;
 	}
 	if (!failed) {
 		printf("test_cgroup_attach:PASS\n");
 		return EXIT_SUCCESS;
 	}
 	printf("test_cgroup_attach:FAIL\n");
 	return EXIT_FAILURE;
 }