mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-29 22:02:02 +00:00
Merge branch 'allow-bpf_refcount_acquire-of-mapval-obtained-via-direct-ld'
Dave Marchevsky says:
====================
Allow bpf_refcount_acquire of mapval obtained via direct LD
Consider this BPF program:
struct cgv_node {
int d;
struct bpf_refcount r;
struct bpf_rb_node rb;
};
struct val_stash {
struct cgv_node __kptr *v;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
__type(value, struct val_stash);
__uint(max_entries, 10);
} array_map SEC(".maps");
long bpf_program(void *ctx)
{
struct val_stash *mapval;
struct cgv_node *p;
int idx = 0;
mapval = bpf_map_lookup_elem(&array_map, &idx);
if (!mapval || !mapval->v) { /* omitted */ }
p = bpf_refcount_acquire(mapval->v); /* Verification FAILs here */
/* Add p to some tree */
return 0;
}
Verification fails on the refcount_acquire:
160: (79) r1 = *(u64 *)(r8 +8) ; R1_w=untrusted_ptr_or_null_cgv_node(id=11,off=0,imm=0) R8_w=map_value(id=10,off=0,ks=8,vs=16,imm=0) refs=6
161: (b7) r2 = 0 ; R2_w=0 refs=6
162: (85) call bpf_refcount_acquire_impl#117824
arg#0 is neither owning or non-owning ref
The above verifier dump is actually from sched_ext's scx_flatcg [0],
which is the motivating use case for this series' changes. Specifically,
scx_flatcg stashes a rb_node type w/ cgroup-specific info (struct
cgv_node) in a map when the cgroup is created, then later puts that
cgroup's node in a rbtree in .enqueue . Making struct cgv_node
refcounted would simplify the code a bit by virtue of allowing us to
remove the kptr_xchg's, but "later puts that cgroup's node in a rbtree"
is not possible without a refcount_acquire, which suffers from the above
verification failure.
If we get rid of the PTR_UNTRUSTED flag and add MEM_ALLOC | NON_OWN_REF,
mapval->v would be a non-owning ref and verification would succeed. Due
to the most recent set of refcount changes [1], which modified
bpf_obj_drop behavior to not reuse refcounted graph node's underlying
memory until after RCU grace period, this is safe to do. Once mapval->v
has the correct flags it _is_ a non-owning reference and verification of
the motivating example will succeed.
[0]: 52911e1040/tools/sched_ext/scx_flatcg.bpf.c (L275)
[1]: https://lore.kernel.org/bpf/20230821193311.3290257-1-davemarchevsky@fb.com/
Summary of patches:
* Patch 1 fixes an issue with bpf_refcount_acquire verification
letting MAYBE_NULL ptrs through
* Patch 2 tests Patch 1's fix
* Patch 3 broadens the use of "free only after RCU GP" to all
user-allocated types
* Patch 4 is a small nonfunctional refactoring
* Patch 5 changes verifier to mark direct LD of stashed graph node
kptr as non-owning ref
* Patch 6 tests Patch 5's verifier changes
Changelog:
v1 -> v2: https://lore.kernel.org/bpf/20231025214007.2920506-1-davemarchevsky@fb.com/
Series title changed to "Allow bpf_refcount_acquire of mapval obtained via
direct LD". V1's title was mistakenly truncated.
* Patch 5 ("bpf: Mark direct ld of stashed bpf_{rb,list}_node as non-owning ref")
* Direct LD of percpu kptr should not have MEM_ALLOC flag (Yonghong)
* Patch 6 ("selftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld")
* Test read from stashed value as well
====================
Link: https://lore.kernel.org/r/20231107085639.3016113-1-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
3f6d04d742
7 changed files with 162 additions and 19 deletions
|
@ -186,8 +186,8 @@ enum btf_field_type {
|
|||
BPF_LIST_NODE = (1 << 6),
|
||||
BPF_RB_ROOT = (1 << 7),
|
||||
BPF_RB_NODE = (1 << 8),
|
||||
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
|
||||
BPF_RB_NODE | BPF_RB_ROOT,
|
||||
BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
|
||||
BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
|
||||
BPF_REFCOUNT = (1 << 9),
|
||||
};
|
||||
|
||||
|
|
|
@ -3840,9 +3840,6 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
|
|||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
|
||||
#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
|
||||
|
||||
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
|
||||
{
|
||||
int i;
|
||||
|
@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
|
|||
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
|
||||
* does not form cycles.
|
||||
*/
|
||||
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
|
||||
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
|
||||
return 0;
|
||||
for (i = 0; i < rec->cnt; i++) {
|
||||
struct btf_struct_meta *meta;
|
||||
u32 btf_id;
|
||||
|
||||
if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
|
||||
if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
|
||||
continue;
|
||||
btf_id = rec->fields[i].graph_root.value_btf_id;
|
||||
meta = btf_find_struct_meta(btf, btf_id);
|
||||
|
@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
|
|||
* to check ownership cycle for a type unless it's also a
|
||||
* node type.
|
||||
*/
|
||||
if (!(rec->field_mask & GRAPH_NODE_MASK))
|
||||
if (!(rec->field_mask & BPF_GRAPH_NODE))
|
||||
continue;
|
||||
|
||||
/* We need to ensure ownership acyclicity among all types. The
|
||||
|
@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
|
|||
* - A is both an root and node.
|
||||
* - B is only an node.
|
||||
*/
|
||||
if (meta->record->field_mask & GRAPH_ROOT_MASK)
|
||||
if (meta->record->field_mask & BPF_GRAPH_ROOT)
|
||||
return -ELOOP;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
|
|||
ma = &bpf_global_percpu_ma;
|
||||
else
|
||||
ma = &bpf_global_ma;
|
||||
if (rec && rec->refcount_off >= 0)
|
||||
bpf_mem_free_rcu(ma, p);
|
||||
else
|
||||
bpf_mem_free(ma, p);
|
||||
}
|
||||
|
||||
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
|
||||
|
@ -2520,7 +2517,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
|
|||
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
|
||||
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
|
||||
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
|
||||
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
|
||||
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
|
||||
|
|
|
@ -5557,10 +5557,23 @@ BTF_SET_END(rcu_protected_types)
|
|||
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
|
||||
{
|
||||
if (!btf_is_kernel(btf))
|
||||
return false;
|
||||
return true;
|
||||
return btf_id_set_contains(&rcu_protected_types, btf_id);
|
||||
}
|
||||
|
||||
static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
|
||||
{
|
||||
struct btf_struct_meta *meta;
|
||||
|
||||
if (btf_is_kernel(kptr_field->kptr.btf))
|
||||
return NULL;
|
||||
|
||||
meta = btf_find_struct_meta(kptr_field->kptr.btf,
|
||||
kptr_field->kptr.btf_id);
|
||||
|
||||
return meta ? meta->record : NULL;
|
||||
}
|
||||
|
||||
static bool rcu_safe_kptr(const struct btf_field *field)
|
||||
{
|
||||
const struct btf_field_kptr *kptr = &field->kptr;
|
||||
|
@ -5571,12 +5584,25 @@ static bool rcu_safe_kptr(const struct btf_field *field)
|
|||
|
||||
static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
|
||||
{
|
||||
struct btf_record *rec;
|
||||
u32 ret;
|
||||
|
||||
ret = PTR_MAYBE_NULL;
|
||||
if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
|
||||
if (kptr_field->type != BPF_KPTR_PERCPU)
|
||||
return PTR_MAYBE_NULL | MEM_RCU;
|
||||
return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
|
||||
ret |= MEM_RCU;
|
||||
if (kptr_field->type == BPF_KPTR_PERCPU)
|
||||
ret |= MEM_PERCPU;
|
||||
else if (!btf_is_kernel(kptr_field->kptr.btf))
|
||||
ret |= MEM_ALLOC;
|
||||
|
||||
rec = kptr_pointee_btf_record(kptr_field);
|
||||
if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
|
||||
ret |= NON_OWN_REF;
|
||||
} else {
|
||||
ret |= PTR_UNTRUSTED;
|
||||
}
|
||||
return PTR_MAYBE_NULL | PTR_UNTRUSTED;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
|
||||
|
|
|
@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void)
|
|||
local_kptr_stash__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_refcount_acquire_without_unstash(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_test_run_opts, opts,
|
||||
.data_in = &pkt_v4,
|
||||
.data_size_in = sizeof(pkt_v4),
|
||||
.repeat = 1,
|
||||
);
|
||||
struct local_kptr_stash *skel;
|
||||
int ret;
|
||||
|
||||
skel = local_kptr_stash__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
|
||||
return;
|
||||
|
||||
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
|
||||
&opts);
|
||||
ASSERT_OK(ret, "refcount_acquire_without_unstash run");
|
||||
ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
|
||||
|
||||
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
|
||||
ASSERT_OK(ret, "stash_refcounted_node run");
|
||||
ASSERT_OK(opts.retval, "stash_refcounted_node retval");
|
||||
|
||||
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
|
||||
&opts);
|
||||
ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
|
||||
ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
|
||||
|
||||
local_kptr_stash__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_local_kptr_stash_fail(void)
|
||||
{
|
||||
RUN_TESTS(local_kptr_stash_fail);
|
||||
|
@ -86,6 +117,8 @@ void test_local_kptr_stash(void)
|
|||
test_local_kptr_stash_plain();
|
||||
if (test__start_subtest("local_kptr_stash_unstash"))
|
||||
test_local_kptr_stash_unstash();
|
||||
if (test__start_subtest("refcount_acquire_without_unstash"))
|
||||
test_refcount_acquire_without_unstash();
|
||||
if (test__start_subtest("local_kptr_stash_fail"))
|
||||
test_local_kptr_stash_fail();
|
||||
}
|
||||
|
|
|
@ -14,6 +14,24 @@ struct node_data {
|
|||
struct bpf_rb_node node;
|
||||
};
|
||||
|
||||
struct refcounted_node {
|
||||
long data;
|
||||
struct bpf_rb_node rb_node;
|
||||
struct bpf_refcount refcount;
|
||||
};
|
||||
|
||||
struct stash {
|
||||
struct bpf_spin_lock l;
|
||||
struct refcounted_node __kptr *stashed;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, int);
|
||||
__type(value, struct stash);
|
||||
__uint(max_entries, 10);
|
||||
} refcounted_node_stash SEC(".maps");
|
||||
|
||||
struct plain_local {
|
||||
long key;
|
||||
long data;
|
||||
|
@ -38,6 +56,7 @@ struct map_value {
|
|||
* Had to do the same w/ bpf_kfunc_call_test_release below
|
||||
*/
|
||||
struct node_data *just_here_because_btf_bug;
|
||||
struct refcounted_node *just_here_because_btf_bug2;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
|
@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
SEC("tc")
|
||||
long refcount_acquire_without_unstash(void *ctx)
|
||||
{
|
||||
struct refcounted_node *p;
|
||||
struct stash *s;
|
||||
int ret = 0;
|
||||
|
||||
s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
|
||||
if (!s)
|
||||
return 1;
|
||||
|
||||
if (!s->stashed)
|
||||
/* refcount_acquire failure is expected when no refcounted_node
|
||||
* has been stashed before this program executes
|
||||
*/
|
||||
return 2;
|
||||
|
||||
p = bpf_refcount_acquire(s->stashed);
|
||||
if (!p)
|
||||
return 3;
|
||||
|
||||
ret = s->stashed ? s->stashed->data : -1;
|
||||
bpf_obj_drop(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Helper for refcount_acquire_without_unstash test */
|
||||
SEC("tc")
|
||||
long stash_refcounted_node(void *ctx)
|
||||
{
|
||||
struct refcounted_node *p;
|
||||
struct stash *s;
|
||||
int key = 0;
|
||||
|
||||
s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
|
||||
if (!s)
|
||||
return 1;
|
||||
|
||||
p = bpf_obj_new(typeof(*p));
|
||||
if (!p)
|
||||
return 2;
|
||||
p->data = 42;
|
||||
|
||||
p = bpf_kptr_xchg(&s->stashed, p);
|
||||
if (p) {
|
||||
bpf_obj_drop(p);
|
||||
return 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
|
|
@ -53,6 +53,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
||||
long refcount_acquire_maybe_null(void *ctx)
|
||||
{
|
||||
struct node_acquire *n, *m;
|
||||
|
||||
n = bpf_obj_new(typeof(*n));
|
||||
/* Intentionally not testing !n
|
||||
* it's MAYBE_NULL for refcount_acquire
|
||||
*/
|
||||
m = bpf_refcount_acquire(n);
|
||||
if (m)
|
||||
bpf_obj_drop(m);
|
||||
if (n)
|
||||
bpf_obj_drop(n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("Unreleased reference id=3 alloc_insn=9")
|
||||
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
|
||||
|
|
Loading…
Reference in a new issue