bpf: Add basic bpf_rb_{root,node} support

This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.

structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached rb_node, respectively.

btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.

btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.

After this patch, a bpf program can have a struct bpf_rb_root in a
map_value, but not add anything to nor do anything useful with it.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Dave Marchevsky 2023-02-13 16:40:10 -08:00 committed by Alexei Starovoitov
parent 6a3cd3318f
commit 9c395c1b99
8 changed files with 216 additions and 73 deletions

View file

@ -181,7 +181,10 @@ enum btf_field_type {
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
BPF_LIST_HEAD = (1 << 4),
BPF_LIST_NODE = (1 << 5),
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD,
BPF_RB_ROOT = (1 << 6),
BPF_RB_NODE = (1 << 7),
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
BPF_RB_NODE | BPF_RB_ROOT,
};
struct btf_field_kptr {
@ -285,6 +288,10 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
return "bpf_list_head";
case BPF_LIST_NODE:
return "bpf_list_node";
case BPF_RB_ROOT:
return "bpf_rb_root";
case BPF_RB_NODE:
return "bpf_rb_node";
default:
WARN_ON_ONCE(1);
return "unknown";
@ -305,6 +312,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_list_head);
case BPF_LIST_NODE:
return sizeof(struct bpf_list_node);
case BPF_RB_ROOT:
return sizeof(struct bpf_rb_root);
case BPF_RB_NODE:
return sizeof(struct bpf_rb_node);
default:
WARN_ON_ONCE(1);
return 0;
@ -325,6 +336,10 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_list_head);
case BPF_LIST_NODE:
return __alignof__(struct bpf_list_node);
case BPF_RB_ROOT:
return __alignof__(struct bpf_rb_root);
case BPF_RB_NODE:
return __alignof__(struct bpf_rb_node);
default:
WARN_ON_ONCE(1);
return 0;
@ -435,6 +450,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
void bpf_timer_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
struct bpf_spin_lock *spin_lock);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);

View file

@ -6917,6 +6917,17 @@ struct bpf_list_node {
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_rb_root {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_rb_node {
__u64 :64;
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.

View file

@ -3324,12 +3324,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
return NULL;
}
static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
const struct btf_type *t, int comp_idx,
u32 off, int sz, struct btf_field_info *info)
static int
btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
const struct btf_type *t, int comp_idx, u32 off,
int sz, struct btf_field_info *info,
enum btf_field_type head_type)
{
const char *node_field_name;
const char *value_type;
const char *list_node;
s32 id;
if (!__btf_type_is_struct(t))
@ -3339,26 +3341,32 @@ static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
if (!value_type)
return -EINVAL;
list_node = strstr(value_type, ":");
if (!list_node)
node_field_name = strstr(value_type, ":");
if (!node_field_name)
return -EINVAL;
value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN);
value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
if (!value_type)
return -ENOMEM;
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
kfree(value_type);
if (id < 0)
return id;
list_node++;
if (str_is_empty(list_node))
node_field_name++;
if (str_is_empty(node_field_name))
return -EINVAL;
info->type = BPF_LIST_HEAD;
info->type = head_type;
info->off = off;
info->graph_root.value_btf_id = id;
info->graph_root.node_name = list_node;
info->graph_root.node_name = node_field_name;
return BTF_FIELD_FOUND;
}
#define field_mask_test_name(field_type, field_type_str) \
if (field_mask & field_type && !strcmp(name, field_type_str)) { \
type = field_type; \
goto end; \
}
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
int *align, int *sz)
{
@ -3382,18 +3390,11 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
goto end;
}
}
if (field_mask & BPF_LIST_HEAD) {
if (!strcmp(name, "bpf_list_head")) {
type = BPF_LIST_HEAD;
goto end;
}
}
if (field_mask & BPF_LIST_NODE) {
if (!strcmp(name, "bpf_list_node")) {
type = BPF_LIST_NODE;
goto end;
}
}
field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & BPF_KPTR) {
type = BPF_KPTR_REF;
@ -3406,6 +3407,8 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
return type;
}
#undef field_mask_test_name
static int btf_find_struct_field(const struct btf *btf,
const struct btf_type *t, u32 field_mask,
struct btf_field_info *info, int info_cnt)
@ -3438,6 +3441,7 @@ static int btf_find_struct_field(const struct btf *btf,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
case BPF_RB_NODE:
ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@ -3451,8 +3455,11 @@ static int btf_find_struct_field(const struct btf *btf,
return ret;
break;
case BPF_LIST_HEAD:
ret = btf_find_list_head(btf, t, member_type, i, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
case BPF_RB_ROOT:
ret = btf_find_graph_root(btf, t, member_type,
i, off, sz,
idx < info_cnt ? &info[idx] : &tmp,
field_type);
if (ret < 0)
return ret;
break;
@ -3499,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
case BPF_RB_NODE:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@ -3512,8 +3520,11 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
return ret;
break;
case BPF_LIST_HEAD:
ret = btf_find_list_head(btf, var, var_type, -1, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
case BPF_RB_ROOT:
ret = btf_find_graph_root(btf, var, var_type,
-1, off, sz,
idx < info_cnt ? &info[idx] : &tmp,
field_type);
if (ret < 0)
return ret;
break;
@ -3615,8 +3626,11 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
return ret;
}
static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
static int btf_parse_graph_root(const struct btf *btf,
struct btf_field *field,
struct btf_field_info *info,
const char *node_type_name,
size_t node_type_align)
{
const struct btf_type *t, *n = NULL;
const struct btf_member *member;
@ -3638,13 +3652,13 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
n = btf_type_by_id(btf, member->type);
if (!__btf_type_is_struct(n))
return -EINVAL;
if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off)))
return -EINVAL;
offset = __btf_member_bit_offset(n, member);
if (offset % 8)
return -EINVAL;
offset /= 8;
if (offset % __alignof__(struct bpf_list_node))
if (offset % node_type_align)
return -EINVAL;
field->graph_root.btf = (struct btf *)btf;
@ -3656,6 +3670,20 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
return 0;
}
static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
{
return btf_parse_graph_root(btf, field, info, "bpf_list_node",
__alignof__(struct bpf_list_node));
}
static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
{
return btf_parse_graph_root(btf, field, info, "bpf_rb_node",
__alignof__(struct bpf_rb_node));
}
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
@ -3718,7 +3746,13 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
case BPF_RB_ROOT:
ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]);
if (ret < 0)
goto end;
break;
case BPF_LIST_NODE:
case BPF_RB_NODE:
break;
default:
ret = -EFAULT;
@ -3727,8 +3761,9 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->cnt++;
}
/* bpf_list_head requires bpf_spin_lock */
if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) {
/* bpf_{list_head, rb_node} require bpf_spin_lock */
if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
ret = -EINVAL;
goto end;
}
@ -3739,22 +3774,28 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
return ERR_PTR(ret);
}
#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
/* There are two owning types, kptr_ref and bpf_list_head. The former
* only supports storing kernel types, which can never store references
* to program allocated local types, atleast not yet. Hence we only need
* to ensure that bpf_list_head ownership does not form cycles.
/* There are three types that signify ownership of some other type:
* kptr_ref, bpf_list_head, bpf_rb_root.
* kptr_ref only supports storing kernel types, which can't store
* references to program allocated local types.
*
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
* does not form cycles.
*/
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
if (!(rec->fields[i].type & BPF_LIST_HEAD))
if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
@ -3762,39 +3803,47 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
return -EFAULT;
rec->fields[i].graph_root.value_rec = meta->record;
if (!(rec->field_mask & BPF_LIST_NODE))
/* We need to set value_rec for all root types, but no need
* to check ownership cycle for a type unless it's also a
* node type.
*/
if (!(rec->field_mask & GRAPH_NODE_MASK))
continue;
/* We need to ensure ownership acyclicity among all types. The
* proper way to do it would be to topologically sort all BTF
* IDs based on the ownership edges, since there can be multiple
* bpf_list_head in a type. Instead, we use the following
* reasoning:
* bpf_{list_head,rb_node} in a type. Instead, we use the
* following resaoning:
*
* - A type can only be owned by another type in user BTF if it
* has a bpf_list_node.
* has a bpf_{list,rb}_node. Let's call these node types.
* - A type can only _own_ another type in user BTF if it has a
* bpf_list_head.
* bpf_{list_head,rb_root}. Let's call these root types.
*
* We ensure that if a type has both bpf_list_head and
* bpf_list_node, its element types cannot be owning types.
* We ensure that if a type is both a root and node, its
* element types cannot be root types.
*
* To ensure acyclicity:
*
* When A only has bpf_list_head, ownership chain can be:
* When A is an root type but not a node, its ownership
* chain can be:
* A -> B -> C
* Where:
* - B has both bpf_list_head and bpf_list_node.
* - C only has bpf_list_node.
* - A is an root, e.g. has bpf_rb_root.
* - B is both a root and node, e.g. has bpf_rb_node and
* bpf_list_head.
* - C is only an root, e.g. has bpf_list_node
*
* When A has both bpf_list_head and bpf_list_node, some other
* type already owns it in the BTF domain, hence it can not own
* another owning type through any of the bpf_list_head edges.
* When A is both a root and node, some other type already
* owns it in the BTF domain, hence it can not own
* another root type through any of the ownership edges.
* A -> B
* Where:
* - B only has bpf_list_node.
* - A is both an root and node.
* - B is only an node.
*/
if (meta->record->field_mask & BPF_LIST_HEAD)
if (meta->record->field_mask & GRAPH_ROOT_MASK)
return -ELOOP;
}
return 0;
@ -5256,6 +5305,8 @@ static const char *alloc_obj_fields[] = {
"bpf_spin_lock",
"bpf_list_head",
"bpf_list_node",
"bpf_rb_root",
"bpf_rb_node",
};
static struct btf_struct_metas *
@ -5329,7 +5380,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size);
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
BPF_RB_ROOT | BPF_RB_NODE, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;

View file

@ -1772,6 +1772,46 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
}
}
/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
* 'rb_node *', so field name of rb_node within containing struct is not
* needed.
*
* Since bpf_rb_tree's node type has a corresponding struct btf_field with
* graph_root.node_offset, it's not necessary to know field name
* or type of node struct
*/
#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
for (pos = rb_first_postorder(root); \
pos && ({ n = rb_next_postorder(pos); 1; }); \
pos = n)
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
struct bpf_spin_lock *spin_lock)
{
struct rb_root_cached orig_root, *root = rb_root;
struct rb_node *pos, *n;
void *obj;
BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
__bpf_spin_lock_irqsave(spin_lock);
orig_root = *root;
*root = RB_ROOT_CACHED;
__bpf_spin_unlock_irqrestore(spin_lock);
bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
obj = pos;
obj -= field->graph_root.node_offset;
bpf_obj_free_fields(field->graph_root.value_rec, obj);
migrate_disable();
bpf_mem_free(&bpf_global_ma, obj);
migrate_enable();
}
}
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");

View file

@ -537,9 +537,6 @@ void btf_record_free(struct btf_record *rec)
return;
for (i = 0; i < rec->cnt; i++) {
switch (rec->fields[i].type) {
case BPF_SPIN_LOCK:
case BPF_TIMER:
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
if (rec->fields[i].kptr.module)
@ -548,7 +545,11 @@ void btf_record_free(struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
/* Nothing to release for bpf_list_head */
case BPF_RB_ROOT:
case BPF_RB_NODE:
case BPF_SPIN_LOCK:
case BPF_TIMER:
/* Nothing to release */
break;
default:
WARN_ON_ONCE(1);
@ -581,9 +582,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
new_rec->cnt = 0;
for (i = 0; i < rec->cnt; i++) {
switch (fields[i].type) {
case BPF_SPIN_LOCK:
case BPF_TIMER:
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
btf_get(fields[i].kptr.btf);
@ -594,7 +592,11 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
/* Nothing to acquire for bpf_list_head */
case BPF_RB_ROOT:
case BPF_RB_NODE:
case BPF_SPIN_LOCK:
case BPF_TIMER:
/* Nothing to acquire */
break;
default:
ret = -EFAULT;
@ -674,7 +676,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
continue;
bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
break;
case BPF_RB_ROOT:
if (WARN_ON_ONCE(rec->spin_lock_off < 0))
continue;
bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
break;
case BPF_LIST_NODE:
case BPF_RB_NODE:
break;
default:
WARN_ON_ONCE(1);
@ -1010,7 +1018,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EINVAL;
map->record = btf_parse_fields(btf, value_type,
BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
BPF_RB_ROOT,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@ -1058,6 +1067,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
}
break;
case BPF_LIST_HEAD:
case BPF_RB_ROOT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {

View file

@ -14703,9 +14703,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
btf_record_has_field(map->record, BPF_RB_ROOT)) {
if (is_tracing_prog_type(prog_type)) {
verbose(env, "tracing progs cannot use bpf_list_head yet\n");
verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
return -EINVAL;
}
}

View file

@ -6917,6 +6917,17 @@ struct bpf_list_node {
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_rb_root {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_rb_node {
__u64 :64;
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.

View file

@ -58,12 +58,12 @@ static struct {
TEST(inner_map, pop_front)
TEST(inner_map, pop_back)
#undef TEST
{ "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" },
{ "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" },
{ "map_compat_tp", "tracing progs cannot use bpf_list_head yet" },
{ "map_compat_perf", "tracing progs cannot use bpf_list_head yet" },
{ "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" },
{ "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" },
{ "map_compat_kprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "map_compat_kretprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "map_compat_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "map_compat_perf", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
{ "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" },
{ "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" },