Merge branch 'bpf: Implement two type cast kfuncs'

Yonghong Song says:

====================

Currently, a non-tracing bpf program typically has a single 'context' argument
with a predefined uapi struct type. Following that uapi struct, the user is
able to access other fields defined in the uapi header. Inside the kernel, the
user-seen 'context' argument is replaced with the 'kernel context' (or 'kctx'
for short), which can access more information than what the uapi header
provides. To access other info not in the uapi header, people typically do one
of two things:
  (1). extend the uapi to access more fields rooted from 'context'.
  (2). use the bpf_probe_read_kernel() helper to read a particular field based
    on the kctx.
Using (1) needs a uapi change and using (2) makes the code more complex since
direct memory access is not allowed; a rough sketch of approach (2) follows.
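
For illustration, an untested sketch of approach (2) for a tc program (the
variable names are illustrative; vmlinux.h/bpf_helpers.h includes and a
CAP_PERFMON-capable loader are assumed so that bpf_probe_read_kernel() is
usable from this program type):

  unsigned int probed_len;

  SEC("tc")
  int read_skb_len_probe(struct __sk_buff *skb)
  {
          /* manual, unchecked cast from the uapi ctx to the kernel ctx */
          struct sk_buff *kskb = (struct sk_buff *)(void *)skb;

          /* direct dereference of kskb is not allowed; copy the field out */
          bpf_probe_read_kernel(&probed_len, sizeof(probed_len), &kskb->len);
          return 0;
  }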

There are already a few instances of people trying to access more information
from the kctx:
  . trying to access some fields from the perf_event kctx ([1]).
  . trying to access some fields from the xdp kctx ([2]).

This patch set allows direct memory access to kctx fields by introducing
the bpf_cast_to_kern_ctx() kfunc.
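
As a minimal sketch (mirroring the selftest added in Patch 4; the global
variable name is illustrative and the usual vmlinux.h/bpf_helpers.h includes
are assumed), a tc program can cast its uapi context to the kernel sk_buff
and read fields directly:

  void *bpf_cast_to_kern_ctx(void *) __ksym;

  unsigned int kskb_len;

  SEC("tc")
  int read_kskb_len(struct __sk_buff *skb)
  {
          struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb);

          /* direct, verifier-checked access; the returned pointer is trusted */
          kskb_len = kskb->len;
          return 0;
  }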

Martin mentioned a use case like type casting below:
  #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
basically an 'unsigned char *' cast to a 'struct skb_shared_info *'. This patch
set supports such a use case as well with bpf_rdonly_cast().
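
A minimal sketch of that use case (again mirroring the Patch 4 selftest;
bpf_core_read.h is assumed for bpf_core_type_id_kernel(), and the returned
pointer is read-only/untrusted):

  void *bpf_cast_to_kern_ctx(void *) __ksym;
  void *bpf_rdonly_cast(void *, __u32) __ksym;

  unsigned int meta_len;

  SEC("tc")
  int read_shinfo(struct __sk_buff *skb)
  {
          struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb);
          struct skb_shared_info *shinfo;

          /* open-coded skb_shinfo(): skb->head + skb->end locates the shinfo */
          shinfo = bpf_rdonly_cast(kskb->head + kskb->end,
                                   bpf_core_type_id_kernel(struct skb_shared_info));
          meta_len = shinfo->meta_len;
          return 0;
  }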

For the patch series, Patch 1 adds support for kfuncs available to all
prog types. Patch 2 adds the bpf_cast_to_kern_ctx() kfunc. Patch 3 adds
the bpf_rdonly_cast() kfunc. Patch 4 adds a few positive and negative tests.

  [1] https://lore.kernel.org/bpf/ad15b398-9069-4a0e-48cb-4bb651ec3088@meta.com/
  [2] https://lore.kernel.org/bpf/20221109215242.1279993-1-john.fastabend@gmail.com/

Changelog:
  v3 -> v4:
    - remove unnecessary bpf_ctx_convert.t error checking
    - add and use meta.ret_btf_id instead of meta.arg_constant.value for
      bpf_cast_to_kern_ctx().
    - add PTR_TRUSTED to the return PTR_TO_BTF_ID type for bpf_cast_to_kern_ctx().
  v2 -> v3:
    - rebase on top of bpf-next (to resolve merge conflicts)
    - add the selftest to s390x deny list
  rfcv1 -> v2:
    - break the original single kfunc into two.
    - add missing error checks and error logs.
    - adapt to the new conventions in
      https://lore.kernel.org/all/20221118015614.2013203-1-memxor@gmail.com/
      for example, the __ign and __k suffixes.
    - add support in fixup_kfunc_call() to replace kfunc calls with a single mov.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Alexei Starovoitov 2022-11-20 15:43:37 -08:00
commit 99429b224f
7 changed files with 293 additions and 2 deletions

@@ -487,6 +487,7 @@ const struct btf_member *
btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, enum bpf_prog_type prog_type,
int arg);
int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
bool btf_types_are_same(const struct btf *btf1, u32 id1,
const struct btf *btf2, u32 id2);
#else
@@ -531,6 +532,10 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
{
return NULL;
}
static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log,
enum bpf_prog_type prog_type) {
return -EINVAL;
}
static inline bool btf_types_are_same(const struct btf *btf1, u32 id1,
const struct btf *btf2, u32 id2)
{

@@ -199,6 +199,7 @@ DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);
enum btf_kfunc_hook {
BTF_KFUNC_HOOK_COMMON,
BTF_KFUNC_HOOK_XDP,
BTF_KFUNC_HOOK_TC,
BTF_KFUNC_HOOK_STRUCT_OPS,
@@ -5602,6 +5603,26 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
return kern_ctx_type->type;
}
int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type)
{
const struct btf_member *kctx_member;
const struct btf_type *conv_struct;
const struct btf_type *kctx_type;
u32 kctx_type_id;
conv_struct = bpf_ctx_convert.t;
/* get member for kernel ctx type */
kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1;
kctx_type_id = kctx_member->type;
kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id);
if (!btf_type_is_struct(kctx_type)) {
bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id);
return -EINVAL;
}
return kctx_type_id;
}
BTF_ID_LIST(bpf_ctx_convert_btf_id)
BTF_ID(struct, bpf_ctx_convert)
@@ -7531,6 +7552,8 @@ static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
{
switch (prog_type) {
case BPF_PROG_TYPE_UNSPEC:
return BTF_KFUNC_HOOK_COMMON;
case BPF_PROG_TYPE_XDP:
return BTF_KFUNC_HOOK_XDP;
case BPF_PROG_TYPE_SCHED_CLS:
@@ -7559,6 +7582,11 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf,
u32 kfunc_btf_id)
{
enum btf_kfunc_hook hook;
u32 *kfunc_flags;
kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id);
if (kfunc_flags)
return kfunc_flags;
hook = bpf_prog_type_to_kfunc_hook(prog_type);
return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);

@@ -1879,6 +1879,16 @@ void bpf_task_release(struct task_struct *p)
put_task_struct_rcu_user(p);
}
void *bpf_cast_to_kern_ctx(void *obj)
{
return obj;
}
void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
{
return obj__ign;
}
__diag_pop();
BTF_SET8_START(generic_btf_ids)
@@ -1901,10 +1911,21 @@ static const struct btf_kfunc_id_set generic_kfunc_set = {
.set = &generic_btf_ids,
};
BTF_ID_LIST(generic_dtor_ids)
BTF_ID(struct, task_struct)
BTF_ID(func, bpf_task_release)
BTF_SET8_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
.owner = THIS_MODULE,
.set = &common_btf_ids,
};
static int __init kfunc_init(void)
{
int ret;
@@ -1918,9 +1939,10 @@ static int __init kfunc_init(void)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
return ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
ARRAY_SIZE(generic_dtors),
THIS_MODULE);
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);

@@ -7907,6 +7907,7 @@ struct bpf_kfunc_call_arg_meta {
u32 ref_obj_id;
u8 release_regno;
bool r0_rdonly;
u32 ret_btf_id;
u64 r0_size;
struct {
u64 value;
@@ -8151,6 +8152,8 @@ enum special_kfunc_type {
KF_bpf_list_push_back,
KF_bpf_list_pop_front,
KF_bpf_list_pop_back,
KF_bpf_cast_to_kern_ctx,
KF_bpf_rdonly_cast,
};
BTF_SET_START(special_kfunc_set)
@@ -8160,6 +8163,8 @@ BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -8169,6 +8174,8 @@ BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
@@ -8182,6 +8189,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = &regs[regno];
bool arg_mem_size = false;
if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
return KF_ARG_PTR_TO_CTX;
/* In this function, we verify the kfunc's BTF as per the argument type,
* leaving the rest of the verification with respect to the register
* type to our caller. When a set of conditions hold in the BTF type of
@@ -8668,6 +8678,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
return -EINVAL;
}
if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
if (ret < 0)
return -EINVAL;
meta->ret_btf_id = ret;
}
break;
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
@@ -8795,6 +8812,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
const struct btf_type *ret_t;
struct btf *desc_btf;
u32 *kfunc_flags;
@@ -8874,7 +8892,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
const struct btf_type *ret_t;
struct btf *ret_btf;
u32 ret_btf_id;
@@ -8922,6 +8939,23 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].btf = field->list_head.btf;
regs[BPF_REG_0].btf_id = field->list_head.value_btf_id;
regs[BPF_REG_0].off = field->list_head.node_offset;
} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
if (!ret_t || !btf_type_is_struct(ret_t)) {
verbose(env,
"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
return -EINVAL;
}
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].btf_id = meta.arg_constant.value;
} else {
verbose(env, "kernel function %s unhandled dynamic return type\n",
meta.func_name);
@@ -15175,6 +15209,10 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_buf[1] = addr[1];
insn_buf[2] = *insn;
*cnt = 3;
} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
}
return 0;
}

@@ -71,6 +71,7 @@ trace_printk # trace_printk__load unexpected error:
trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?)
tracing_struct # failed to auto-attach: -524 (trampoline)
trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
type_cast # JIT does not support calling kernel function
unpriv_bpf_disabled # fentry
user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?)
verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)

@@ -0,0 +1,114 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <network_helpers.h>
#include "type_cast.skel.h"
static void test_xdp(void)
{
struct type_cast *skel;
int err, prog_fd;
char buf[128];
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 1,
);
skel = type_cast__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
bpf_program__set_autoload(skel->progs.md_xdp, true);
err = type_cast__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
prog_fd = bpf_program__fd(skel->progs.md_xdp);
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval");
ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex");
ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex");
ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name");
ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum");
out:
type_cast__destroy(skel);
}
static void test_tc(void)
{
struct type_cast *skel;
int err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
);
skel = type_cast__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
bpf_program__set_autoload(skel->progs.md_skb, true);
err = type_cast__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
prog_fd = bpf_program__fd(skel->progs.md_skb);
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
ASSERT_EQ(topts.retval, 0, "tc test_run retval");
ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len");
ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len");
ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len");
ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len");
ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare");
out:
type_cast__destroy(skel);
}
static const char * const negative_tests[] = {
"untrusted_ptr",
"kctx_u64",
};
static void test_negative(void)
{
struct bpf_program *prog;
struct type_cast *skel;
int i, err;
for (i = 0; i < ARRAY_SIZE(negative_tests); i++) {
skel = type_cast__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]);
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto out;
bpf_program__set_autoload(prog, true);
err = type_cast__load(skel);
ASSERT_ERR(err, "skel_load");
out:
type_cast__destroy(skel);
}
}
void test_type_cast(void)
{
if (test__start_subtest("xdp"))
test_xdp();
if (test__start_subtest("tc"))
test_tc();
if (test__start_subtest("negative"))
test_negative();
}

@@ -0,0 +1,83 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} enter_id SEC(".maps");
#define IFNAMSIZ 16
int ifindex, ingress_ifindex;
char name[IFNAMSIZ];
unsigned int inum;
unsigned int meta_len, frag0_len, kskb_len, kskb2_len;
void *bpf_cast_to_kern_ctx(void *) __ksym;
void *bpf_rdonly_cast(void *, __u32) __ksym;
SEC("?xdp")
int md_xdp(struct xdp_md *ctx)
{
struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx);
struct net_device *dev;
dev = kctx->rxq->dev;
ifindex = dev->ifindex;
inum = dev->nd_net.net->ns.inum;
__builtin_memcpy(name, dev->name, IFNAMSIZ);
ingress_ifindex = ctx->ingress_ifindex;
return XDP_PASS;
}
SEC("?tc")
int md_skb(struct __sk_buff *skb)
{
struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb);
struct skb_shared_info *shared_info;
struct sk_buff *kskb2;
kskb_len = kskb->len;
/* Simulate the following kernel macro:
* #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
*/
shared_info = bpf_rdonly_cast(kskb->head + kskb->end,
bpf_core_type_id_kernel(struct skb_shared_info));
meta_len = shared_info->meta_len;
frag0_len = shared_info->frag_list->len;
/* kskb2 should be equal to kskb */
kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff));
kskb2_len = kskb2->len;
return 0;
}
SEC("?tp_btf/sys_enter")
int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
{
struct task_struct *task, *task_dup;
long *ptr;
task = bpf_get_current_task_btf();
task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct));
(void)bpf_task_storage_get(&enter_id, task_dup, 0, 0);
return 0;
}
SEC("?tracepoint/syscalls/sys_enter_nanosleep")
int kctx_u64(void *ctx)
{
u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64));
(void)kctx;
return 0;
}
char _license[] SEC("license") = "GPL";