bpf: Introduce device-bound XDP programs

Add a new flag, BPF_F_XDP_DEV_BOUND_ONLY, plus all the infrastructure needed
to associate a netdev with a BPF program at load time.

The netdevsim checks are dropped in favor of a generic check in dev_xdp_attach().

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-6-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Stanislav Fomichev 2023-01-19 14:15:24 -08:00 committed by Martin KaFai Lau
parent 89bbc53a4d
commit 2b3486bc2d
8 changed files with 113 additions and 38 deletions

View file

@ -315,10 +315,6 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
return -EINVAL; return -EINVAL;
} }
if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) {
NSIM_EA(bpf->extack, "program bound to different dev");
return -EINVAL;
}
state = bpf->prog->aux->offload->dev_priv; state = bpf->prog->aux->offload->dev_priv;
if (WARN_ON(strcmp(state->state, "xlated"))) { if (WARN_ON(strcmp(state->state, "xlated"))) {

View file

@ -1261,7 +1261,8 @@ struct bpf_prog_aux {
enum bpf_prog_type saved_dst_prog_type; enum bpf_prog_type saved_dst_prog_type;
enum bpf_attach_type saved_dst_attach_type; enum bpf_attach_type saved_dst_attach_type;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested; bool dev_bound; /* Program is bound to the netdev. */
bool offload_requested; /* Program is bound and offloaded to the netdev. */
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
bool func_proto_unreliable; bool func_proto_unreliable;
bool sleepable; bool sleepable;
@ -2451,7 +2452,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
int bpf_prog_offload_compile(struct bpf_prog *prog); int bpf_prog_offload_compile(struct bpf_prog *prog);
void bpf_prog_offload_destroy(struct bpf_prog *prog); void bpf_prog_dev_bound_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info, int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
struct bpf_prog *prog); struct bpf_prog *prog);
@ -2479,7 +2480,13 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
void unpriv_ebpf_notify(int new_state); void unpriv_ebpf_notify(int new_state);
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr);
void bpf_dev_bound_netdev_unregister(struct net_device *dev);
/* Report whether the program is bound to a netdev.
 *
 * Simply returns aux->dev_bound, which bpf_prog_load() sets to true when
 * attr->prog_ifindex is non-zero. A dev-bound program is not necessarily
 * offloaded; see bpf_prog_is_offloaded() for that distinction.
 */
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
return aux->dev_bound;
}
static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux)
{ {
@ -2507,12 +2514,21 @@ void sock_map_unhash(struct sock *sk);
void sock_map_destroy(struct sock *sk); void sock_map_destroy(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout); void sock_map_close(struct sock *sk, long timeout);
#else #else
static inline int bpf_prog_offload_init(struct bpf_prog *prog, static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog,
union bpf_attr *attr) union bpf_attr *attr)
{ {
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
/* No-op stub for the #else branch, where the real implementation is not
 * built (presumably when CONFIG_NET or CONFIG_BPF_SYSCALL is disabled --
 * confirm against the matching #if). Lets callers invoke it unconditionally.
 */
static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev)
{
}
/* Stub for the #else branch: always reports the program as not dev-bound,
 * without looking at @aux.
 */
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
return false;
}
static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
{ {
return false; return false;

View file

@ -1156,6 +1156,11 @@ enum bpf_link_type {
*/ */
#define BPF_F_XDP_HAS_FRAGS (1U << 5) #define BPF_F_XDP_HAS_FRAGS (1U << 5)
/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
* program becomes device-bound (without being offloaded to the device) and
* can access XDP metadata.
*/
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
/* link_create.kprobe_multi.flags used in LINK_CREATE command for /* link_create.kprobe_multi.flags used in LINK_CREATE command for
* BPF_TRACE_KPROBE_MULTI attach type to create return probe. * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
*/ */

View file

@ -2553,8 +2553,8 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif #endif
bpf_free_used_maps(aux); bpf_free_used_maps(aux);
bpf_free_used_btfs(aux); bpf_free_used_btfs(aux);
if (bpf_prog_is_offloaded(aux)) if (bpf_prog_is_dev_bound(aux))
bpf_prog_offload_destroy(aux->prog); bpf_prog_dev_bound_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
if (aux->prog->has_callchain_buf) if (aux->prog->has_callchain_buf)
put_callchain_buffers(); put_callchain_buffers();

View file

@ -41,7 +41,7 @@ struct bpf_offload_dev {
struct bpf_offload_netdev { struct bpf_offload_netdev {
struct rhash_head l; struct rhash_head l;
struct net_device *netdev; struct net_device *netdev;
struct bpf_offload_dev *offdev; struct bpf_offload_dev *offdev; /* NULL when bound-only */
struct list_head progs; struct list_head progs;
struct list_head maps; struct list_head maps;
struct list_head offdev_netdevs; struct list_head offdev_netdevs;
@ -89,19 +89,17 @@ static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
INIT_LIST_HEAD(&ondev->progs); INIT_LIST_HEAD(&ondev->progs);
INIT_LIST_HEAD(&ondev->maps); INIT_LIST_HEAD(&ondev->maps);
down_write(&bpf_devs_lock);
err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params); err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
if (err) { if (err) {
netdev_warn(netdev, "failed to register for BPF offload\n"); netdev_warn(netdev, "failed to register for BPF offload\n");
goto err_unlock_free; goto err_free;
} }
list_add(&ondev->offdev_netdevs, &offdev->netdevs); if (offdev)
up_write(&bpf_devs_lock); list_add(&ondev->offdev_netdevs, &offdev->netdevs);
return 0; return 0;
err_unlock_free: err_free:
up_write(&bpf_devs_lock);
kfree(ondev); kfree(ondev);
return err; return err;
} }
@ -149,24 +147,26 @@ static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
struct net_device *netdev) struct net_device *netdev)
{ {
struct bpf_offload_netdev *ondev, *altdev; struct bpf_offload_netdev *ondev, *altdev = NULL;
struct bpf_offloaded_map *offmap, *mtmp; struct bpf_offloaded_map *offmap, *mtmp;
struct bpf_prog_offload *offload, *ptmp; struct bpf_prog_offload *offload, *ptmp;
ASSERT_RTNL(); ASSERT_RTNL();
down_write(&bpf_devs_lock);
ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
if (WARN_ON(!ondev)) if (WARN_ON(!ondev))
goto unlock; return;
WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params)); WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
list_del(&ondev->offdev_netdevs);
/* Try to move the objects to another netdev of the device */ /* Try to move the objects to another netdev of the device */
altdev = list_first_entry_or_null(&offdev->netdevs, if (offdev) {
struct bpf_offload_netdev, list_del(&ondev->offdev_netdevs);
offdev_netdevs); altdev = list_first_entry_or_null(&offdev->netdevs,
struct bpf_offload_netdev,
offdev_netdevs);
}
if (altdev) { if (altdev) {
list_for_each_entry(offload, &ondev->progs, offloads) list_for_each_entry(offload, &ondev->progs, offloads)
offload->netdev = altdev->netdev; offload->netdev = altdev->netdev;
@ -185,11 +185,9 @@ static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
WARN_ON(!list_empty(&ondev->progs)); WARN_ON(!list_empty(&ondev->progs));
WARN_ON(!list_empty(&ondev->maps)); WARN_ON(!list_empty(&ondev->maps));
kfree(ondev); kfree(ondev);
unlock:
up_write(&bpf_devs_lock);
} }
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
{ {
struct bpf_offload_netdev *ondev; struct bpf_offload_netdev *ondev;
struct bpf_prog_offload *offload; struct bpf_prog_offload *offload;
@ -199,7 +197,11 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
attr->prog_type != BPF_PROG_TYPE_XDP) attr->prog_type != BPF_PROG_TYPE_XDP)
return -EINVAL; return -EINVAL;
if (attr->prog_flags) if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
return -EINVAL;
if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS &&
attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)
return -EINVAL; return -EINVAL;
offload = kzalloc(sizeof(*offload), GFP_USER); offload = kzalloc(sizeof(*offload), GFP_USER);
@ -214,11 +216,23 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
if (err) if (err)
goto err_maybe_put; goto err_maybe_put;
prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY);
down_write(&bpf_devs_lock); down_write(&bpf_devs_lock);
ondev = bpf_offload_find_netdev(offload->netdev); ondev = bpf_offload_find_netdev(offload->netdev);
if (!ondev) { if (!ondev) {
err = -EINVAL; if (bpf_prog_is_offloaded(prog->aux)) {
goto err_unlock; err = -EINVAL;
goto err_unlock;
}
/* When only binding to the device, explicitly
* create an entry in the hashtable.
*/
err = __bpf_offload_dev_netdev_register(NULL, offload->netdev);
if (err)
goto err_unlock;
ondev = bpf_offload_find_netdev(offload->netdev);
} }
offload->offdev = ondev->offdev; offload->offdev = ondev->offdev;
prog->aux->offload = offload; prog->aux->offload = offload;
@ -321,12 +335,25 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
up_read(&bpf_devs_lock); up_read(&bpf_devs_lock);
} }
void bpf_prog_offload_destroy(struct bpf_prog *prog) void bpf_prog_dev_bound_destroy(struct bpf_prog *prog)
{ {
struct bpf_offload_netdev *ondev;
struct net_device *netdev;
rtnl_lock();
down_write(&bpf_devs_lock); down_write(&bpf_devs_lock);
if (prog->aux->offload) if (prog->aux->offload) {
list_del_init(&prog->aux->offload->offloads);
netdev = prog->aux->offload->netdev;
__bpf_prog_offload_destroy(prog); __bpf_prog_offload_destroy(prog);
ondev = bpf_offload_find_netdev(netdev);
if (!ondev->offdev && list_empty(&ondev->progs))
__bpf_offload_dev_netdev_unregister(NULL, netdev);
}
up_write(&bpf_devs_lock); up_write(&bpf_devs_lock);
rtnl_unlock();
} }
static int bpf_prog_offload_translate(struct bpf_prog *prog) static int bpf_prog_offload_translate(struct bpf_prog *prog)
@ -621,7 +648,7 @@ static bool __bpf_offload_dev_match(struct bpf_prog *prog,
struct bpf_offload_netdev *ondev1, *ondev2; struct bpf_offload_netdev *ondev1, *ondev2;
struct bpf_prog_offload *offload; struct bpf_prog_offload *offload;
if (!bpf_prog_is_offloaded(prog->aux)) if (!bpf_prog_is_dev_bound(prog->aux))
return false; return false;
offload = prog->aux->offload; offload = prog->aux->offload;
@ -667,14 +694,21 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
struct net_device *netdev) struct net_device *netdev)
{ {
return __bpf_offload_dev_netdev_register(offdev, netdev); int err;
down_write(&bpf_devs_lock);
err = __bpf_offload_dev_netdev_register(offdev, netdev);
up_write(&bpf_devs_lock);
return err;
} }
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register); EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
struct net_device *netdev) struct net_device *netdev)
{ {
down_write(&bpf_devs_lock);
__bpf_offload_dev_netdev_unregister(offdev, netdev); __bpf_offload_dev_netdev_unregister(offdev, netdev);
up_write(&bpf_devs_lock);
} }
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
@ -708,6 +742,19 @@ void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
} }
EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);
/* Drop the bound-only hashtable entry for @dev, if one exists.
 *
 * Invoked from unregister_netdevice_many_notify() right after
 * dev_xdp_uninstall(). The caller must hold RTNL (checked by ASSERT_RTNL());
 * bpf_devs_lock is taken for writing around the lookup and removal.
 *
 * Only entries whose ->offdev is NULL (i.e. created for bound-only programs)
 * are unregistered here. Entries that belong to an offload device are left
 * untouched -- presumably the driver removes those itself via
 * bpf_offload_dev_netdev_unregister(); confirm against the drivers.
 */
void bpf_dev_bound_netdev_unregister(struct net_device *dev)
{
struct bpf_offload_netdev *ondev;
ASSERT_RTNL();
down_write(&bpf_devs_lock);
ondev = bpf_offload_find_netdev(dev);
/* No entry, or an entry owned by an offload device: nothing to do. */
if (ondev && !ondev->offdev)
__bpf_offload_dev_netdev_unregister(NULL, ondev->netdev);
up_write(&bpf_devs_lock);
}
static int __init bpf_offload_init(void) static int __init bpf_offload_init(void)
{ {
return rhashtable_init(&offdevs, &offdevs_params); return rhashtable_init(&offdevs, &offdevs_params);

View file

@ -2491,7 +2491,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
BPF_F_TEST_STATE_FREQ | BPF_F_TEST_STATE_FREQ |
BPF_F_SLEEPABLE | BPF_F_SLEEPABLE |
BPF_F_TEST_RND_HI32 | BPF_F_TEST_RND_HI32 |
BPF_F_XDP_HAS_FRAGS)) BPF_F_XDP_HAS_FRAGS |
BPF_F_XDP_DEV_BOUND_ONLY))
return -EINVAL; return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@ -2575,7 +2576,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
prog->aux->attach_btf = attach_btf; prog->aux->attach_btf = attach_btf;
prog->aux->attach_btf_id = attr->attach_btf_id; prog->aux->attach_btf_id = attr->attach_btf_id;
prog->aux->dst_prog = dst_prog; prog->aux->dst_prog = dst_prog;
prog->aux->offload_requested = !!attr->prog_ifindex; prog->aux->dev_bound = !!attr->prog_ifindex;
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
@ -2598,8 +2599,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
atomic64_set(&prog->aux->refcnt, 1); atomic64_set(&prog->aux->refcnt, 1);
prog->gpl_compatible = is_gpl ? 1 : 0; prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_offloaded(prog->aux)) { if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_offload_init(prog, attr); err = bpf_prog_dev_bound_init(prog, attr);
if (err) if (err)
goto free_prog_sec; goto free_prog_sec;
} }

View file

@ -9228,6 +9228,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported"); NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported");
return -EINVAL; return -EINVAL;
} }
if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) {
NL_SET_ERR_MSG(extack, "Program bound to different device");
return -EINVAL;
}
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
return -EINVAL; return -EINVAL;
@ -10830,6 +10834,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_shutdown(dev); dev_shutdown(dev);
dev_xdp_uninstall(dev); dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
netdev_offload_xstats_disable_all(dev); netdev_offload_xstats_disable_all(dev);

View file

@ -1156,6 +1156,11 @@ enum bpf_link_type {
*/ */
#define BPF_F_XDP_HAS_FRAGS (1U << 5) #define BPF_F_XDP_HAS_FRAGS (1U << 5)
/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
* program becomes device-bound (without being offloaded to the device) and
* can access XDP metadata.
*/
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
/* link_create.kprobe_multi.flags used in LINK_CREATE command for /* link_create.kprobe_multi.flags used in LINK_CREATE command for
* BPF_TRACE_KPROBE_MULTI attach type to create return probe. * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
*/ */