Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-03-09

The following pull-request contains BPF updates for your *net-next* tree.

We've added 90 non-merge commits during the last 17 day(s) which contain
a total of 114 files changed, 5158 insertions(+), 1288 deletions(-).

The main changes are:

1) Faster bpf_redirect_map(), from Björn.

2) skmsg cleanup, from Cong.

3) Support for floating point types in BTF, from Ilya.

4) Documentation for sys_bpf commands, from Joe.

5) Support for sk_lookup in bpf_prog_test_run, form Lorenz.

6) Enable task local storage for tracing programs, from Song.

7) bpf_for_each_map_elem() helper, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-03-09 18:07:05 -08:00
commit c1acda9807
114 changed files with 5158 additions and 1286 deletions

View File

@ -84,6 +84,7 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
#define BTF_KIND_FLOAT 16 /* Floating point */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@ -95,8 +96,8 @@ Each type contains the following common data::
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-30: unused
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
@ -452,6 +453,18 @@ map definition.
* ``offset``: the in-section offset of the variable
* ``size``: the size of the variable in bytes
2.2.16 BTF_KIND_FLOAT
~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: any valid offset
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FLOAT
* ``info.vlen``: 0
* ``size``: the size of the float type in bytes: 2, 4, 8, 12 or 16.
No additional type data follow ``btf_type``.
3. BTF Kernel API
*****************

View File

@ -12,9 +12,6 @@ BPF instruction-set.
The Cilium project also maintains a `BPF and XDP Reference Guide`_
that goes into great technical depth about the BPF Architecture.
The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_.
BPF Type Format (BTF)
=====================
@ -35,6 +32,12 @@ Two sets of Questions and Answers (Q&A) are maintained.
bpf_design_QA
bpf_devel_QA
Syscall API
===========
The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_. For more information about the userspace API, see
Documentation/userspace-api/ebpf/index.rst.
Helper functions
================

View File

@ -0,0 +1,17 @@
.. SPDX-License-Identifier: GPL-2.0
eBPF Userspace API
==================
eBPF is a kernel mechanism to provide a sandboxed runtime environment in the
Linux kernel for runtime extension and instrumentation without changing kernel
source code or loading kernel modules. eBPF programs can be attached to various
kernel subsystems, including networking, tracing and Linux security modules
(LSM).
For internal kernel documentation on eBPF, see Documentation/bpf/index.rst.
.. toctree::
:maxdepth: 1
syscall

View File

@ -0,0 +1,24 @@
.. SPDX-License-Identifier: GPL-2.0
eBPF Syscall
------------
:Authors: - Alexei Starovoitov <ast@kernel.org>
- Joe Stringer <joe@wand.net.nz>
- Michael Kerrisk <mtk.manpages@gmail.com>
The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_.
bpf() subcommand reference
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: include/uapi/linux/bpf.h
:doc: eBPF Syscall Preamble
.. kernel-doc:: include/uapi/linux/bpf.h
:doc: eBPF Syscall Commands
.. Links:
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html

View File

@ -21,6 +21,7 @@ place where this information is gathered.
unshare
spec_ctrl
accelerators/ocxl
ebpf/index
ioctl/index
iommu
media/index

View File

@ -3233,6 +3233,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
F: Documentation/bpf/
F: Documentation/networking/filter.rst
F: Documentation/userspace-api/ebpf/
F: arch/*/net/*
F: include/linux/bpf*
F: include/linux/filter.h
@ -3247,6 +3248,7 @@ F: net/core/filter.c
F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
F: scripts/bpf_doc.py
F: tools/bpf/
F: tools/lib/bpf/
F: tools/testing/selftests/bpf/

View File

@ -2973,7 +2973,8 @@ static int virtnet_probe(struct virtio_device *vdev)
return -ENOMEM;
/* Set up network device as normal. */
dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
IFF_TX_SKB_NO_LINEAR;
dev->netdev_ops = &virtnet_netdev;
dev->features = NETIF_F_HIGHDMA;

View File

@ -39,6 +39,7 @@ struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
struct bpf_func_state;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@ -117,6 +118,9 @@ struct bpf_map_ops {
void *owner, u32 size);
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
/* Misc helpers.*/
int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
* an inner map can be inserted to an outer map.
@ -129,6 +133,13 @@ struct bpf_map_ops {
bool (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags);
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
@ -295,6 +306,8 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
__BPF_ARG_TYPE_MAX,
};
@ -411,6 +424,8 @@ enum bpf_reg_type {
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
PTR_TO_FUNC, /* reg points to a bpf program function */
PTR_TO_MAP_KEY, /* reg points to a map element key */
};
/* The information passed from prog-specific *_is_valid_access
@ -506,6 +521,11 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_ARGS 12
/* The maximum number of arguments passed through registers
* a single function may have.
*/
#define MAX_BPF_FUNC_REG_ARGS 5
struct btf_func_model {
u8 ret_size;
u8 nr_args;
@ -1380,6 +1400,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@ -1429,9 +1453,9 @@ struct btf *bpf_get_btf_vmlinux(void);
/* Map specifics */
struct xdp_buff;
struct sk_buff;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_flush(void);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx);
@ -1441,7 +1465,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@ -1470,6 +1493,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
@ -1499,6 +1525,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@ -1568,17 +1595,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
u32 key)
{
return NULL;
}
static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
u32 key)
{
return NULL;
}
static inline bool dev_map_can_have_prog(struct bpf_map *map)
{
return false;
@ -1590,6 +1606,7 @@ static inline void __dev_flush(void)
struct xdp_buff;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;
static inline
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@ -1614,12 +1631,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
return 0;
}
static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
return NULL;
}
static inline void __cpu_map_flush(void)
{
}
@ -1670,6 +1681,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
return -ENOTSUPP;
}
static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
return -ENOTSUPP;
}
static inline void bpf_map_put(struct bpf_map *map)
{
}
@ -1684,6 +1702,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
{
return NULL;
}
static inline void bpf_task_storage_free(struct task_struct *task)
{
}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@ -1768,22 +1790,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_BPF_STREAM_PARSER)
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
static inline int sock_map_prog_update(struct bpf_map *map,
struct bpf_prog *prog,
struct bpf_prog *old, u32 which)
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
return -EOPNOTSUPP;
}
#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
struct bpf_prog *prog)
{
@ -1801,20 +1825,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}
#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
@ -1886,6 +1897,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);

View File

@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit);
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
int __percpu *busy_counter);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,

View File

@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
}
static inline struct bpf_storage_blob *bpf_task(
const struct task_struct *task)
{
if (unlikely(!task->security))
return NULL;
return task->security + bpf_lsm_blob_sizes.lbs_task;
}
extern const struct bpf_func_proto bpf_inode_storage_get_proto;
extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
void bpf_inode_storage_free(struct inode *inode);
void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_LSM */
@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
return NULL;
}
static inline struct bpf_storage_blob *bpf_task(
const struct task_struct *task)
{
return NULL;
}
static inline void bpf_inode_storage_free(struct inode *inode)
{
}
static inline void bpf_task_storage_free(struct task_struct *task)
{
}
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */

View File

@ -103,19 +103,17 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#ifdef CONFIG_INET
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif

View File

@ -68,6 +68,8 @@ struct bpf_reg_state {
unsigned long raw1;
unsigned long raw2;
} raw;
u32 subprogno; /* for PTR_TO_FUNC */
};
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
@ -204,6 +206,7 @@ struct bpf_func_state {
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
bool in_callback_fn;
struct bpf_stack_state *stack;
};

View File

@ -646,7 +646,8 @@ struct bpf_redirect_info {
u32 flags;
u32 tgt_index;
void *tgt_value;
struct bpf_map *map;
u32 map_id;
enum bpf_map_type map_type;
u32 kern_flags;
struct bpf_nh_params nh;
};
@ -1472,4 +1473,32 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
void *lookup_elem(struct bpf_map *map, u32 key))
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
/* Lower bits of the flags are used as return code on lookup failure */
if (unlikely(flags > XDP_TX))
return XDP_ABORTED;
ri->tgt_value = lookup_elem(map, ifindex);
if (unlikely(!ri->tgt_value)) {
/* If the lookup fails we want to clear out the state in the
* redirect_info struct completely, so that if an eBPF program
* performs multiple lookups, the last one always takes
* precedence.
*/
ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
ri->map_type = BPF_MAP_TYPE_UNSPEC;
return flags;
}
ri->tgt_index = ifindex;
ri->map_id = map->id;
ri->map_type = map->map_type;
return XDP_REDIRECT;
}
#endif /* __LINUX_FILTER_H__ */

View File

@ -1518,6 +1518,8 @@ struct net_device_ops {
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
* @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
* @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
* skb_headlen(skb) == 0 (data starts from frag0)
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@ -1551,6 +1553,7 @@ enum netdev_priv_flags {
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_LIVE_RENAME_OK = 1<<30,
IFF_TX_SKB_NO_LINEAR = 1<<31,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@ -1577,12 +1580,14 @@ enum netdev_priv_flags {
#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
#define IFF_TEAM IFF_TEAM
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM
#define IFF_MACSEC IFF_MACSEC
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */
enum netdev_ml_priv_type {

View File

@ -42,6 +42,7 @@ struct audit_context;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@ -1351,6 +1352,10 @@ struct task_struct {
/* Used by LSM modules for access restriction: */
void *security;
#endif
#ifdef CONFIG_BPF_SYSCALL
/* Used by BPF task local storage */
struct bpf_local_storage __rcu *bpf_storage;
#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
unsigned long lowest_stack;

View File

@ -656,6 +656,7 @@ typedef unsigned char *sk_buff_data_t;
* @protocol: Packet protocol from driver
* @destructor: Destruct function
* @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
* @_sk_redir: socket redirection information for skmsg
* @_nfct: Associated connection, if any (with nfctinfo bits)
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
@ -755,6 +756,9 @@ struct sk_buff {
void (*destructor)(struct sk_buff *skb);
};
struct list_head tcp_tsorted_anchor;
#ifdef CONFIG_NET_SOCK_MSG
unsigned long _sk_redir;
#endif
};
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)

View File

@ -56,8 +56,8 @@ struct sk_msg {
struct sk_psock_progs {
struct bpf_prog *msg_parser;
struct bpf_prog *skb_parser;
struct bpf_prog *skb_verdict;
struct bpf_prog *stream_parser;
struct bpf_prog *stream_verdict;
};
enum sk_psock_state_bits {
@ -70,12 +70,6 @@ struct sk_psock_link {
void *link_raw;
};
struct sk_psock_parser {
struct strparser strp;
bool enabled;
void (*saved_data_ready)(struct sock *sk);
};
struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
@ -90,7 +84,9 @@ struct sk_psock {
u32 eval;
struct sk_msg *cork;
struct sk_psock_progs progs;
struct sk_psock_parser parser;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
struct strparser strp;
#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
unsigned long state;
@ -100,6 +96,7 @@ struct sk_psock {
void (*saved_unhash)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
struct proto *sk_proto;
struct sk_psock_work_state work_state;
struct work_struct work;
@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
struct sk_psock *sk_psock_init(struct sock *sk, int node);
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
#else
static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
return -EOPNOTSUPP;
}
static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
}
static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
}
#endif
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
static inline void sk_psock_cork_free(struct sk_psock *psock)
{
if (psock->cork) {
@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
return psock;
}
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
if (psock->parser.enabled)
psock->parser.saved_data_ready(sk);
if (psock->saved_data_ready)
psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
}
@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
static inline void psock_progs_drop(struct sk_psock_progs *progs)
{
psock_set_prog(&progs->msg_parser, NULL);
psock_set_prog(&progs->skb_parser, NULL);
psock_set_prog(&progs->skb_verdict, NULL);
psock_set_prog(&progs->stream_parser, NULL);
psock_set_prog(&progs->stream_verdict, NULL);
}
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
{
if (!psock)
return false;
return psock->parser.enabled;
return !!psock->saved_data_ready;
}
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
/* We only have one bit so far. */
#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
static inline bool skb_bpf_ingress(const struct sk_buff *skb)
{
unsigned long sk_redir = skb->_sk_redir;
return sk_redir & BPF_F_INGRESS;
}
static inline void skb_bpf_set_ingress(struct sk_buff *skb)
{
skb->_sk_redir |= BPF_F_INGRESS;
}
static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
bool ingress)
{
skb->_sk_redir = (unsigned long)sk_redir;
if (ingress)
skb->_sk_redir |= BPF_F_INGRESS;
}
static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
{
unsigned long sk_redir = skb->_sk_redir;
return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
}
static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
{
skb->_sk_redir = 0;
}
#endif /* CONFIG_NET_SOCK_MSG */
#endif /* _LINUX_SKMSG_H */

View File

@ -883,36 +883,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
struct {
__u32 flags;
struct sock *sk_redir;
void *data_end;
} bpf;
};
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
{
TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
}
static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
{
return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
}
static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
{
return TCP_SKB_CB(skb)->bpf.sk_redir;
}
static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
{
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
}
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_IPV6)
@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
#ifdef CONFIG_NET_SOCK_MSG
struct sk_msg;
struct sk_psock;
#ifdef CONFIG_BPF_STREAM_PARSER
#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#else
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif
#ifdef CONFIG_CGROUP_BPF
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
struct sk_buff *skb,

View File

@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
}
#ifdef CONFIG_BPF_STREAM_PARSER
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
#endif /* BPF_STREAM_PARSER */
#endif
#endif /* _UDP_H */

View File

@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
u32 key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock *xs;
if (key >= map->max_entries)
return NULL;
xs = READ_ONCE(m->xsk_map[key]);
return xs;
}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void)
{
}
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
u32 key)
{
return NULL;
}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */

View File

@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
};
#endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(tgt, map) \
(((map->map_type == BPF_MAP_TYPE_DEVMAP || \
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
DECLARE_EVENT_CLASS(xdp_redirect_template,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
const struct bpf_map *map, u32 index),
enum bpf_map_type map_type,
u32 map_id, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index),
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
TP_STRUCT__entry(
__field(int, prog_id)
@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
),
TP_fast_assign(
u32 ifindex = 0, map_index = index;
if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
ifindex = index;
map_index = 0;
}
__entry->prog_id = xdp->aux->id;
__entry->act = XDP_REDIRECT;
__entry->ifindex = dev->ifindex;
__entry->err = err;
__entry->to_ifindex = map ? devmap_ifindex(tgt, map) :
index;
__entry->map_id = map ? map->id : 0;
__entry->map_index = map ? index : 0;
__entry->to_ifindex = ifindex;
__entry->map_id = map_id;
__entry->map_index = map_index;
),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
enum bpf_map_type map_type,
u32 map_id, u32 index),
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
enum bpf_map_type map_type,
u32 map_id, u32 index),
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
#define _trace_xdp_redirect(dev, xdp, to) \
trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
#define _trace_xdp_redirect(dev, xdp, to) \
trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
trace_xdp_redirect(dev, xdp, to, 0, map, index)
#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)
#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
trace_xdp_redirect_err(dev, xdp, to, err, map, index)
#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)
/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
enum bpf_map_type map_type,
u32 map_id, u32 index),
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
enum bpf_map_type map_type,
u32 map_id, u32 index),
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);
TRACE_EVENT(xdp_cpumap_kthread,

View File

@ -93,7 +93,717 @@ union bpf_iter_link_info {
} map;
};
/* BPF syscall commands, see bpf(2) man-page for details. */
/* BPF syscall commands, see bpf(2) man-page for more details. */
/**
* DOC: eBPF Syscall Preamble
*
* The operation to be performed by the **bpf**\ () system call is determined
* by the *cmd* argument. Each operation takes an accompanying argument,
* provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
* below). The size argument is the size of the union pointed to by *attr*.
*/
/**
* DOC: eBPF Syscall Commands
*
* BPF_MAP_CREATE
* Description
* Create a map and return a file descriptor that refers to the
* map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
* is automatically enabled for the new file descriptor.
*
* Applying **close**\ (2) to the file descriptor returned by
* **BPF_MAP_CREATE** will delete the map (but see NOTES).
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_MAP_LOOKUP_ELEM
* Description
* Look up an element with a given *key* in the map referred to
* by the file descriptor *map_fd*.
*
* The *flags* argument may be specified as one of the
* following:
*
* **BPF_F_LOCK**
* Look up the value of a spin-locked map without
* returning the lock. This must be specified if the
* elements contain a spinlock.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_UPDATE_ELEM
* Description
* Create or update an element (key/value pair) in a specified map.
*
* The *flags* argument should be specified as one of the
* following:
*
* **BPF_ANY**
* Create a new element or update an existing element.
* **BPF_NOEXIST**
* Create a new element only if it did not exist.
* **BPF_EXIST**
* Update an existing element.
* **BPF_F_LOCK**
* Update a spin_lock-ed map element.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
* **E2BIG**, **EEXIST**, or **ENOENT**.
*
* **E2BIG**
* The number of elements in the map reached the
* *max_entries* limit specified at map creation time.
* **EEXIST**
* If *flags* specifies **BPF_NOEXIST** and the element
* with *key* already exists in the map.
* **ENOENT**
* If *flags* specifies **BPF_EXIST** and the element with
* *key* does not exist in the map.
*
* BPF_MAP_DELETE_ELEM
* Description
* Look up and delete an element by key in a specified map.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_GET_NEXT_KEY
* Description
* Look up an element by key in a specified map and return the key
* of the next element. Can be used to iterate over all elements
* in the map.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* The following cases can be used to iterate over all elements of
* the map:
*
* * If *key* is not found, the operation returns zero and sets
* the *next_key* pointer to the key of the first element.
* * If *key* is found, the operation returns zero and sets the
* *next_key* pointer to the key of the next element.
* * If *key* is the last element, returns -1 and *errno* is set
* to **ENOENT**.
*
* May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
* **EINVAL** on error.
*
* BPF_PROG_LOAD
* Description
* Verify and load an eBPF program, returning a new file
* descriptor associated with the program.
*
* Applying **close**\ (2) to the file descriptor returned by
* **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
*
* The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
* automatically enabled for the new file descriptor.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_OBJ_PIN
* Description
* Pin an eBPF program or map referred by the specified *bpf_fd*
* to the provided *pathname* on the filesystem.
*
* The *pathname* argument must not contain a dot (".").
*
* On success, *pathname* retains a reference to the eBPF object,
* preventing deallocation of the object when the original
* *bpf_fd* is closed. This allow the eBPF object to live beyond
* **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
* process.
*
* Applying **unlink**\ (2) or similar calls to the *pathname*
* unpins the object from the filesystem, removing the reference.
* If no other file descriptors or filesystem nodes refer to the
* same object, it will be deallocated (see NOTES).
*
* The filesystem type for the parent directory of *pathname* must
* be **BPF_FS_MAGIC**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_OBJ_GET
* Description
* Open a file descriptor for the eBPF object pinned to the
* specified *pathname*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_PROG_ATTACH
* Description
* Attach an eBPF program to a *target_fd* at the specified
* *attach_type* hook.
*
* The *attach_type* specifies the eBPF attachment point to
* attach the program to, and must be one of *bpf_attach_type*
* (see below).
*
* The *attach_bpf_fd* must be a valid file descriptor for a
* loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
* or sock_ops type corresponding to the specified *attach_type*.
*
* The *target_fd* must be a valid file descriptor for a kernel
* object which depends on the attach type of *attach_bpf_fd*:
*
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
* **BPF_PROG_TYPE_CGROUP_SKB**,
* **BPF_PROG_TYPE_CGROUP_SOCK**,
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
* **BPF_PROG_TYPE_SOCK_OPS**
*
* Control Group v2 hierarchy with the eBPF controller
* enabled. Requires the kernel to be compiled with
* **CONFIG_CGROUP_BPF**.
*
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
*
* Network namespace (eg /proc/self/ns/net).
*
* **BPF_PROG_TYPE_LIRC_MODE2**
*
* LIRC device path (eg /dev/lircN). Requires the kernel
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
*
* **BPF_PROG_TYPE_SK_SKB**,
* **BPF_PROG_TYPE_SK_MSG**
*
* eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_DETACH
* Description
* Detach the eBPF program associated with the *target_fd* at the
* hook specified by *attach_type*. The program must have been
* previously attached using **BPF_PROG_ATTACH**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_TEST_RUN
* Description
* Run the eBPF program associated with the *prog_fd* a *repeat*
* number of times against a provided program context *ctx_in* and
* data *data_in*, and return the modified program context
* *ctx_out*, *data_out* (for example, packet data), result of the
* execution *retval*, and *duration* of the test run.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* **ENOSPC**
* Either *data_size_out* or *ctx_size_out* is too small.
* **ENOTSUPP**
* This command is not supported by the program type of
* the program referred to by *prog_fd*.
*
* BPF_PROG_GET_NEXT_ID
* Description
* Fetch the next eBPF program currently loaded into the kernel.
*
* Looks for the eBPF program with an id greater than *start_id*
* and updates *next_id* on success. If no other eBPF programs
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_MAP_GET_NEXT_ID
* Description
* Fetch the next eBPF map currently loaded into the kernel.
*
* Looks for the eBPF map with an id greater than *start_id*
* and updates *next_id* on success. If no other eBPF maps
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_PROG_GET_FD_BY_ID
* Description
* Open a file descriptor for the eBPF program corresponding to
* *prog_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_MAP_GET_FD_BY_ID
* Description
* Open a file descriptor for the eBPF map corresponding to
* *map_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_OBJ_GET_INFO_BY_FD
* Description
* Obtain information about the eBPF object corresponding to
* *bpf_fd*.
*
* Populates up to *info_len* bytes of *info*, which will be in
* one of the following formats depending on the eBPF object type
* of *bpf_fd*:
*
* * **struct bpf_prog_info**
* * **struct bpf_map_info**
* * **struct bpf_btf_info**
* * **struct bpf_link_info**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_QUERY
* Description
* Obtain information about eBPF programs associated with the
* specified *attach_type* hook.
*
* The *target_fd* must be a valid file descriptor for a kernel
* object which depends on the attach type of *attach_bpf_fd*:
*
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
* **BPF_PROG_TYPE_CGROUP_SKB**,
* **BPF_PROG_TYPE_CGROUP_SOCK**,
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
* **BPF_PROG_TYPE_SOCK_OPS**
*
* Control Group v2 hierarchy with the eBPF controller
* enabled. Requires the kernel to be compiled with
* **CONFIG_CGROUP_BPF**.
*
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
*
* Network namespace (eg /proc/self/ns/net).
*
* **BPF_PROG_TYPE_LIRC_MODE2**
*
* LIRC device path (eg /dev/lircN). Requires the kernel
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
*
* **BPF_PROG_QUERY** always fetches the number of programs
* attached and the *attach_flags* which were used to attach those
* programs. Additionally, if *prog_ids* is nonzero and the number
* of attached programs is less than *prog_cnt*, populates
* *prog_ids* with the eBPF program ids of the programs attached
* at *target_fd*.
*
* The following flags may alter the result:
*
* **BPF_F_QUERY_EFFECTIVE**
* Only return information regarding programs which are
* currently effective at the specified *target_fd*.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_RAW_TRACEPOINT_OPEN
* Description
* Attach an eBPF program to a tracepoint *name* to access kernel
* internal arguments of the tracepoint in their raw form.
*
* The *prog_fd* must be a valid file descriptor associated with
* a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
*
* No ABI guarantees are made about the content of tracepoint
* arguments exposed to the corresponding eBPF program.
*
* Applying **close**\ (2) to the file descriptor returned by
* **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_BTF_LOAD
* Description
* Verify and load BPF Type Format (BTF) metadata into the kernel,
* returning a new file descriptor associated with the metadata.
* BTF is described in more detail at
* https://www.kernel.org/doc/html/latest/bpf/btf.html.
*
* The *btf* parameter must point to valid memory providing
* *btf_size* bytes of BTF binary metadata.
*
* The returned file descriptor can be passed to other **bpf**\ ()
* subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
* associate the BTF with those objects.
*
* Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
* parameters to specify a *btf_log_buf*, *btf_log_size* and
* *btf_log_level* which allow the kernel to return freeform log
* output regarding the BTF verification process.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_BTF_GET_FD_BY_ID
* Description
* Open a file descriptor for the BPF Type Format (BTF)
* corresponding to *btf_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_TASK_FD_QUERY
* Description
* Obtain information about eBPF programs associated with the
* target process identified by *pid* and *fd*.
*
* If the *pid* and *fd* are associated with a tracepoint, kprobe
* or uprobe perf event, then the *prog_id* and *fd_type* will
* be populated with the eBPF program id and file descriptor type
* of type **bpf_task_fd_type**. If associated with a kprobe or
* uprobe, the *probe_offset* and *probe_addr* will also be
* populated. Optionally, if *buf* is provided, then up to
* *buf_len* bytes of *buf* will be populated with the name of
* the tracepoint, kprobe or uprobe.
*
* The resulting *prog_id* may be introspected in deeper detail
* using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_LOOKUP_AND_DELETE_ELEM
* Description
* Look up an element with the given *key* in the map referred to
* by the file descriptor *fd*, and if found, delete the element.
*
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
* implement this command as a "pop" operation, deleting the top
* element rather than one corresponding to *key*.
* The *key* and *key_len* parameters should be zeroed when
* issuing this operation for these map types.
*
* This command is only valid for the following map types:
* * **BPF_MAP_TYPE_QUEUE**
* * **BPF_MAP_TYPE_STACK**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_FREEZE
* Description
* Freeze the permissions of the specified map.
*
* Write permissions may be frozen by passing zero *flags*.
* Upon success, no future syscall invocations may alter the
* map state of *map_fd*. Write operations from eBPF programs
* are still possible for a frozen map.
*
* Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_BTF_GET_NEXT_ID
* Description
* Fetch the next BPF Type Format (BTF) object currently loaded
* into the kernel.
*
* Looks for the BTF object with an id greater than *start_id*
* and updates *next_id* on success. If no other BTF objects
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_MAP_LOOKUP_BATCH
* Description
* Iterate and fetch multiple elements in a map.
*
* Two opaque values are used to manage batch operations,
* *in_batch* and *out_batch*. Initially, *in_batch* must be set
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
* continue iteration from the current point.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
* and value size of the map *map_fd*. The *keys* buffer must be
* of *key_size* * *count*. The *values* buffer must be of
* *value_size* * *count*.
*
* The *elem_flags* argument may be specified as one of the
* following:
*
* **BPF_F_LOCK**
* Look up the value of a spin-locked map without
* returning the lock. This must be specified if the
* elements contain a spinlock.
*
* On success, *count* elements from the map are copied into the
* user buffer, with the keys copied into *keys* and the values
* copied into the corresponding indices in *values*.
*
* If an error is returned and *errno* is not **EFAULT**, *count*
* is set to the number of successfully processed elements.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* May set *errno* to **ENOSPC** to indicate that *keys* or
* *values* is too small to dump an entire bucket during
* iteration of a hash-based map type.
*
* BPF_MAP_LOOKUP_AND_DELETE_BATCH
* Description
* Iterate and delete all elements in a map.
*
* This operation has the same behavior as
* **BPF_MAP_LOOKUP_BATCH** with two exceptions:
*
* * Every element that is successfully returned is also deleted
* from the map. This is at least *count* elements. Note that
* *count* is both an input and an output parameter.
* * Upon returning with *errno* set to **EFAULT**, up to
* *count* elements may be deleted without returning the keys
* and values of the deleted elements.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_UPDATE_BATCH
* Description
* Update multiple elements in a map by *key*.
*
* The *keys* and *values* are input parameters which must point
* to memory large enough to hold *count* items based on the key
* and value size of the map *map_fd*. The *keys* buffer must be
* of *key_size* * *count*. The *values* buffer must be of
* *value_size* * *count*.
*
* Each element specified in *keys* is sequentially updated to the
* value in the corresponding index in *values*. The *in_batch*
* and *out_batch* parameters are ignored and should be zeroed.
*
* The *elem_flags* argument should be specified as one of the
* following:
*
* **BPF_ANY**
* Create new elements or update a existing elements.
* **BPF_NOEXIST**
* Create new elements only if they do not exist.
* **BPF_EXIST**
* Update existing elements.
* **BPF_F_LOCK**
* Update spin_lock-ed map elements. This must be
* specified if the map value contains a spinlock.
*
* On success, *count* elements from the map are updated.
*
* If an error is returned and *errno* is not **EFAULT**, *count*
* is set to the number of successfully processed elements.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
* **E2BIG**. **E2BIG** indicates that the number of elements in
* the map reached the *max_entries* limit specified at map
* creation time.
*
* May set *errno* to one of the following error codes under
* specific circumstances:
*
* **EEXIST**
* If *flags* specifies **BPF_NOEXIST** and the element
* with *key* already exists in the map.
* **ENOENT**
* If *flags* specifies **BPF_EXIST** and the element with
* *key* does not exist in the map.
*
* BPF_MAP_DELETE_BATCH
* Description
* Delete multiple elements in a map by *key*.
*
* The *keys* parameter is an input parameter which must point
* to memory large enough to hold *count* items based on the key
* size of the map *map_fd*, that is, *key_size* * *count*.
*
* Each element specified in *keys* is sequentially deleted. The
* *in_batch*, *out_batch*, and *values* parameters are ignored
* and should be zeroed.
*
* The *elem_flags* argument may be specified as one of the
* following:
*
* **BPF_F_LOCK**
* Look up the value of a spin-locked map without
* returning the lock. This must be specified if the
* elements contain a spinlock.
*
* On success, *count* elements from the map are updated.
*
* If an error is returned and *errno* is not **EFAULT**, *count*
* is set to the number of successfully processed elements. If
* *errno* is **EFAULT**, up to *count* elements may be been
* deleted.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_LINK_CREATE
* Description
* Attach an eBPF program to a *target_fd* at the specified
* *attach_type* hook and return a file descriptor handle for
* managing the link.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_LINK_UPDATE
* Description
* Update the eBPF program in the specified *link_fd* to
* *new_prog_fd*.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_LINK_GET_FD_BY_ID
* Description
* Open a file descriptor for the eBPF Link corresponding to
* *link_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_LINK_GET_NEXT_ID
* Description
* Fetch the next eBPF link currently loaded into the kernel.
*
* Looks for the eBPF link with an id greater than *start_id*
* and updates *next_id* on success. If no other eBPF links
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_ENABLE_STATS
* Description
* Enable eBPF runtime statistics gathering.
*
* Runtime statistics gathering for the eBPF runtime is disabled
* by default to minimize the corresponding performance overhead.
* This command enables statistics globally.
*
* Multiple programs may independently enable statistics.
* After gathering the desired statistics, eBPF runtime statistics
* may be disabled again by calling **close**\ (2) for the file
* descriptor returned by this function. Statistics will only be
* disabled system-wide when all outstanding file descriptors
* returned by prior calls for this subcommand are closed.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_ITER_CREATE
* Description
* Create an iterator on top of the specified *link_fd* (as
* previously created using **BPF_LINK_CREATE**) and return a
* file descriptor that can be used to trigger the iteration.
*
* If the resulting file descriptor is pinned to the filesystem
* using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
* for that path will trigger the iterator to read kernel state
* using the eBPF program attached to *link_fd*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_LINK_DETACH
* Description
* Forcefully detach the specified *link_fd* from its
* corresponding attachment point.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_BIND_MAP
* Description
* Bind a map to the lifetime of an eBPF program.
*
* The map identified by *map_fd* is bound to the program
* identified by *prog_fd* and only released when *prog_fd* is
* released. This may be used in cases where metadata should be
* associated with a program which otherwise does not contain any
* references to the map (for example, embedded in the eBPF
* program instructions).
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
* * After **fork**\ (2), the child inherits file descriptors
* referring to the same eBPF objects.
* * File descriptors referring to eBPF objects can be transferred over
* **unix**\ (7) domain sockets.
* * File descriptors referring to eBPF objects can be duplicated in the
* usual way, using **dup**\ (2) and similar calls.
* * File descriptors referring to eBPF objects can be pinned to the
* filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
*
* An eBPF object is deallocated only after all file descriptors referring
* to the object have been closed and no references remain pinned to the
* filesystem or attached (for example, bound to a program or device).
*/
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@ -393,6 +1103,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@ -720,7 +1439,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
* $ ./scripts/bpf_helpers_doc.py \
* $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@ -1765,6 +2484,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
* L2 type as Ethernet.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@ -3909,6 +4632,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -4075,6 +4826,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@ -4168,6 +4920,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@ -5205,7 +5958,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
union {
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
};
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */

View File

@ -52,7 +52,7 @@ struct btf_type {
};
};
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
#define BTF_KIND_MAX BTF_KIND_DATASEC
#define BTF_KIND_FLOAT 16 /* Floating point */
#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately

View File

@ -1709,6 +1709,7 @@ config BPF_SYSCALL
select BPF
select IRQ_WORK
select TASKS_TRACE_RCU
select NET_SOCK_MSG if INET
default n
help
Enable the bpf() system call that allows to manipulate eBPF

View File

@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += offload.o
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
endif

View File

@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
u32 i, key, num_elems = 0;
struct bpf_array *array;
bool is_percpu;
u64 ret = 0;
void *val;
if (flags != 0)
return -EINVAL;
is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
array = container_of(map, struct bpf_array, map);
if (is_percpu)
migrate_disable();
for (i = 0; i < map->max_entries; i++) {
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
else
val = array->value + array->elem_size * i;
num_elems++;
key = i;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)&key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret)
break;
}
if (is_percpu)
migrate_enable();
return num_elems;
}
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_meta_equal = array_map_meta_equal,
@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id,
.iter_seq_info = &iter_seq_info,
@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_delete_elem = array_map_delete_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id,
.iter_seq_info = &iter_seq_info,

View File

@ -237,7 +237,7 @@ static void inode_storage_map_free(struct bpf_map *map)
smap = (struct bpf_local_storage_map *)map;
bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
bpf_local_storage_map_free(smap);
bpf_local_storage_map_free(smap, NULL);
}
static int inode_storage_map_btf_id;

View File

@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
*/
return ret == 0 ? 0 : -EAGAIN;
}
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
void *, callback_ctx, u64, flags)
{
return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.func = bpf_for_each_map_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_FUNC,
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};

View File

@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
unsigned long flags;
if (unlikely(!selem_linked_to_storage(selem)))
/* selem has already been unlinked from sk */
return;
local_storage = rcu_dereference(selem->local_storage);
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, true);
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (free_local_storage)
kfree_rcu(local_storage, rcu);
@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
unsigned long flags;
if (unlikely(!selem_linked_to_map(selem)))
/* selem has already be unlinked from smap */
@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
smap = rcu_dereference(SDATA(selem)->smap);
b = select_bucket(smap, selem);
raw_spin_lock_bh(&b->lock);
raw_spin_lock_irqsave(&b->lock, flags);
if (likely(selem_linked_to_map(selem)))
hlist_del_init_rcu(&selem->map_node);
raw_spin_unlock_bh(&b->lock);
raw_spin_unlock_irqrestore(&b->lock, flags);
}
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
unsigned long flags;
raw_spin_lock_bh(&b->lock);
raw_spin_lock_irqsave(&b->lock, flags);
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
hlist_add_head_rcu(&selem->map_node, &b->list);
raw_spin_unlock_bh(&b->lock);
raw_spin_unlock_irqrestore(&b->lock, flags);
}
void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
sdata = SDATA(selem);
if (cacheit_lockit) {
unsigned long flags;
/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx],
sdata);
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}
return sdata;
@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
unsigned long flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
}
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
unlock:
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return SDATA(selem);
unlock_err:
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return ERR_PTR(err);
}
@ -468,7 +474,8 @@ void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
spin_unlock(&cache->idx_lock);
}
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
int __percpu *busy_counter)
{
struct bpf_local_storage_elem *selem;
struct bpf_local_storage_map_bucket *b;
@ -497,7 +504,15 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
while ((selem = hlist_entry_safe(
rcu_dereference_raw(hlist_first_rcu(&b->list)),
struct bpf_local_storage_elem, map_node))) {
if (busy_counter) {
migrate_disable();
__this_cpu_inc(*busy_counter);
}
bpf_selem_unlink(selem);
if (busy_counter) {
__this_cpu_dec(*busy_counter);
migrate_enable();
}
cond_resched_rcu();
}
rcu_read_unlock();

View File

@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_spin_lock_proto;
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
case BPF_FUNC_task_storage_get:
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:

View File

@ -15,21 +15,41 @@
#include <linux/bpf_local_storage.h>
#include <linux/filter.h>
#include <uapi/linux/btf.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
DEFINE_BPF_STORAGE_CACHE(task_cache);
DEFINE_PER_CPU(int, bpf_task_storage_busy);
static void bpf_task_storage_lock(void)
{
migrate_disable();
__this_cpu_inc(bpf_task_storage_busy);
}
static void bpf_task_storage_unlock(void)
{
__this_cpu_dec(bpf_task_storage_busy);
migrate_enable();
}
static bool bpf_task_storage_trylock(void)
{
migrate_disable();
if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
__this_cpu_dec(bpf_task_storage_busy);
migrate_enable();
return false;
}
return true;
}
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
struct bpf_storage_blob *bsb;
bsb = bpf_task(task);
if (!bsb)
return NULL;
return &bsb->storage;
return &task->bpf_storage;
}
static struct bpf_local_storage_data *
@ -38,13 +58,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
{
struct bpf_local_storage *task_storage;
struct bpf_local_storage_map *smap;
struct bpf_storage_blob *bsb;
bsb = bpf_task(task);
if (!bsb)
return NULL;
task_storage = rcu_dereference(bsb->storage);
task_storage = rcu_dereference(task->bpf_storage);
if (!task_storage)
return NULL;
@ -57,16 +72,12 @@ void bpf_task_storage_free(struct task_struct *task)
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
bool free_task_storage = false;
struct bpf_storage_blob *bsb;
struct hlist_node *n;
bsb = bpf_task(task);
if (!bsb)
return;
unsigned long flags;
rcu_read_lock();
local_storage = rcu_dereference(bsb->storage);
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage) {
rcu_read_unlock();
return;
@ -81,7 +92,8 @@ void bpf_task_storage_free(struct task_struct *task)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
raw_spin_lock_bh(&local_storage->lock);
bpf_task_storage_lock();
raw_spin_lock_irqsave(&local_storage->lock, flags);
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
/* Always unlink from map before unlinking from
* local_storage.
@ -90,7 +102,8 @@ void bpf_task_storage_free(struct task_struct *task)
free_task_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, false);
}
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
bpf_task_storage_unlock();
rcu_read_unlock();
/* free_task_storage should always be true as long as
@ -123,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
goto out;
}
bpf_task_storage_lock();
sdata = task_storage_lookup(task, map, true);
bpf_task_storage_unlock();
put_pid(pid);
return sdata ? sdata->data : NULL;
out:
@ -150,13 +165,15 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
*/
WARN_ON_ONCE(!rcu_read_lock_held());
task = pid_task(pid, PIDTYPE_PID);
if (!task || !task_storage_ptr(task)) {
if (!task) {
err = -ENOENT;
goto out;
}
bpf_task_storage_lock();
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags);
bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
out:
@ -199,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
goto out;
}
bpf_task_storage_lock();
err = task_storage_delete(task, map);
bpf_task_storage_unlock();
out:
put_pid(pid);
return err;
@ -213,44 +232,47 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;
/* explicitly check that the task_storage_ptr is not
* NULL as task_storage_lookup returns NULL in this case and
* bpf_local_storage_update expects the owner to have a
* valid storage pointer.
*/
if (!task || !task_storage_ptr(task))
if (!task)
return (unsigned long)NULL;
if (!bpf_task_storage_trylock())
return (unsigned long)NULL;
sdata = task_storage_lookup(task, map, true);
if (sdata)
return (unsigned long)sdata->data;
goto unlock;
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
/* only allocate new storage, when the task is refcounted */
if (refcount_read(&task->usage) &&
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
return IS_ERR(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}
return (unsigned long)NULL;
unlock:
bpf_task_storage_unlock();
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
task)
{
int ret;
if (!task)
return -EINVAL;
if (!bpf_task_storage_trylock())
return -EBUSY;
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
return task_storage_delete(task, map);
ret = task_storage_delete(task, map);
bpf_task_storage_unlock();
return ret;
}
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@ -276,7 +298,7 @@ static void task_storage_map_free(struct bpf_map *map)
smap = (struct bpf_local_storage_map *)map;
bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
bpf_local_storage_map_free(smap);
bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
}
static int task_storage_map_btf_id;

View File

@ -173,7 +173,7 @@
#define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
#define BTF_INFO_MASK 0x8f00ffff
#define BTF_INFO_MASK 0x9f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
@ -280,6 +280,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
};
static const char *btf_type_str(const struct btf_type *t)
@ -574,6 +575,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
return true;
}
@ -1704,6 +1706,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
size = type->size;
goto resolved;
@ -1849,7 +1852,7 @@ static int btf_df_check_kflag_member(struct btf_verifier_env *env,
return -EINVAL;
}
/* Used for ptr, array and struct/union type members.
/* Used for ptr, array struct/union and float type members.
* int, enum and modifier types have their specific callback functions.
*/
static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
@ -3675,6 +3678,81 @@ static const struct btf_kind_operations datasec_ops = {
.show = btf_datasec_show,
};
static s32 btf_float_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
{
if (btf_type_vlen(t)) {
btf_verifier_log_type(env, t, "vlen != 0");
return -EINVAL;
}
if (btf_type_kflag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
t->size != 16) {
btf_verifier_log_type(env, t, "Invalid type_size");
return -EINVAL;
}
btf_verifier_log_type(env, t, NULL);
return 0;
}
static int btf_float_check_member(struct btf_verifier_env *env,
const struct btf_type *struct_type,
const struct btf_member *member,
const struct btf_type *member_type)
{
u64 start_offset_bytes;
u64 end_offset_bytes;
u64 misalign_bits;
u64 align_bytes;
u64 align_bits;
/* Different architectures have different alignment requirements, so
* here we check only for the reasonable minimum. This way we ensure
* that types after CO-RE can pass the kernel BTF verifier.
*/
align_bytes = min_t(u64, sizeof(void *), member_type->size);
align_bits = align_bytes * BITS_PER_BYTE;
div64_u64_rem(member->offset, align_bits, &misalign_bits);
if (misalign_bits) {
btf_verifier_log_member(env, struct_type, member,
"Member is not properly aligned");
return -EINVAL;
}
start_offset_bytes = member->offset / BITS_PER_BYTE;
end_offset_bytes = start_offset_bytes + member_type->size;
if (end_offset_bytes > struct_type->size) {
btf_verifier_log_member(env, struct_type, member,
"Member exceeds struct_size");
return -EINVAL;
}
return 0;
}
static void btf_float_log(struct btf_verifier_env *env,
const struct btf_type *t)
{
btf_verifier_log(env, "size=%u", t->size);
}
static const struct btf_kind_operations float_ops = {
.check_meta = btf_float_check_meta,
.resolve = btf_df_resolve,
.check_member = btf_float_check_member,
.check_kflag_member = btf_generic_check_kflag_member,
.log_details = btf_float_log,
.show = btf_df_show,
};
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
@ -3808,6 +3886,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
[BTF_KIND_VAR] = &var_ops,
[BTF_KIND_DATASEC] = &datasec_ops,
[BTF_KIND_FLOAT] = &float_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@ -4592,8 +4671,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
arg = off / 8;
args = (const struct btf_param *)(t + 1);
/* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
nr_args = t ? btf_type_vlen(t) : 5;
/* if (t == NULL) Fall back to default BPF prog with
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
*/
nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
if (prog->aux->attach_btf_trace) {
/* skip first 'void *__data' argument in btf_trace_##name typedef */
args++;
@ -4649,7 +4730,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
} else {
if (!t)
/* Default prog with 5 args */
/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
return true;
t = btf_type_by_id(btf, args[arg].type);
}
@ -5100,12 +5181,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
if (!func) {
/* BTF function prototype doesn't match the verifier types.
* Fall back to 5 u64 args.
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
*/
for (i = 0; i < 5; i++)
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
m->arg_size[i] = 8;
m->ret_size = 8;
m->nr_args = 5;
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
return 0;
}
args = (const struct btf_param *)(func + 1);
@ -5328,8 +5409,9 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
}
args = (const struct btf_param *)(t + 1);
nargs = btf_type_vlen(t);
if (nargs > 5) {
bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
MAX_BPF_FUNC_REG_ARGS);
goto out;
}
@ -5458,9 +5540,9 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
}
args = (const struct btf_param *)(t + 1);
nargs = btf_type_vlen(t);
if (nargs > 5) {
bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n",
tname, nargs);
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
return -EINVAL;
}
/* check that function returns int */

View File

@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
* complete.
*/
bpf_clear_redirect_map(map);
synchronize_rcu();
/* For cpu_map the remote CPUs can still be using the entries
@ -563,7 +562,7 @@ static void cpu_map_free(struct bpf_map *map)
kfree(cmap);
}
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
struct bpf_cpu_map_entry *rcpu;
@ -600,6 +599,11 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
}
static int cpu_map_btf_id;
const struct bpf_map_ops cpu_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@ -612,6 +616,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_cpu_map",
.map_btf_id = &cpu_map_btf_id,
.map_redirect = cpu_map_redirect,
};
static void bq_flush_to_queue(struct xdp_bulk_queue *bq)

View File

@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
list_del_rcu(&dtab->list);
spin_unlock(&dev_map_lock);
bpf_clear_redirect_map(map);
synchronize_rcu();
/* Make sure prior __dev_map_entry_free() have completed. */
@ -258,7 +257,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct hlist_head *head = dev_map_index_hash(dtab, key);
@ -392,7 +391,7 @@ void __dev_flush(void)
* update happens in parallel here a dev_put wont happen until after reading the
* ifindex.
*/
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *obj;
@ -735,6 +734,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
map, key, value, map_flags);
}
static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
}
static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
}
static int dev_map_btf_id;
const struct bpf_map_ops dev_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@ -747,6 +756,7 @@ const struct bpf_map_ops dev_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_dtab",
.map_btf_id = &dev_map_btf_id,
.map_redirect = dev_map_redirect,
};
static int dev_map_hash_map_btf_id;
@ -761,6 +771,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_dtab",
.map_btf_id = &dev_map_hash_map_btf_id,
.map_redirect = dev_hash_map_redirect,
};
static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,

View File

@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};
static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
struct htab_elem *elem;
u32 roundup_key_size;
int i, num_elems = 0;
void __percpu *pptr;
struct bucket *b;
void *key, *val;
bool is_percpu;
u64 ret = 0;
if (flags != 0)
return -EINVAL;
is_percpu = htab_is_percpu(htab);
roundup_key_size = round_up(map->key_size, 8);
/* disable migration so percpu value prepared here will be the
* same as the one seen by the bpf program with bpf_map_lookup_elem().
*/
if (is_percpu)
migrate_disable();
for (i = 0; i < htab->n_buckets; i++) {
b = &htab->buckets[i];
rcu_read_lock();
head = &b->head;
hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
key = elem->key;
if (is_percpu) {
/* current cpu value for percpu map */
pptr = htab_elem_get_ptr(elem, map->key_size);
val = this_cpu_ptr(pptr);
} else {
val = elem->key + roundup_key_size;
}
num_elems++;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret) {
rcu_read_unlock();
goto out;
}
}
rcu_read_unlock();
}
out:
if (is_percpu)
migrate_enable();
return num_elems;
}
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
.map_delete_elem = htab_map_delete_elem,
.map_gen_lookup = htab_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,

View File

@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
break;
}

View File

@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_CALL;
}
static bool bpf_pseudo_func(const struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
insn->src_reg == BPF_PSEUDO_FUNC;
}
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
@ -248,6 +254,7 @@ struct bpf_call_arg_meta {
u32 btf_id;
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
};
struct btf *btf_vmlinux;
@ -390,6 +397,24 @@ __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
env->prev_linfo = linfo;
}
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct tnum *range, const char *ctx,
const char *reg_name)
{
char tn_buf[48];
verbose(env, "At %s the register %s ", ctx, reg_name);
if (!tnum_is_unknown(reg->var_off)) {
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
tnum_strn(tn_buf, sizeof(tn_buf), *range);
verbose(env, " should have been in %s\n", tn_buf);
}
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
return type == PTR_TO_PACKET ||
@ -409,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
return type == PTR_TO_SOCKET ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON;
}
@ -451,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_CTX_OR_NULL ||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
type == ARG_PTR_TO_STACK_OR_NULL;
}
/* Determine whether the function releases some resources allocated by another
@ -541,6 +568,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
[PTR_TO_RDWR_BUF] = "rdwr_buf",
[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
};
static char slot_type_char[] = {
@ -612,6 +641,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (type_is_pkt_pointer(t))
verbose(env, ",r=%d", reg->range);
else if (t == CONST_PTR_TO_MAP ||
t == PTR_TO_MAP_KEY ||
t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
verbose(env, ",ks=%d,vs=%d",
@ -1519,7 +1549,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
}
ret = find_subprog(env, off);
if (ret >= 0)
return 0;
return ret;
if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
verbose(env, "too many subprograms\n");
return -E2BIG;
@ -1527,7 +1557,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
env->subprog_info[env->subprog_cnt++].start = off;
sort(env->subprog_info, env->subprog_cnt,
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
return 0;
return env->subprog_cnt - 1;
}
static int check_subprogs(struct bpf_verifier_env *env)
@ -1544,6 +1574,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
/* determine subprog starts. The end is one before the next starts */
for (i = 0; i < insn_cnt; i++) {
if (bpf_pseudo_func(insn + i)) {
if (!env->bpf_capable) {
verbose(env,
"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
return -EPERM;
}
ret = add_subprog(env, i + insn[i].imm + 1);
if (ret < 0)
return ret;
/* remember subprog */
insn[i + 1].imm = ret;
continue;
}
if (!bpf_pseudo_call(insn + i))
continue;
if (!env->bpf_capable) {
@ -2295,6 +2338,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_MEM_OR_NULL:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
return true;
default:
return false;
@ -2899,6 +2944,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
reg = &cur_regs(env)[regno];
switch (reg->type) {
case PTR_TO_MAP_KEY:
verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
mem_size, off, size);
break;
case PTR_TO_MAP_VALUE:
verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
mem_size, off, size);
@ -3304,6 +3353,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_FLOW_KEYS:
pointer_desc = "flow keys ";
break;
case PTR_TO_MAP_KEY:
pointer_desc = "key ";
break;
case PTR_TO_MAP_VALUE:
pointer_desc = "value ";
break;
@ -3405,7 +3457,7 @@ process_func:
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
if (!bpf_pseudo_call(insn + i))
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
/* remember insn and function to return to */
ret_insn[frame] = i + 1;
@ -3842,7 +3894,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
/* for access checks, reg->off is just part of off */
off += reg->off;
if (reg->type == PTR_TO_MAP_VALUE) {
if (reg->type == PTR_TO_MAP_KEY) {
if (t == BPF_WRITE) {
verbose(env, "write to change key R%d not allowed\n", regno);
return -EACCES;
}
err = check_mem_region_access(env, regno, off, size,
reg->map_ptr->key_size, false);
if (err)
return err;
if (value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_MAP_VALUE) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into map\n", value_regno);
@ -4258,6 +4322,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
return check_mem_region_access(env, regno, reg->off, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
if (check_map_access_type(env, regno, reg->off, access_size,
meta && meta->raw_mode ? BPF_WRITE :
@ -4474,6 +4541,7 @@ static const struct bpf_reg_types map_key_value_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
},
};
@ -4505,6 +4573,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
PTR_TO_RDONLY_BUF,
@ -4517,6 +4586,7 @@ static const struct bpf_reg_types int_ptr_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
},
};
@ -4529,6 +4599,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@ -4557,6 +4629,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@ -4738,6 +4812,8 @@ skip_type_check:
verbose(env, "verifier internal error\n");
return -EFAULT;
}
} else if (arg_type == ARG_PTR_TO_FUNC) {
meta->subprogno = reg->subprogno;
} else if (arg_type_is_mem_ptr(arg_type)) {
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
@ -5258,13 +5334,19 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
}
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
int insn_idx);
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_info_aux *func_info_aux;
struct bpf_func_state *caller, *callee;
int i, err, subprog, target_insn;
int err;
bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
@ -5273,14 +5355,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -E2BIG;
}
target_insn = *insn_idx + insn->imm;
subprog = find_subprog(env, target_insn + 1);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn + 1);
return -EFAULT;
}
caller = state->frame[state->curframe];
if (state->frame[state->curframe + 1]) {
verbose(env, "verifier bug. Frame %d already allocated\n",
@ -5335,11 +5409,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (err)
return err;
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
callee->regs[i] = caller->regs[i];
err = set_callee_state_cb(env, caller, callee, *insn_idx);
if (err)
return err;
clear_caller_saved_regs(env, caller->regs);
@ -5347,7 +5419,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
state->curframe++;
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
*insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
@ -5358,6 +5430,92 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return 0;
}
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee)
{
/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
* void *callback_ctx, u64 flags);
* callback_fn(struct bpf_map *map, void *key, void *value,
* void *callback_ctx);
*/
callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
/* pointer to stack or null */
callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
/* unused */
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
return 0;
}
static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx)
{
int i;
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
callee->regs[i] = caller->regs[i];
return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
int subprog, target_insn;
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn);
return -EFAULT;
}
return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
}
static int set_map_elem_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
int insn_idx)
{
struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
struct bpf_map *map;
int err;
if (bpf_map_ptr_poisoned(insn_aux)) {
verbose(env, "tail_call abusing map_ptr\n");
return -EINVAL;
}
map = BPF_MAP_PTR(insn_aux->map_ptr_state);
if (!map->ops->map_set_for_each_callback_args ||
!map->ops->map_for_each_callback) {
verbose(env, "callback function not allowed for map\n");
return -ENOTSUPP;
}
err = map->ops->map_set_for_each_callback_args(env, caller, callee);
if (err)
return err;
callee->in_callback_fn = true;
return 0;
}
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
@ -5380,8 +5538,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
state->curframe--;
caller = state->frame[state->curframe];
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
if (callee->in_callback_fn) {
/* enforce R0 return value range [0, 1]. */
struct tnum range = tnum_range(0, 1);
if (r0->type != SCALAR_VALUE) {
verbose(env, "R0 not a scalar value\n");
return -EACCES;
}
if (!tnum_in(range, r0->var_off)) {
verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
return -EINVAL;
}
} else {
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
}
/* Transfer references to the caller */
err = transfer_reference_state(caller, callee);
@ -5436,7 +5608,9 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_map_push_elem &&
func_id != BPF_FUNC_map_pop_elem &&
func_id != BPF_FUNC_map_peek_elem)
func_id != BPF_FUNC_map_peek_elem &&
func_id != BPF_FUNC_for_each_map_elem &&
func_id != BPF_FUNC_redirect_map)
return 0;
if (map == NULL) {
@ -5517,15 +5691,18 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct bpf_func_proto *fn = NULL;
struct bpf_reg_state *regs;
struct bpf_call_arg_meta meta;
int insn_idx = *insn_idx_p;
bool changes_data;
int i, err;
int i, err, func_id;
/* find function prototype */
func_id = insn->imm;
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
func_id);
@ -5571,7 +5748,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
err = check_func_arg(env, i, &meta, fn);
if (err)
return err;
@ -5621,6 +5798,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
if (func_id == BPF_FUNC_for_each_map_elem) {
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_map_elem_callback_state);
if (err < 0)
return -EINVAL;
}
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
@ -5874,6 +6058,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
else
*ptr_limit = -off;
return 0;
case PTR_TO_MAP_KEY:
/* Currently, this code is not exercised as the only use
* is bpf_for_each_map_elem() helper which requires
* bpf_capble. The code has been tested manually for
* future use.
*/
if (mask_to_left) {
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
} else {
off = ptr_reg->smin_value + ptr_reg->off;
*ptr_limit = ptr_reg->map_ptr->key_size - off;
}
return 0;
case PTR_TO_MAP_VALUE:
if (mask_to_left) {
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
@ -5904,7 +6101,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
aux->alu_limit != alu_limit))
return -EACCES;
/* Corresponding fixup done in fixup_bpf_calls(). */
/* Corresponding fixup done in do_misc_fixups(). */
aux->alu_state = alu_state;
aux->alu_limit = alu_limit;
return 0;
@ -6075,6 +6272,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
@ -8254,6 +8452,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
if (insn->src_reg == BPF_PSEUDO_FUNC) {
struct bpf_prog_aux *aux = env->prog->aux;
u32 subprogno = insn[1].imm;
if (!aux->func_info) {
verbose(env, "missing btf func_info\n");
return -EINVAL;
}
if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
verbose(env, "callback function not static\n");
return -EINVAL;
}
dst_reg->type = PTR_TO_FUNC;
dst_reg->subprogno = subprogno;
return 0;
}
map = env->used_maps[aux->map_index];
mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->map_ptr = map;
@ -8482,17 +8698,7 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (!tnum_in(range, reg->var_off)) {
char tn_buf[48];
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
tnum_strn(tn_buf, sizeof(tn_buf), range);
verbose(env, " should have been in %s\n", tn_buf);
verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
return -EINVAL;
}
@ -8619,6 +8825,27 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return DONE_EXPLORING;
}
static int visit_func_call_insn(int t, int insn_cnt,
struct bpf_insn *insns,
struct bpf_verifier_env *env,
bool visit_callee)
{
int ret;
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret)
return ret;
if (t + 1 < insn_cnt)
init_explored_state(env, t + 1);
if (visit_callee) {
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
env, false);
}
return ret;
}
/* Visits the instruction at index t and returns one of the following:
* < 0 - an error occurred
* DONE_EXPLORING - the instruction was fully explored
@ -8629,6 +8856,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
struct bpf_insn *insns = env->prog->insnsi;
int ret;
if (bpf_pseudo_func(insns + t))
return visit_func_call_insn(t, insn_cnt, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insns[t].code) != BPF_JMP &&
BPF_CLASS(insns[t].code) != BPF_JMP32)
@ -8639,18 +8869,8 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
return DONE_EXPLORING;
case BPF_CALL:
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret)
return ret;
if (t + 1 < insn_cnt)
init_explored_state(env, t + 1);
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
env, false);
}
return ret;
return visit_func_call_insn(t, insn_cnt, insns, env,
insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
if (BPF_SRC(insns[t].code) != BPF_K)
@ -9259,6 +9479,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
*/
return false;
}
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
@ -10105,10 +10326,9 @@ static int do_check(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn->imm, env->insn_idx);
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
return err;
} else if (opcode == BPF_JA) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->imm != 0 ||
@ -10537,6 +10757,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
goto next_insn;
}
if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
aux = &env->insn_aux_data[i];
aux->ptr_type = PTR_TO_FUNC;
goto next_insn;
}
/* In final convert_pseudo_ld_imm64() step, this is
* converted into regular 64-bit imm load insn.
*/
@ -10669,9 +10895,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
int insn_cnt = env->prog->len;
int i;
for (i = 0; i < insn_cnt; i++, insn++)
if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
insn->src_reg = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
continue;
if (insn->src_reg == BPF_PSEUDO_FUNC)
continue;
insn->src_reg = 0;
}
}
/* single env->prog->insni[off] instruction was replaced with the range
@ -11310,6 +11540,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
env->insn_aux_data[i].call_imm = insn->imm;
/* subprog is encoded in insn[1].imm */
continue;
}
if (!bpf_pseudo_call(insn))
continue;
/* Upon error here we cannot fall back to interpreter but
@ -11439,6 +11675,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < env->subprog_cnt; i++) {
insn = func[i]->insnsi;
for (j = 0; j < func[i]->len; j++, insn++) {
if (bpf_pseudo_func(insn)) {
subprog = insn[1].imm;
insn[0].imm = (u32)(long)func[subprog]->bpf_func;
insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
continue;
}
if (!bpf_pseudo_call(insn))
continue;
subprog = insn->off;
@ -11484,6 +11726,11 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
insn[0].imm = env->insn_aux_data[i].call_imm;
insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
continue;
}
if (!bpf_pseudo_call(insn))
continue;
insn->off = env->insn_aux_data[i].call_imm;
@ -11548,6 +11795,14 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return -EINVAL;
}
for (i = 0; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
/* When JIT fails the progs with callback calls
* have to be rejected, since interpreter doesn't support them yet.
*/
verbose(env, "callbacks are not allowed in non-JITed programs\n");
return -EINVAL;
}
if (!bpf_pseudo_call(insn))
continue;
depth = get_callee_stack_depth(env, insn, i);
@ -11560,12 +11815,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
/* fixup insn->imm field of bpf_call instructions
* and inline eligible helpers as explicit sequence of BPF instructions
*
* this function is called after eBPF program passed verification
/* Do various post-verification rewrites in a single program pass.
* These rewrites simplify JIT and interpreter implementations.
*/
static int fixup_bpf_calls(struct bpf_verifier_env *env)
static int do_misc_fixups(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
bool expect_blinding = bpf_jit_blinding_enabled(prog);
@ -11580,6 +11833,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
/* Make divide-by-zero exceptions impossible. */
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
@ -11620,6 +11874,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
if (BPF_CLASS(insn->code) == BPF_LD &&
(BPF_MODE(insn->code) == BPF_ABS ||
BPF_MODE(insn->code) == BPF_IND)) {
@ -11639,6 +11894,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
/* Rewrite pointer arithmetic to mitigate speculation attacks. */
if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
@ -11787,7 +12043,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_map_delete_elem ||
insn->imm == BPF_FUNC_map_push_elem ||
insn->imm == BPF_FUNC_map_pop_elem ||
insn->imm == BPF_FUNC_map_peek_elem)) {
insn->imm == BPF_FUNC_map_peek_elem ||
insn->imm == BPF_FUNC_redirect_map)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@ -11829,6 +12086,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_redirect,
(int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
patch_map_ops_generic:
switch (insn->imm) {
case BPF_FUNC_map_lookup_elem:
@ -11855,11 +12115,16 @@ patch_map_ops_generic:
insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
__bpf_call_base;
continue;
case BPF_FUNC_redirect_map:
insn->imm = BPF_CAST_CALL(ops->map_redirect) -
__bpf_call_base;
continue;
}
goto patch_call_imm;
}
/* Implement bpf_jiffies64 inline. */
if (prog->jit_requested && BITS_PER_LONG == 64 &&
insn->imm == BPF_FUNC_jiffies64) {
struct bpf_insn ld_jiffies_addr[2] = {
@ -12670,7 +12935,7 @@ skip_full_check:
ret = convert_ctx_accesses(env);
if (ret == 0)
ret = fixup_bpf_calls(env);
ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched
* insns could be handled correctly.

View File

@ -96,6 +96,7 @@
#include <linux/kasan.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
@ -2064,6 +2066,9 @@ static __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(p->bpf_storage, NULL);
#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);

View File

@ -1367,6 +1367,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
case BPF_FUNC_task_storage_get:
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
return NULL;
}

View File

@ -317,13 +317,9 @@ config BPF_STREAM_PARSER
select STREAM_PARSER
select NET_SOCK_MSG
help
Enabling this allows a stream parser to be used with
Enabling this allows a TCP stream parser to be used with
BPF_MAP_TYPE_SOCKMAP.
BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
It can be used to enforce socket policy, implement socket redirects,
etc.
config NET_FLOW_LIMIT
bool
depends on RPS

View File

@ -10,20 +10,86 @@
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
struct bpf_test_timer {
enum { NO_PREEMPT, NO_MIGRATE } mode;
u32 i;
u64 time_start, time_spent;
};
static void bpf_test_timer_enter(struct bpf_test_timer *t)
__acquires(rcu)
{
rcu_read_lock();
if (t->mode == NO_PREEMPT)
preempt_disable();
else
migrate_disable();
t->time_start = ktime_get_ns();
}
static void bpf_test_timer_leave(struct bpf_test_timer *t)
__releases(rcu)
{
t->time_start = 0;
if (t->mode == NO_PREEMPT)
preempt_enable();
else
migrate_enable();
rcu_read_unlock();
}
static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
__must_hold(rcu)
{
t->i++;
if (t->i >= repeat) {
/* We're done. */
t->time_spent += ktime_get_ns() - t->time_start;
do_div(t->time_spent, t->i);
*duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
*err = 0;
goto reset;
}
if (signal_pending(current)) {
/* During iteration: we've been cancelled, abort. */
*err = -EINTR;
goto reset;
}
if (need_resched()) {
/* During iteration: we need to reschedule between runs. */
t->time_spent += ktime_get_ns() - t->time_start;
bpf_test_timer_leave(t);
cond_resched();
bpf_test_timer_enter(t);
}
/* Do another round. */
return true;
reset:
t->i = 0;
return false;
}
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
struct bpf_test_timer t = { NO_MIGRATE };
enum bpf_cgroup_storage_type stype;
u64 time_start, time_spent = 0;
int ret = 0;
u32 i;
int ret;
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@ -38,40 +104,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
if (!repeat)
repeat = 1;
rcu_read_lock();
migrate_disable();
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
bpf_test_timer_enter(&t);
do {
bpf_cgroup_storage_set(storage);
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = BPF_PROG_RUN(prog, ctx);
if (signal_pending(current)) {
ret = -EINTR;
break;
}
if (need_resched()) {
time_spent += ktime_get_ns() - time_start;
migrate_enable();
rcu_read_unlock();
cond_resched();
rcu_read_lock();
migrate_disable();
time_start = ktime_get_ns();
}
}
time_spent += ktime_get_ns() - time_start;
migrate_enable();
rcu_read_unlock();
do_div(time_spent, repeat);
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
bpf_test_timer_leave(&t);
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
@ -674,18 +716,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
struct bpf_test_timer t = { NO_PREEMPT };
u32 size = kattr->test.data_size_in;
struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys *user_ctx;
struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0;
const struct ethhdr *eth;
unsigned int flags = 0;
u32 retval, duration;
void *data;
int ret;
u32 i;
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
@ -721,39 +762,15 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
ctx.data = data;
ctx.data_end = (__u8 *)data + size;
rcu_read_lock();
preempt_disable();
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
bpf_test_timer_enter(&t);
do {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size, flags);
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
if (signal_pending(current)) {
preempt_enable();
rcu_read_unlock();
ret = -EINTR;
goto out;
}
if (need_resched()) {
time_spent += ktime_get_ns() - time_start;
preempt_enable();
rcu_read_unlock();
cond_resched();
rcu_read_lock();
preempt_disable();
time_start = ktime_get_ns();
}
}
time_spent += ktime_get_ns() - time_start;
preempt_enable();
rcu_read_unlock();
do_div(time_spent, repeat);
duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
if (ret < 0)
goto out;
ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
retval, duration);
@ -766,3 +783,106 @@ out:
kfree(data);
return ret;
}
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
struct bpf_test_timer t = { NO_PREEMPT };
struct bpf_prog_array *progs = NULL;
struct bpf_sk_lookup_kern ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_sk_lookup *user_ctx;
u32 retval, duration;
int ret = -EINVAL;
if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
return -EINVAL;
if (kattr->test.flags || kattr->test.cpu)
return -EINVAL;
if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
kattr->test.data_size_out)
return -EINVAL;
if (!repeat)
repeat = 1;
user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
if (IS_ERR(user_ctx))
return PTR_ERR(user_ctx);
if (!user_ctx)
return -EINVAL;
if (user_ctx->sk)
goto out;
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
goto out;
if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
ret = -ERANGE;
goto out;
}
ctx.family = (u16)user_ctx->family;
ctx.protocol = (u16)user_ctx->protocol;
ctx.dport = (u16)user_ctx->local_port;
ctx.sport = (__force __be16)user_ctx->remote_port;
switch (ctx.family) {
case AF_INET:
ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
break;
#endif
default:
ret = -EAFNOSUPPORT;
goto out;
}
progs = bpf_prog_array_alloc(1, GFP_KERNEL);
if (!progs) {
ret = -ENOMEM;
goto out;
}
progs->items[0].prog = prog;
bpf_test_timer_enter(&t);
do {
ctx.selected_sk = NULL;
retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
if (ret < 0)
goto out;
user_ctx->cookie = 0;
if (ctx.selected_sk) {
if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
ret = -EOPNOTSUPP;
goto out;
}
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
out:
bpf_prog_array_free(progs);
kfree(user_ctx);
return ret;
}

View File

@ -16,7 +16,6 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-y += net-sysfs.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
@ -28,10 +27,13 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
endif
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o

View File

@ -89,7 +89,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_local_storage_map *)map;
bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
bpf_local_storage_map_free(smap);
bpf_local_storage_map_free(smap, NULL);
}
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)

View File

@ -1863,10 +1863,7 @@ static const struct bpf_func_proto bpf_sk_fullsock_proto = {
static inline int sk_skb_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
int err = __bpf_try_make_writable(skb, write_len);
bpf_compute_data_end_sk_skb(skb);
return err;
return __bpf_try_make_writable(skb, write_len);
}
BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
@ -3412,6 +3409,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
BPF_F_ADJ_ROOM_ENCAP_L2( \
BPF_ADJ_ROOM_ENCAP_L2_MASK))
@ -3448,6 +3446,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;
if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
inner_mac_len < ETH_HLEN)
return -EINVAL;
if (skb->encapsulation)
return -EALREADY;
@ -3466,7 +3468,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
skb_set_inner_protocol(skb, skb->protocol);
if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
else
skb_set_inner_protocol(skb, skb->protocol);
skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);
@ -3577,7 +3583,6 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOMEM;
__skb_pull(skb, len_diff_abs);
}
bpf_compute_data_end_sk_skb(skb);
if (tls_sw_has_ctx_rx(skb->sk)) {
struct strp_msg *rxm = strp_msg(skb);
@ -3742,10 +3747,7 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
u64, flags)
{
int ret = __bpf_skb_change_tail(skb, new_len, flags);
bpf_compute_data_end_sk_skb(skb);
return ret;
return __bpf_skb_change_tail(skb, new_len, flags);
}
static const struct bpf_func_proto sk_skb_change_tail_proto = {
@ -3808,10 +3810,7 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
u64, flags)
{
int ret = __bpf_skb_change_head(skb, head_room, flags);
bpf_compute_data_end_sk_skb(skb);
return ret;
return __bpf_skb_change_head(skb, head_room, flags);
}
static const struct bpf_func_proto sk_skb_change_head_proto = {
@ -3919,23 +3918,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
struct bpf_map *map, struct xdp_buff *xdp)
{
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
return dev_map_enqueue(fwd, xdp, dev_rx);
case BPF_MAP_TYPE_CPUMAP:
return cpu_map_enqueue(fwd, xdp, dev_rx);
case BPF_MAP_TYPE_XSKMAP:
return __xsk_map_redirect(fwd, xdp);
default:
return -EBADRQC;
}
return 0;
}
void xdp_do_flush(void)
{
__dev_flush();
@ -3944,71 +3926,52 @@ void xdp_do_flush(void)
}
EXPORT_SYMBOL_GPL(xdp_do_flush);
static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
{
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
return __dev_map_lookup_elem(map, index);
case BPF_MAP_TYPE_DEVMAP_HASH:
return __dev_map_hash_lookup_elem(map, index);
case BPF_MAP_TYPE_CPUMAP:
return __cpu_map_lookup_elem(map, index);
case BPF_MAP_TYPE_XSKMAP:
return __xsk_map_lookup_elem(map, index);
default:
return NULL;
}
}
void bpf_clear_redirect_map(struct bpf_map *map)
{
struct bpf_redirect_info *ri;
int cpu;
for_each_possible_cpu(cpu) {
ri = per_cpu_ptr(&bpf_redirect_info, cpu);
/* Avoid polluting remote cacheline due to writes if
* not needed. Once we pass this test, we need the
* cmpxchg() to make sure it hasn't been changed in
* the meantime by remote CPU.
*/
if (unlikely(READ_ONCE(ri->map) == map))
cmpxchg(&ri->map, map, NULL);
}
}
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->tgt_index;
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
int err;
ri->tgt_index = 0;
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (unlikely(!map)) {
fwd = dev_get_by_index_rcu(dev_net(dev), index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
switch (map_type) {
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
err = dev_map_enqueue(fwd, xdp, dev);
break;
case BPF_MAP_TYPE_CPUMAP:
err = cpu_map_enqueue(fwd, xdp, dev);
break;
case BPF_MAP_TYPE_XSKMAP:
err = __xsk_map_redirect(fwd, xdp);
break;
case BPF_MAP_TYPE_UNSPEC:
if (map_id == INT_MAX) {
fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
if (unlikely(!fwd)) {
err = -EINVAL;
break;
}
err = dev_xdp_enqueue(fwd, xdp, dev);
break;
}
err = dev_xdp_enqueue(fwd, xdp, dev);
} else {
err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
fallthrough;
default:
err = -EBADRQC;
}
if (unlikely(err))
goto err;
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
return 0;
err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
@ -4017,41 +3980,36 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog,
struct bpf_map *map)
void *fwd,
enum bpf_map_type map_type, u32 map_id)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
u32 index = ri->tgt_index;
void *fwd = ri->tgt_value;
int err = 0;
int err;
ri->tgt_index = 0;
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
struct bpf_dtab_netdev *dst = fwd;
err = dev_map_generic_redirect(dst, skb, xdp_prog);
switch (map_type) {
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
if (unlikely(err))
goto err;
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
struct xdp_sock *xs = fwd;
err = xsk_generic_rcv(xs, xdp);
break;
case BPF_MAP_TYPE_XSKMAP:
err = xsk_generic_rcv(fwd, xdp);
if (err)
goto err;
consume_skb(skb);
} else {
break;
default:
/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
err = -EBADRQC;
goto err;
}
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
return 0;
err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
return err;
}
@ -4059,31 +4017,34 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->tgt_index;
struct net_device *fwd;
int err = 0;
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
int err;
if (map)
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
map);
ri->tgt_index = 0;
fwd = dev_get_by_index_rcu(dev_net(dev), index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
err = xdp_ok_fwd_dev(fwd, skb->len);
if (unlikely(err))
goto err;
skb->dev = fwd;
_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
generic_xdp_tx(skb, xdp_prog);
return 0;
}
err = xdp_ok_fwd_dev(fwd, skb->len);
if (unlikely(err))
goto err;
skb->dev = fwd;
_trace_xdp_redirect(dev, xdp_prog, index);
generic_xdp_tx(skb, xdp_prog);
return 0;
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
return err;
}
@ -4094,10 +4055,12 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
if (unlikely(flags))
return XDP_ABORTED;
ri->flags = flags;
/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
* by map_idr) is used for ifindex based XDP redirect.
*/
ri->tgt_index = ifindex;
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
ri->map_id = INT_MAX;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
return XDP_REDIRECT;
}
@ -4113,28 +4076,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
/* Lower bits of the flags are used as return code on lookup failure */
if (unlikely(flags > XDP_TX))
return XDP_ABORTED;
ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
if (unlikely(!ri->tgt_value)) {
/* If the lookup fails we want to clear out the state in the
* redirect_info struct completely, so that if an eBPF program
* performs multiple lookups, the last one always takes
* precedence.
*/
WRITE_ONCE(ri->map, NULL);
return flags;
}
ri->flags = flags;
ri->tgt_index = ifindex;
WRITE_ONCE(ri->map, map);
return XDP_REDIRECT;
return map->ops->map_redirect(map, ifindex, flags);
}
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
@ -9655,22 +9597,40 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
/* data_end = skb->data + skb_headlen() */
static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
/* si->dst_reg = skb->data */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, data));
/* AX = skb->len */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, len));
/* si->dst_reg = skb->data + skb->len */
*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
/* AX = skb->data_len */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, data_len));
/* si->dst_reg = skb->data + skb->len - skb->data_len */
*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
return insn;
}
static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
int off;
switch (si->off) {
case offsetof(struct __sk_buff, data_end):
off = si->off;
off -= offsetof(struct __sk_buff, data_end);
off += offsetof(struct sk_buff, cb);
off += offsetof(struct tcp_skb_cb, bpf.data_end);
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
insn = bpf_convert_data_end_access(si, insn);
break;
default:
return bpf_convert_ctx_access(type, si, insn_buf, prog,
@ -10449,6 +10409,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
}
const struct bpf_prog_ops sk_lookup_prog_ops = {
.test_run = bpf_prog_test_run_sk_lookup,
};
const struct bpf_verifier_ops sk_lookup_verifier_ops = {

View File

@ -525,7 +525,8 @@ static void sk_psock_backlog(struct work_struct *work)
len = skb->len;
off = 0;
start:
ingress = tcp_skb_bpf_ingress(skb);
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
ret = -EIO;
if (likely(psock->sk->sk_socket))
@ -618,7 +619,7 @@ struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
return link;
}
void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
{
struct sk_msg *msg, *tmp;
@ -631,7 +632,12 @@ void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
static void sk_psock_zap_ingress(struct sk_psock *psock)
{
__skb_queue_purge(&psock->ingress_skb);
struct sk_buff *skb;
while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) {
skb_bpf_redirect_clear(skb);
kfree_skb(skb);
}
__sk_psock_purge_ingress_msg(psock);
}
@ -645,15 +651,15 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
}
}
static void sk_psock_done_strp(struct sk_psock *psock);
static void sk_psock_destroy_deferred(struct work_struct *gc)
{
struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
/* No sk_callback_lock since already detached. */
/* Parser has been stopped */
if (psock->progs.skb_parser)
strp_done(&psock->parser.strp);
sk_psock_done_strp(psock);
cancel_work_sync(&psock->work);
@ -685,9 +691,9 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
write_lock_bh(&sk->sk_callback_lock);
sk_psock_restore_proto(sk, psock);
rcu_assign_sk_user_data(sk, NULL);
if (psock->progs.skb_parser)
if (psock->progs.stream_parser)
sk_psock_stop_strp(sk, psock);
else if (psock->progs.skb_verdict)
else if (psock->progs.stream_verdict)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
@ -743,27 +749,12 @@ out:
}
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
struct sk_buff *skb)
{
bpf_compute_data_end_sk_skb(skb);
return bpf_prog_run_pin_on_cpu(prog, skb);
}
static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
{
struct sk_psock_parser *parser;
parser = container_of(strp, struct sk_psock_parser, strp);
return container_of(parser, struct sk_psock, parser);
}
static void sk_psock_skb_redirect(struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
sk_other = tcp_skb_bpf_redirect_fetch(skb);
sk_other = skb_bpf_redirect_fetch(skb);
/* This error is a buggy BPF program, it returned a redirect
* return code, but then didn't set a redirect interface.
*/
@ -806,16 +797,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
int ret = __SK_PASS;
rcu_read_lock();
prog = READ_ONCE(psock->progs.skb_verdict);
prog = READ_ONCE(psock->progs.stream_verdict);
if (likely(prog)) {
/* We skip full set_owner_r here because if we do a SK_PASS
* or SK_DROP we can skip skb memory accounting and use the
* TLS context.
*/
skb->sk = psock->sk;
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
sk_psock_tls_verdict_apply(skb, psock->sk, ret);
@ -827,7 +819,6 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
static void sk_psock_verdict_apply(struct sk_psock *psock,
struct sk_buff *skb, int verdict)
{
struct tcp_skb_cb *tcp;
struct sock *sk_other;
int err = -EIO;
@ -839,8 +830,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
goto out_free;
}
tcp = TCP_SKB_CB(skb);
tcp->bpf.flags |= BPF_F_INGRESS;
skb_bpf_set_ingress(skb);
/* If the queue is empty then we can submit directly
* into the msg queue. If its not empty we have to
@ -866,6 +856,24 @@ out_free:
}
}
static void sk_psock_write_space(struct sock *sk)
{
struct sk_psock *psock;
void (*write_space)(struct sock *sk) = NULL;
rcu_read_lock();
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_work(&psock->work);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
if (write_space)
write_space(sk);
}
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
{
struct sk_psock *psock;
@ -881,11 +889,12 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
goto out;
}
skb_set_owner_r(skb, sk);
prog = READ_ONCE(psock->progs.skb_verdict);
prog = READ_ONCE(psock->progs.stream_verdict);
if (likely(prog)) {
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
}
sk_psock_verdict_apply(psock, skb, ret);
out:
@ -899,15 +908,15 @@ static int sk_psock_strp_read_done(struct strparser *strp, int err)
static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
{
struct sk_psock *psock = sk_psock_from_strp(strp);
struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
struct bpf_prog *prog;
int ret = skb->len;
rcu_read_lock();
prog = READ_ONCE(psock->progs.skb_parser);
prog = READ_ONCE(psock->progs.stream_parser);
if (likely(prog)) {
skb->sk = psock->sk;
ret = sk_psock_bpf_run(psock, prog, skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
skb->sk = NULL;
}
rcu_read_unlock();
@ -923,16 +932,59 @@ static void sk_psock_strp_data_ready(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (tls_sw_has_ctx_rx(sk)) {
psock->parser.saved_data_ready(sk);
psock->saved_data_ready(sk);
} else {
write_lock_bh(&sk->sk_callback_lock);
strp_data_ready(&psock->parser.strp);
strp_data_ready(&psock->strp);
write_unlock_bh(&sk->sk_callback_lock);
}
}
rcu_read_unlock();
}
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
static const struct strp_callbacks cb = {
.rcv_msg = sk_psock_strp_read,
.read_sock_done = sk_psock_strp_read_done,
.parse_msg = sk_psock_strp_parse,
};
return strp_init(&psock->strp, sk, &cb);
}
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
if (psock->saved_data_ready)
return;
psock->saved_data_ready = sk->sk_data_ready;
sk->sk_data_ready = sk_psock_strp_data_ready;
sk->sk_write_space = sk_psock_write_space;
}
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
if (!psock->saved_data_ready)
return;
sk->sk_data_ready = psock->saved_data_ready;
psock->saved_data_ready = NULL;
strp_stop(&psock->strp);
}
static void sk_psock_done_strp(struct sk_psock *psock)
{
/* Parser has been stopped */
if (psock->progs.stream_parser)
strp_done(&psock->strp);
}
#else
static void sk_psock_done_strp(struct sk_psock *psock)
{
}
#endif /* CONFIG_BPF_STREAM_PARSER */
static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
unsigned int offset, size_t orig_len)
{
@ -957,11 +1009,12 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
goto out;
}
skb_set_owner_r(skb, sk);
prog = READ_ONCE(psock->progs.skb_verdict);
prog = READ_ONCE(psock->progs.stream_verdict);
if (likely(prog)) {
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
}
sk_psock_verdict_apply(psock, skb, ret);
out:
@ -984,82 +1037,21 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
}
static void sk_psock_write_space(struct sock *sk)
{
struct sk_psock *psock;
void (*write_space)(struct sock *sk) = NULL;
rcu_read_lock();
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_work(&psock->work);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
if (write_space)
write_space(sk);
}
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
static const struct strp_callbacks cb = {
.rcv_msg = sk_psock_strp_read,
.read_sock_done = sk_psock_strp_read_done,
.parse_msg = sk_psock_strp_parse,
};
psock->parser.enabled = false;
return strp_init(&psock->parser.strp, sk, &cb);
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_parser *parser = &psock->parser;
if (parser->enabled)
if (psock->saved_data_ready)
return;
parser->saved_data_ready = sk->sk_data_ready;
psock->saved_data_ready = sk->sk_data_ready;
sk->sk_data_ready = sk_psock_verdict_data_ready;
sk->sk_write_space = sk_psock_write_space;
parser->enabled = true;
}
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_parser *parser = &psock->parser;
if (parser->enabled)
return;
parser->saved_data_ready = sk->sk_data_ready;
sk->sk_data_ready = sk_psock_strp_data_ready;
sk->sk_write_space = sk_psock_write_space;
parser->enabled = true;
}
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_parser *parser = &psock->parser;
if (!parser->enabled)
return;
sk->sk_data_ready = parser->saved_data_ready;
parser->saved_data_ready = NULL;
strp_stop(&parser->strp);
parser->enabled = false;
}
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_parser *parser = &psock->parser;
if (!parser->enabled)
if (!psock->saved_data_ready)
return;
sk->sk_data_ready = parser->saved_data_ready;
parser->saved_data_ready = NULL;
parser->enabled = false;
sk->sk_data_ready = psock->saved_data_ready;
psock->saved_data_ready = NULL;
}

View File

@ -24,6 +24,9 @@ struct bpf_stab {
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
@ -148,9 +151,9 @@ static void sock_map_del_link(struct sock *sk,
struct bpf_map *map = link->map;
struct bpf_stab *stab = container_of(map, struct bpf_stab,
map);
if (psock->parser.enabled && stab->progs.skb_parser)
if (psock->saved_data_ready && stab->progs.stream_parser)
strp_stop = true;
if (psock->parser.enabled && stab->progs.skb_verdict)
if (psock->saved_data_ready && stab->progs.stream_verdict)
verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
@ -224,23 +227,23 @@ out:
static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
struct sock *sk)
{
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
struct sk_psock *psock;
int ret;
skb_verdict = READ_ONCE(progs->skb_verdict);
if (skb_verdict) {
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
if (IS_ERR(skb_verdict))
return PTR_ERR(skb_verdict);
stream_verdict = READ_ONCE(progs->stream_verdict);
if (stream_verdict) {
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
if (IS_ERR(stream_verdict))
return PTR_ERR(stream_verdict);
}
skb_parser = READ_ONCE(progs->skb_parser);
if (skb_parser) {
skb_parser = bpf_prog_inc_not_zero(skb_parser);
if (IS_ERR(skb_parser)) {
ret = PTR_ERR(skb_parser);
goto out_put_skb_verdict;
stream_parser = READ_ONCE(progs->stream_parser);
if (stream_parser) {
stream_parser = bpf_prog_inc_not_zero(stream_parser);
if (IS_ERR(stream_parser)) {
ret = PTR_ERR(stream_parser);
goto out_put_stream_verdict;
}
}
@ -249,7 +252,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
msg_parser = bpf_prog_inc_not_zero(msg_parser);
if (IS_ERR(msg_parser)) {
ret = PTR_ERR(msg_parser);
goto out_put_skb_parser;
goto out_put_stream_parser;
}
}
@ -261,8 +264,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
if (psock) {
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
(skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
(skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
(stream_parser && READ_ONCE(psock->progs.stream_parser)) ||
(stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
sk_psock_put(sk, psock);
ret = -EBUSY;
goto out_progs;
@ -283,15 +286,15 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
goto out_drop;
write_lock_bh(&sk->sk_callback_lock);
if (skb_parser && skb_verdict && !psock->parser.enabled) {
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
ret = sk_psock_init_strp(sk, psock);
if (ret)
goto out_unlock_drop;
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
psock_set_prog(&psock->progs.skb_parser, skb_parser);
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
psock_set_prog(&psock->progs.stream_parser, stream_parser);
sk_psock_start_strp(sk, psock);
} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
sk_psock_start_verdict(sk,psock);
}
write_unlock_bh(&sk->sk_callback_lock);
@ -303,12 +306,12 @@ out_drop:
out_progs:
if (msg_parser)
bpf_prog_put(msg_parser);
out_put_skb_parser:
if (skb_parser)
bpf_prog_put(skb_parser);
out_put_skb_verdict:
if (skb_verdict)
bpf_prog_put(skb_verdict);
out_put_stream_parser:
if (stream_parser)
bpf_prog_put(stream_parser);
out_put_stream_verdict:
if (stream_verdict)
bpf_prog_put(stream_verdict);
return ret;
}
@ -657,7 +660,6 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS)))
@ -667,8 +669,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
tcb->bpf.flags = flags;
tcb->bpf.sk_redir = sk;
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
return SK_PASS;
}
@ -1250,7 +1251,6 @@ const struct bpf_func_proto bpf_sock_hash_update_proto = {
BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
struct bpf_map *, map, void *, key, u64, flags)
{
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS)))
@ -1260,8 +1260,7 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
tcb->bpf.flags = flags;
tcb->bpf.sk_redir = sk;
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
return SK_PASS;
}
@ -1448,8 +1447,8 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which)
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
struct bpf_prog **pprog;
@ -1461,11 +1460,13 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
case BPF_SK_MSG_VERDICT:
pprog = &progs->msg_parser;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
pprog = &progs->skb_parser;
pprog = &progs->stream_parser;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
pprog = &progs->skb_verdict;
pprog = &progs->stream_verdict;
break;
default:
return -EOPNOTSUPP;

View File

@ -62,7 +62,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \

View File

@ -229,7 +229,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
}
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
#ifdef CONFIG_BPF_STREAM_PARSER
#ifdef CONFIG_BPF_SYSCALL
static bool tcp_bpf_stream_read(const struct sock *sk)
{
struct sk_psock *psock;
@ -629,4 +629,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */

View File

@ -445,6 +445,97 @@ static void xsk_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
struct sk_buff *skb;
struct page *page;
void *buffer;
int err, i;
u64 addr;
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
if (unlikely(!skb))
return ERR_PTR(err);
skb_reserve(skb, hr);
addr = desc->addr;
len = desc->len;
ts = pool->unaligned ? len : pool->chunk_size;
buffer = xsk_buff_raw_get_data(pool, addr);
offset = offset_in_page(buffer);
addr = buffer - pool->addrs;
for (copied = 0, i = 0; copied < len; i++) {
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
copy = min_t(u32, PAGE_SIZE - offset, len - copied);
skb_fill_page_desc(skb, i, page, offset, copy);
copied += copy;
addr += copy;
offset = 0;
}
skb->len += len;
skb->data_len += len;
skb->truesize += ts;
refcount_add(ts, &xs->sk.sk_wmem_alloc);
return skb;
}
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct net_device *dev = xs->dev;
struct sk_buff *skb;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
skb = xsk_build_skb_zerocopy(xs, desc);
if (IS_ERR(skb))
return skb;
} else {
u32 hr, tr, len;
void *buffer;
int err;
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
tr = dev->needed_tailroom;
len = desc->len;
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
if (unlikely(!skb))
return ERR_PTR(err);
skb_reserve(skb, hr);
skb_put(skb, len);
buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err)) {
kfree_skb(skb);
return ERR_PTR(err);
}
}
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
skb->mark = xs->sk.sk_mark;
skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;
return skb;
}
static int xsk_generic_xmit(struct sock *sk)
{
struct xdp_sock *xs = xdp_sk(sk);
@ -461,43 +552,30 @@ static int xsk_generic_xmit(struct sock *sk)
goto out;
while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
char *buffer;
u64 addr;
u32 len;
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb))
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
goto out;
}
skb_put(skb, len);
addr = desc.addr;
buffer = xsk_buff_raw_get_data(xs->pool, addr);
err = skb_store_bits(skb, 0, buffer, len);
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
skb->destructor = xsk_destruct_skb;
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */

View File

@ -47,19 +47,18 @@ struct xsk_queue {
u64 queue_empty_descs;
};
/* The structure of the shared state of the rings are the same as the
* ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
* ring, the kernel is the producer and user space is the consumer. For
* the Tx and fill rings, the kernel is the consumer and user space is
* the producer.
/* The structure of the shared state of the rings are a simple
* circular buffer, as outlined in
* Documentation/core-api/circular-buffers.rst. For the Rx and
* completion ring, the kernel is the producer and user space is the
* consumer. For the Tx and fill rings, the kernel is the consumer and
* user space is the producer.
*
* producer consumer
*
* if (LOAD ->consumer) { LOAD ->producer
* (A) smp_rmb() (C)
* if (LOAD ->consumer) { (A) LOAD.acq ->producer (C)
* STORE $data LOAD $data
* smp_wmb() (B) smp_mb() (D)
* STORE ->producer STORE ->consumer
* STORE.rel ->producer (B) STORE.rel ->consumer (D)
* }
*
* (A) pairs with (D), and (B) pairs with (C).
@ -78,7 +77,8 @@ struct xsk_queue {
*
* (A) is a control dependency that separates the load of ->consumer
* from the stores of $data. In case ->consumer indicates there is no
* room in the buffer to store $data we do not. So no barrier is needed.
* room in the buffer to store $data we do not. The dependency will
* order both of the stores after the loads. So no barrier is needed.
*
* (D) protects the load of the data to be observed to happen after the
* store of the consumer pointer. If we did not have this memory
@ -227,15 +227,13 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
static inline void __xskq_cons_release(struct xsk_queue *q)
{
smp_mb(); /* D, matches A */
WRITE_ONCE(q->ring->consumer, q->cached_cons);
smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matchees A */
}
static inline void __xskq_cons_peek(struct xsk_queue *q)
{
/* Refresh the local pointer */
q->cached_prod = READ_ONCE(q->ring->producer);
smp_rmb(); /* C, matches B */
q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */
}
static inline void xskq_cons_get_entries(struct xsk_queue *q)
@ -397,9 +395,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
smp_wmb(); /* B, matches C */
WRITE_ONCE(q->ring->producer, idx);
smp_store_release(&q->ring->producer, idx); /* B, matches C */
}
static inline void xskq_prod_submit(struct xsk_queue *q)

View File

@ -87,7 +87,6 @@ static void xsk_map_free(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
bpf_clear_redirect_map(map);
synchronize_net();
bpf_map_area_free(m);
}
@ -125,6 +124,16 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
return insn - insn_buf;
}
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
if (key >= map->max_entries)
return NULL;
return READ_ONCE(m->xsk_map[key]);
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
WARN_ON_ONCE(!rcu_read_lock_held());
@ -215,6 +224,11 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
}
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry)
{
@ -247,4 +261,5 @@ const struct bpf_map_ops xsk_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "xsk_map",
.map_btf_id = &xsk_map_btf_id,
.map_redirect = xsk_map_redirect,
};

View File

@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright (C) 2018-2019 Netronome Systems, Inc.
# Copyright (C) 2021 Isovalent, Inc.
# In case user attempts to run with Python 2.
from __future__ import print_function
@ -13,6 +14,9 @@ import sys, os
class NoHelperFound(BaseException):
pass
class NoSyscallCommandFound(BaseException):
pass
class ParsingError(BaseException):
def __init__(self, line='<line not provided>', reader=None):
if reader:
@ -22,18 +26,27 @@ class ParsingError(BaseException):
else:
BaseException.__init__(self, 'Error parsing line: %s' % line)
class Helper(object):
class APIElement(object):
"""
An object representing the description of an eBPF helper function.
@proto: function prototype of the helper function
@desc: textual description of the helper function
@ret: description of the return value of the helper function
An object representing the description of an aspect of the eBPF API.
@proto: prototype of the API symbol
@desc: textual description of the symbol
@ret: (optional) description of any associated return value
"""
def __init__(self, proto='', desc='', ret=''):
self.proto = proto
self.desc = desc
self.ret = ret
class Helper(APIElement):
"""
An object representing the description of an eBPF helper function.
@proto: function prototype of the helper function
@desc: textual description of the helper function
@ret: description of the return value of the helper function
"""
def proto_break_down(self):
"""
Break down helper function protocol into smaller chunks: return type,
@ -60,6 +73,7 @@ class Helper(object):
return res
class HeaderParser(object):
"""
An object used to parse a file in order to extract the documentation of a
@ -72,6 +86,13 @@ class HeaderParser(object):
self.reader = open(filename, 'r')
self.line = ''
self.helpers = []
self.commands = []
def parse_element(self):
proto = self.parse_symbol()
desc = self.parse_desc()
ret = self.parse_ret()
return APIElement(proto=proto, desc=desc, ret=ret)
def parse_helper(self):
proto = self.parse_proto()
@ -79,6 +100,18 @@ class HeaderParser(object):
ret = self.parse_ret()
return Helper(proto=proto, desc=desc, ret=ret)
def parse_symbol(self):
p = re.compile(' \* ?(.+)$')
capture = p.match(self.line)
if not capture:
raise NoSyscallCommandFound
end_re = re.compile(' \* ?NOTES$')
end = end_re.match(self.line)
if end:
raise NoSyscallCommandFound
self.line = self.reader.readline()
return capture.group(1)
def parse_proto(self):
# Argument can be of shape:
# - "void"
@ -140,16 +173,29 @@ class HeaderParser(object):
break
return ret
def run(self):
# Advance to start of helper function descriptions.
offset = self.reader.read().find('* Start of BPF helper function descriptions:')
def seek_to(self, target, help_message):
self.reader.seek(0)
offset = self.reader.read().find(target)
if offset == -1:
raise Exception('Could not find start of eBPF helper descriptions list')
raise Exception(help_message)
self.reader.seek(offset)
self.reader.readline()
self.reader.readline()
self.line = self.reader.readline()
def parse_syscall(self):
self.seek_to('* DOC: eBPF Syscall Commands',
'Could not find start of eBPF syscall descriptions list')
while True:
try:
command = self.parse_element()
self.commands.append(command)
except NoSyscallCommandFound:
break
def parse_helpers(self):
self.seek_to('* Start of BPF helper function descriptions:',
'Could not find start of eBPF helper descriptions list')
while True:
try:
helper = self.parse_helper()
@ -157,6 +203,9 @@ class HeaderParser(object):
except NoHelperFound:
break
def run(self):
self.parse_syscall()
self.parse_helpers()
self.reader.close()
###############################################################################
@ -165,10 +214,11 @@ class Printer(object):
"""
A generic class for printers. Printers should be created with an array of
Helper objects, and implement a way to print them in the desired fashion.
@helpers: array of Helper objects to print to standard output
@parser: A HeaderParser with objects to print to standard output
"""
def __init__(self, helpers):
self.helpers = helpers
def __init__(self, parser):
self.parser = parser
self.elements = []
def print_header(self):
pass
@ -181,19 +231,23 @@ class Printer(object):
def print_all(self):
self.print_header()
for helper in self.helpers:
self.print_one(helper)
for elem in self.elements:
self.print_one(elem)
self.print_footer()
class PrinterRST(Printer):
"""
A printer for dumping collected information about helpers as a ReStructured
Text page compatible with the rst2man program, which can be used to
generate a manual page for the helpers.
@helpers: array of Helper objects to print to standard output
A generic class for printers that print ReStructured Text. Printers should
be created with a HeaderParser object, and implement a way to print API
elements in the desired fashion.
@parser: A HeaderParser with objects to print to standard output
"""
def print_header(self):
header = '''\
def __init__(self, parser):
self.parser = parser
def print_license(self):
license = '''\
.. Copyright (C) All BPF authors and contributors from 2014 to present.
.. See git log include/uapi/linux/bpf.h in kernel tree for details.
..
@ -221,9 +275,39 @@ class PrinterRST(Printer):
..
.. Please do not edit this file. It was generated from the documentation
.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
.. (helpers description), and from scripts/bpf_doc.py in the same
.. repository (header and footer).
'''
print(license)
def print_elem(self, elem):
if (elem.desc):
print('\tDescription')
# Do not strip all newline characters: formatted code at the end of
# a section must be followed by a blank line.
for line in re.sub('\n$', '', elem.desc, count=1).split('\n'):
print('{}{}'.format('\t\t' if line else '', line))
if (elem.ret):
print('\tReturn')
for line in elem.ret.rstrip().split('\n'):
print('{}{}'.format('\t\t' if line else '', line))
print('')
class PrinterHelpersRST(PrinterRST):
"""
A printer for dumping collected information about helpers as a ReStructured
Text page compatible with the rst2man program, which can be used to
generate a manual page for the helpers.
@parser: A HeaderParser with Helper objects to print to standard output
"""
def __init__(self, parser):
self.elements = parser.helpers
def print_header(self):
header = '''\
===========
BPF-HELPERS
===========
@ -264,6 +348,7 @@ kernel at the top).
HELPERS
=======
'''
PrinterRST.print_license(self)
print(header)
def print_footer(self):
@ -380,27 +465,50 @@ SEE ALSO
def print_one(self, helper):
self.print_proto(helper)
self.print_elem(helper)
if (helper.desc):
print('\tDescription')
# Do not strip all newline characters: formatted code at the end of
# a section must be followed by a blank line.
for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
print('{}{}'.format('\t\t' if line else '', line))
if (helper.ret):
print('\tReturn')
for line in helper.ret.rstrip().split('\n'):
print('{}{}'.format('\t\t' if line else '', line))
class PrinterSyscallRST(PrinterRST):
"""
A printer for dumping collected information about the syscall API as a
ReStructured Text page compatible with the rst2man program, which can be
used to generate a manual page for the syscall.
@parser: A HeaderParser with APIElement objects to print to standard
output
"""
def __init__(self, parser):
self.elements = parser.commands
def print_header(self):
header = '''\
===
bpf
===
-------------------------------------------------------------------------------
Perform a command on an extended BPF object
-------------------------------------------------------------------------------
:Manual section: 2
COMMANDS
========
'''
PrinterRST.print_license(self)
print(header)
def print_one(self, command):
print('**%s**' % (command.proto))
self.print_elem(command)
print('')
class PrinterHelpers(Printer):
"""
A printer for dumping collected information about helpers as C header to
be included from BPF program.
@helpers: array of Helper objects to print to standard output
@parser: A HeaderParser with Helper objects to print to standard output
"""
def __init__(self, parser):
self.elements = parser.helpers
type_fwds = [
'struct bpf_fib_lookup',
@ -511,7 +619,7 @@ class PrinterHelpers(Printer):
def print_header(self):
header = '''\
/* This is auto-generated file. See bpf_helpers_doc.py for details. */
/* This is auto-generated file. See bpf_doc.py for details. */
/* Forward declarations of BPF structs */'''
@ -589,8 +697,13 @@ script = os.path.abspath(sys.argv[0])
linuxRoot = os.path.dirname(os.path.dirname(script))
bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
printers = {
'helpers': PrinterHelpersRST,
'syscall': PrinterSyscallRST,
}
argParser = argparse.ArgumentParser(description="""
Parse eBPF header file and generate documentation for eBPF helper functions.
Parse eBPF header file and generate documentation for the eBPF API.
The RST-formatted output produced can be turned into a manual page with the
rst2man utility.
""")
@ -601,6 +714,8 @@ if (os.path.isfile(bpfh)):
default=bpfh)
else:
argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
argParser.add_argument('target', nargs='?', default='helpers',
choices=printers.keys(), help='eBPF API target')
args = argParser.parse_args()
# Parse file.
@ -609,7 +724,9 @@ headerParser.run()
# Print formatted output to standard output.
if args.header:
printer = PrinterHelpers(headerParser.helpers)
if args.target != 'helpers':
raise NotImplementedError('Only helpers header generation is supported')
printer = PrinterHelpers(headerParser)
else:
printer = PrinterRST(headerParser.helpers)
printer = printers[args.target](headerParser)
printer.print_all()

View File

@ -1,60 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
ifndef allow-override
include ../scripts/Makefile.include
include ../scripts/utilities.mak
else
# Assume Makefile.helpers is being run from bpftool/Documentation
# subdirectory. Go up two more directories to fetch bpf.h header and
# associated script.
UP2DIR := ../../
endif
INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
ifeq ($(V),1)
Q =
else
Q = @
endif
prefix ?= /usr/local
mandir ?= $(prefix)/man
man7dir = $(mandir)/man7
HELPERS_RST = bpf-helpers.rst
MAN7_RST = $(HELPERS_RST)
_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
helpers: man7
man7: $(DOC_MAN7)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
$(OUTPUT)%.7: $(OUTPUT)%.rst
ifndef RST2MAN_DEP
$(error "rst2man not found, but required to generate man pages")
endif
$(QUIET_GEN)rst2man $< > $@
helpers-clean:
$(call QUIET_CLEAN, eBPF_helpers-manpage)
$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
helpers-install: helpers
$(call QUIET_INSTALL, eBPF_helpers-manpage)
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
helpers-uninstall:
$(call QUIET_UNINST, eBPF_helpers-manpage)
$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
.PHONY: helpers helpers-clean helpers-install helpers-uninstall

View File

@ -1198,7 +1198,7 @@ static int cmd_run(char *num)
else
return CMD_OK;
bpf_reset();
} while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
} while (pcap_next_pkt() && (!has_limit || (++i < pkts)));
rl_printf("bpf passes:%u fails:%u\n", pass, fail);

View File

@ -185,13 +185,13 @@ ldx
| OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
if ($2 != 4 || $9 != 0xf) {
fprintf(stderr, "ldxb offset not supported!\n");
exit(0);
exit(1);
} else {
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
| OP_LDX number '*' '(' '[' number ']' '&' number ')' {
if ($2 != 4 || $9 != 0xf) {
fprintf(stderr, "ldxb offset not supported!\n");
exit(0);
exit(1);
} else {
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
;
@ -472,7 +472,7 @@ static void bpf_assert_max(void)
{
if (curr_instr >= BPF_MAXINSNS) {
fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
exit(0);
exit(1);
}
}
@ -522,7 +522,7 @@ static int bpf_find_insns_offset(const char *label)
if (ret == -ENOENT) {
fprintf(stderr, "no such label \'%s\'!\n", label);
exit(0);
exit(1);
}
return ret;
@ -549,9 +549,11 @@ static uint8_t bpf_encode_jt_jf_offset(int off, int i)
{
int delta = off - i - 1;
if (delta < 0 || delta > 255)
fprintf(stderr, "warning: insn #%d jumps to insn #%d, "
if (delta < 0 || delta > 255) {
fprintf(stderr, "error: insn #%d jumps to insn #%d, "
"which is out of range\n", i, off);
exit(1);
}
return (uint8_t) delta;
}

View File

@ -3,7 +3,6 @@
/bootstrap/
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf

View File

@ -16,15 +16,12 @@ prefix ?= /usr/local
mandir ?= $(prefix)/man
man8dir = $(mandir)/man8
# Load targets for building eBPF helpers man page.
include ../../Makefile.helpers
MAN8_RST = $(wildcard bpftool*.rst)
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
man: man8 helpers
man: man8
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@ -46,16 +43,16 @@ ifndef RST2MAN_DEP
endif
$(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
clean: helpers-clean
clean:
$(call QUIET_CLEAN, Documentation)
$(Q)$(RM) $(DOC_MAN8)
install: man helpers-install
install: man
$(call QUIET_INSTALL, Documentation-man)
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
uninstall: helpers-uninstall
uninstall:
$(call QUIET_UNINST, Documentation-man)
$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
$(Q)$(RMDIR) $(DESTDIR)$(man8dir)

View File

@ -36,6 +36,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
};
struct btf_attach_table {
@ -327,6 +328,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
jsonw_end_array(w);
break;
}
case BTF_KIND_FLOAT: {
if (json_output)
jsonw_uint_field(w, "size", t->size);
else
printf(" size=%u", t->size);
break;
}
default:
break;
}

View File

@ -596,6 +596,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_INT:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FLOAT:
BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_STRUCT:

View File

@ -336,6 +336,10 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_BPF_JIT", },
/* Avoid compiling eBPF interpreter (use JIT only) */
{ "CONFIG_BPF_JIT_ALWAYS_ON", },
/* Kernel BTF debug information available */
{ "CONFIG_DEBUG_INFO_BTF", },
/* Kernel module BTF debug information available */
{ "CONFIG_DEBUG_INFO_BTF_MODULES", },
/* cgroups */
{ "CONFIG_CGROUPS", },

View File

@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);

View File

@ -16,7 +16,10 @@ CFLAGS := -g -Wall
# Try to detect best kernel BTF source
KERNEL_REL := $(shell uname -r)
VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../vmlinux /sys/kernel/btf/vmlinux \
/boot/vmlinux-$(KERNEL_REL)
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
$(wildcard $(VMLINUX_BTF_PATHS))))
@ -66,12 +69,16 @@ $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
$(QUIET_MKDIR)mkdir -p $@
$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
ifeq ($(VMLINUX_H),)
$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
"specify its location." >&2; \
exit 1;\
fi
$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
else
$(Q)cp "$(VMLINUX_H)" $@
endif
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@

View File

@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10240);
__type(key, u32);
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, u64);
} start SEC(".maps");
@ -25,15 +25,20 @@ struct {
/* record enqueue timestamp */
__always_inline
static int trace_enqueue(u32 tgid, u32 pid)
static int trace_enqueue(struct task_struct *t)
{
u64 ts;
u32 pid = t->pid;
u64 *ptr;
if (!pid || (targ_pid && targ_pid != pid))
return 0;
ts = bpf_ktime_get_ns();
bpf_map_update_elem(&start, &pid, &ts, 0);
ptr = bpf_task_storage_get(&start, t, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
*ptr = bpf_ktime_get_ns();
return 0;
}
@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
return trace_enqueue(p->tgid, p->pid);
return trace_enqueue(p);
}
SEC("tp_btf/sched_wakeup_new")
@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
return trace_enqueue(p->tgid, p->pid);
return trace_enqueue(p);
}
SEC("tp_btf/sched_switch")
@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
/* ivcsw: treat like an enqueue event and store timestamp */
if (prev->state == TASK_RUNNING)
trace_enqueue(prev->tgid, prev->pid);
trace_enqueue(prev);
pid = next->pid;
/* For pid mismatch, save a bpf_task_storage_get */
if (!pid || (targ_pid && targ_pid != pid))
return 0;
/* fetch timestamp and calculate delta */
tsp = bpf_map_lookup_elem(&start, &pid);
tsp = bpf_task_storage_get(&start, next, 0, 0);
if (!tsp)
return 0; /* missed enqueue */
@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
&event, sizeof(event));
bpf_map_delete_elem(&start, &pid);
bpf_task_storage_delete(&start, next);
return 0;
}

View File

@ -93,7 +93,717 @@ union bpf_iter_link_info {
} map;
};
/* BPF syscall commands, see bpf(2) man-page for details. */
/* BPF syscall commands, see bpf(2) man-page for more details. */
/**
* DOC: eBPF Syscall Preamble
*
* The operation to be performed by the **bpf**\ () system call is determined
* by the *cmd* argument. Each operation takes an accompanying argument,
* provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
* below). The size argument is the size of the union pointed to by *attr*.
*/
/**
* DOC: eBPF Syscall Commands
*
* BPF_MAP_CREATE
* Description
* Create a map and return a file descriptor that refers to the
* map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
* is automatically enabled for the new file descriptor.
*
* Applying **close**\ (2) to the file descriptor returned by
* **BPF_MAP_CREATE** will delete the map (but see NOTES).
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_MAP_LOOKUP_ELEM
* Description
* Look up an element with a given *key* in the map referred to
* by the file descriptor *map_fd*.
*
* The *flags* argument may be specified as one of the
* following:
*
* **BPF_F_LOCK**
* Look up the value of a spin-locked map without
* returning the lock. This must be specified if the
* elements contain a spinlock.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_UPDATE_ELEM
* Description
* Create or update an element (key/value pair) in a specified map.
*
* The *flags* argument should be specified as one of the
* following:
*
* **BPF_ANY**
* Create a new element or update an existing element.
* **BPF_NOEXIST**
* Create a new element only if it did not exist.
* **BPF_EXIST**
* Update an existing element.
* **BPF_F_LOCK**
* Update a spin_lock-ed map element.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
* **E2BIG**, **EEXIST**, or **ENOENT**.
*
* **E2BIG**
* The number of elements in the map reached the
* *max_entries* limit specified at map creation time.
* **EEXIST**
* If *flags* specifies **BPF_NOEXIST** and the element
* with *key* already exists in the map.
* **ENOENT**
* If *flags* specifies **BPF_EXIST** and the element with
* *key* does not exist in the map.
*
* BPF_MAP_DELETE_ELEM
* Description
* Look up and delete an element by key in a specified map.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_GET_NEXT_KEY
* Description
* Look up an element by key in a specified map and return the key
* of the next element. Can be used to iterate over all elements
* in the map.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* The following cases can be used to iterate over all elements of
* the map:
*
* * If *key* is not found, the operation returns zero and sets
* the *next_key* pointer to the key of the first element.
* * If *key* is found, the operation returns zero and sets the
* *next_key* pointer to the key of the next element.
* * If *key* is the last element, returns -1 and *errno* is set
* to **ENOENT**.
*
* May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
* **EINVAL** on error.
*
* BPF_PROG_LOAD
* Description
* Verify and load an eBPF program, returning a new file
* descriptor associated with the program.
*
* Applying **close**\ (2) to the file descriptor returned by
* **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
*
* The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
* automatically enabled for the new file descriptor.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_OBJ_PIN
* Description
* Pin an eBPF program or map referred by the specified *bpf_fd*
* to the provided *pathname* on the filesystem.
*
* The *pathname* argument must not contain a dot (".").
*
* On success, *pathname* retains a reference to the eBPF object,
* preventing deallocation of the object when the original
* *bpf_fd* is closed. This allow the eBPF object to live beyond
* **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
* process.
*
* Applying **unlink**\ (2) or similar calls to the *pathname*
* unpins the object from the filesystem, removing the reference.
* If no other file descriptors or filesystem nodes refer to the
* same object, it will be deallocated (see NOTES).
*
* The filesystem type for the parent directory of *pathname* must
* be **BPF_FS_MAGIC**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_OBJ_GET
* Description
* Open a file descriptor for the eBPF object pinned to the
* specified *pathname*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_PROG_ATTACH
* Description
* Attach an eBPF program to a *target_fd* at the specified
* *attach_type* hook.
*
* The *attach_type* specifies the eBPF attachment point to
* attach the program to, and must be one of *bpf_attach_type*
* (see below).
*
* The *attach_bpf_fd* must be a valid file descriptor for a
* loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
* or sock_ops type corresponding to the specified *attach_type*.
*
* The *target_fd* must be a valid file descriptor for a kernel
* object which depends on the attach type of *attach_bpf_fd*:
*
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
* **BPF_PROG_TYPE_CGROUP_SKB**,
* **BPF_PROG_TYPE_CGROUP_SOCK**,
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
* **BPF_PROG_TYPE_SOCK_OPS**
*
* Control Group v2 hierarchy with the eBPF controller
* enabled. Requires the kernel to be compiled with
* **CONFIG_CGROUP_BPF**.
*
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
*
* Network namespace (eg /proc/self/ns/net).
*
* **BPF_PROG_TYPE_LIRC_MODE2**
*
* LIRC device path (eg /dev/lircN). Requires the kernel
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
*
* **BPF_PROG_TYPE_SK_SKB**,
* **BPF_PROG_TYPE_SK_MSG**
*
* eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_DETACH
* Description
* Detach the eBPF program associated with the *target_fd* at the
* hook specified by *attach_type*. The program must have been
* previously attached using **BPF_PROG_ATTACH**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_TEST_RUN
* Description
* Run the eBPF program associated with the *prog_fd* a *repeat*
* number of times against a provided program context *ctx_in* and
* data *data_in*, and return the modified program context
* *ctx_out*, *data_out* (for example, packet data), result of the
* execution *retval*, and *duration* of the test run.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* **ENOSPC**
* Either *data_size_out* or *ctx_size_out* is too small.
* **ENOTSUPP**
* This command is not supported by the program type of
* the program referred to by *prog_fd*.
*
* BPF_PROG_GET_NEXT_ID
* Description
* Fetch the next eBPF program currently loaded into the kernel.
*
* Looks for the eBPF program with an id greater than *start_id*
* and updates *next_id* on success. If no other eBPF programs
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_MAP_GET_NEXT_ID
* Description
* Fetch the next eBPF map currently loaded into the kernel.
*
* Looks for the eBPF map with an id greater than *start_id*
* and updates *next_id* on success. If no other eBPF maps
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_PROG_GET_FD_BY_ID
* Description
* Open a file descriptor for the eBPF program corresponding to
* *prog_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_MAP_GET_FD_BY_ID
* Description
* Open a file descriptor for the eBPF map corresponding to
* *map_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_OBJ_GET_INFO_BY_FD
* Description
* Obtain information about the eBPF object corresponding to
* *bpf_fd*.
*
* Populates up to *info_len* bytes of *info*, which will be in
* one of the following formats depending on the eBPF object type
* of *bpf_fd*:
*
* * **struct bpf_prog_info**
* * **struct bpf_map_info**
* * **struct bpf_btf_info**
* * **struct bpf_link_info**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_QUERY
* Description
* Obtain information about eBPF programs associated with the
* specified *attach_type* hook.
*
* The *target_fd* must be a valid file descriptor for a kernel
* object which depends on the attach type of *attach_bpf_fd*:
*
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
* **BPF_PROG_TYPE_CGROUP_SKB**,
* **BPF_PROG_TYPE_CGROUP_SOCK**,
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
* **BPF_PROG_TYPE_SOCK_OPS**
*
* Control Group v2 hierarchy with the eBPF controller
* enabled. Requires the kernel to be compiled with
* **CONFIG_CGROUP_BPF**.
*
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
*
* Network namespace (eg /proc/self/ns/net).
*
* **BPF_PROG_TYPE_LIRC_MODE2**
*
* LIRC device path (eg /dev/lircN). Requires the kernel
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
*
* **BPF_PROG_QUERY** always fetches the number of programs
* attached and the *attach_flags* which were used to attach those
* programs. Additionally, if *prog_ids* is nonzero and the number
* of attached programs is less than *prog_cnt*, populates
* *prog_ids* with the eBPF program ids of the programs attached
* at *target_fd*.
*
* The following flags may alter the result:
*
* **BPF_F_QUERY_EFFECTIVE**
* Only return information regarding programs which are
* currently effective at the specified *target_fd*.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_RAW_TRACEPOINT_OPEN
* Description
* Attach an eBPF program to a tracepoint *name* to access kernel
* internal arguments of the tracepoint in their raw form.
*
* The *prog_fd* must be a valid file descriptor associated with
* a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
*
* No ABI guarantees are made about the content of tracepoint
* arguments exposed to the corresponding eBPF program.
*
* Applying **close**\ (2) to the file descriptor returned by
* **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_BTF_LOAD
* Description
* Verify and load BPF Type Format (BTF) metadata into the kernel,
* returning a new file descriptor associated with the metadata.
* BTF is described in more detail at
* https://www.kernel.org/doc/html/latest/bpf/btf.html.
*
* The *btf* parameter must point to valid memory providing
* *btf_size* bytes of BTF binary metadata.
*
* The returned file descriptor can be passed to other **bpf**\ ()
* subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
* associate the BTF with those objects.
*
* Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
* parameters to specify a *btf_log_buf*, *btf_log_size* and
* *btf_log_level* which allow the kernel to return freeform log
* output regarding the BTF verification process.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_BTF_GET_FD_BY_ID
* Description
* Open a file descriptor for the BPF Type Format (BTF)
* corresponding to *btf_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_TASK_FD_QUERY
* Description
* Obtain information about eBPF programs associated with the
* target process identified by *pid* and *fd*.
*
* If the *pid* and *fd* are associated with a tracepoint, kprobe
* or uprobe perf event, then the *prog_id* and *fd_type* will
* be populated with the eBPF program id and file descriptor type
* of type **bpf_task_fd_type**. If associated with a kprobe or
* uprobe, the *probe_offset* and *probe_addr* will also be
* populated. Optionally, if *buf* is provided, then up to
* *buf_len* bytes of *buf* will be populated with the name of
* the tracepoint, kprobe or uprobe.
*
* The resulting *prog_id* may be introspected in deeper detail
* using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_LOOKUP_AND_DELETE_ELEM
* Description
* Look up an element with the given *key* in the map referred to
* by the file descriptor *fd*, and if found, delete the element.
*
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
* implement this command as a "pop" operation, deleting the top
* element rather than one corresponding to *key*.
* The *key* and *key_len* parameters should be zeroed when
* issuing this operation for these map types.
*
* This command is only valid for the following map types:
* * **BPF_MAP_TYPE_QUEUE**
* * **BPF_MAP_TYPE_STACK**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_FREEZE
* Description
* Freeze the permissions of the specified map.
*
* Write permissions may be frozen by passing zero *flags*.
* Upon success, no future syscall invocations may alter the
* map state of *map_fd*. Write operations from eBPF programs
* are still possible for a frozen map.
*
* Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_BTF_GET_NEXT_ID
* Description
* Fetch the next BPF Type Format (BTF) object currently loaded
* into the kernel.
*
* Looks for the BTF object with an id greater than *start_id*
* and updates *next_id* on success. If no other BTF objects
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_MAP_LOOKUP_BATCH
* Description
* Iterate and fetch multiple elements in a map.
*
* Two opaque values are used to manage batch operations,
* *in_batch* and *out_batch*. Initially, *in_batch* must be set
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
* continue iteration from the current point.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
* and value size of the map *map_fd*. The *keys* buffer must be
* of *key_size* * *count*. The *values* buffer must be of
* *value_size* * *count*.
*
* The *elem_flags* argument may be specified as one of the
* following:
*
* **BPF_F_LOCK**
* Look up the value of a spin-locked map without
* returning the lock. This must be specified if the
* elements contain a spinlock.
*
* On success, *count* elements from the map are copied into the
* user buffer, with the keys copied into *keys* and the values
* copied into the corresponding indices in *values*.
*
* If an error is returned and *errno* is not **EFAULT**, *count*
* is set to the number of successfully processed elements.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* May set *errno* to **ENOSPC** to indicate that *keys* or
* *values* is too small to dump an entire bucket during
* iteration of a hash-based map type.
*
* BPF_MAP_LOOKUP_AND_DELETE_BATCH
* Description
* Iterate and delete all elements in a map.
*
* This operation has the same behavior as
* **BPF_MAP_LOOKUP_BATCH** with two exceptions:
*
* * Every element that is successfully returned is also deleted
* from the map. This is at least *count* elements. Note that
* *count* is both an input and an output parameter.
* * Upon returning with *errno* set to **EFAULT**, up to
* *count* elements may be deleted without returning the keys
* and values of the deleted elements.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_MAP_UPDATE_BATCH
* Description
* Update multiple elements in a map by *key*.
*
* The *keys* and *values* are input parameters which must point
* to memory large enough to hold *count* items based on the key
* and value size of the map *map_fd*. The *keys* buffer must be
* of *key_size* * *count*. The *values* buffer must be of
* *value_size* * *count*.
*
* Each element specified in *keys* is sequentially updated to the
* value in the corresponding index in *values*. The *in_batch*
* and *out_batch* parameters are ignored and should be zeroed.
*
* The *elem_flags* argument should be specified as one of the
* following:
*
* **BPF_ANY**
* Create new elements or update a existing elements.
* **BPF_NOEXIST**
* Create new elements only if they do not exist.
* **BPF_EXIST**
* Update existing elements.
* **BPF_F_LOCK**
* Update spin_lock-ed map elements. This must be
* specified if the map value contains a spinlock.
*
* On success, *count* elements from the map are updated.
*
* If an error is returned and *errno* is not **EFAULT**, *count*
* is set to the number of successfully processed elements.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
* **E2BIG**. **E2BIG** indicates that the number of elements in
* the map reached the *max_entries* limit specified at map
* creation time.
*
* May set *errno* to one of the following error codes under
* specific circumstances:
*
* **EEXIST**
* If *flags* specifies **BPF_NOEXIST** and the element
* with *key* already exists in the map.
* **ENOENT**
* If *flags* specifies **BPF_EXIST** and the element with
* *key* does not exist in the map.
*
* BPF_MAP_DELETE_BATCH
* Description
* Delete multiple elements in a map by *key*.
*
* The *keys* parameter is an input parameter which must point
* to memory large enough to hold *count* items based on the key
* size of the map *map_fd*, that is, *key_size* * *count*.
*
* Each element specified in *keys* is sequentially deleted. The
* *in_batch*, *out_batch*, and *values* parameters are ignored
* and should be zeroed.
*
* The *elem_flags* argument may be specified as one of the
* following:
*
* **BPF_F_LOCK**
* Look up the value of a spin-locked map without
* returning the lock. This must be specified if the
* elements contain a spinlock.
*
* On success, *count* elements from the map are updated.
*
* If an error is returned and *errno* is not **EFAULT**, *count*
* is set to the number of successfully processed elements. If
* *errno* is **EFAULT**, up to *count* elements may be been
* deleted.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_LINK_CREATE
* Description
* Attach an eBPF program to a *target_fd* at the specified
* *attach_type* hook and return a file descriptor handle for
* managing the link.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_LINK_UPDATE
* Description
* Update the eBPF program in the specified *link_fd* to
* *new_prog_fd*.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_LINK_GET_FD_BY_ID
* Description
* Open a file descriptor for the eBPF Link corresponding to
* *link_id*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_LINK_GET_NEXT_ID
* Description
* Fetch the next eBPF link currently loaded into the kernel.
*
* Looks for the eBPF link with an id greater than *start_id*
* and updates *next_id* on success. If no other eBPF links
* remain with ids higher than *start_id*, returns -1 and sets
* *errno* to **ENOENT**.
*
* Return
* Returns zero on success. On error, or when no id remains, -1
* is returned and *errno* is set appropriately.
*
* BPF_ENABLE_STATS
* Description
* Enable eBPF runtime statistics gathering.
*
* Runtime statistics gathering for the eBPF runtime is disabled
* by default to minimize the corresponding performance overhead.
* This command enables statistics globally.
*
* Multiple programs may independently enable statistics.
* After gathering the desired statistics, eBPF runtime statistics
* may be disabled again by calling **close**\ (2) for the file
* descriptor returned by this function. Statistics will only be
* disabled system-wide when all outstanding file descriptors
* returned by prior calls for this subcommand are closed.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_ITER_CREATE
* Description
* Create an iterator on top of the specified *link_fd* (as
* previously created using **BPF_LINK_CREATE**) and return a
* file descriptor that can be used to trigger the iteration.
*
* If the resulting file descriptor is pinned to the filesystem
* using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
* for that path will trigger the iterator to read kernel state
* using the eBPF program attached to *link_fd*.
*
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
* BPF_LINK_DETACH
* Description
* Forcefully detach the specified *link_fd* from its
* corresponding attachment point.
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* BPF_PROG_BIND_MAP
* Description
* Bind a map to the lifetime of an eBPF program.
*
* The map identified by *map_fd* is bound to the program
* identified by *prog_fd* and only released when *prog_fd* is
* released. This may be used in cases where metadata should be
* associated with a program which otherwise does not contain any
* references to the map (for example, embedded in the eBPF
* program instructions).
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
* * After **fork**\ (2), the child inherits file descriptors
* referring to the same eBPF objects.
* * File descriptors referring to eBPF objects can be transferred over
* **unix**\ (7) domain sockets.
* * File descriptors referring to eBPF objects can be duplicated in the
* usual way, using **dup**\ (2) and similar calls.
* * File descriptors referring to eBPF objects can be pinned to the
* filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
*
* An eBPF object is deallocated only after all file descriptors referring
* to the object have been closed and no references remain pinned to the
* filesystem or attached (for example, bound to a program or device).
*/
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@ -393,6 +1103,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@ -720,7 +1439,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
* $ ./scripts/bpf_helpers_doc.py \
* $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@ -1765,6 +2484,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
* L2 type as Ethernet.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@ -3909,6 +4632,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -4075,6 +4826,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@ -4168,6 +4920,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@ -5205,7 +5958,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
union {
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
};
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */

View File

@ -52,7 +52,7 @@ struct btf_type {
};
};
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
#define BTF_KIND_MAX BTF_KIND_DATASEC
#define BTF_KIND_FLOAT 16 /* Floating point */
#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately

View File

@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
$(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)

View File

@ -291,6 +291,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
@ -338,6 +339,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
return 0;
case BTF_KIND_INT:
*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@ -578,6 +580,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
size = t->size;
goto done;
case BTF_KIND_PTR:
@ -621,6 +624,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
return btf_ptr_sz(btf);
@ -1756,6 +1760,47 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
return btf_commit_type(btf, sz);
}
/*
* Append new BTF_KIND_FLOAT type with:
* - *name* - non-empty, non-NULL type name;
* - *sz* - size of the type, in bytes;
* Returns:
* - >0, type ID of newly added BTF type;
* - <0, on error.
*/
int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
{
struct btf_type *t;
int sz, name_off;
/* non-empty name */
if (!name || !name[0])
return -EINVAL;
/* byte_sz must be one of the explicitly allowed values */
if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
byte_sz != 16)
return -EINVAL;
if (btf_ensure_modifiable(btf))
return -ENOMEM;
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
return -ENOMEM;
name_off = btf__add_str(btf, name);
if (name_off < 0)
return name_off;
t->name_off = name_off;
t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
t->size = byte_sz;
return btf_commit_type(btf, sz);
}
/* it's completely legal to append BTF types with type IDs pointing forward to
* types that haven't been appended yet, so we only make sure that id looks
* sane, we can't guarantee that ID will always be valid
@ -1883,7 +1928,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
* - *byte_sz* - size of the struct, in bytes;
*
* Struct initially has no fields in it. Fields can be added by
* btf__add_field() right after btf__add_struct() succeeds.
* btf__add_field() right after btf__add_struct() succeeds.
*
* Returns:
* - >0, type ID of newly added BTF type;
@ -3626,6 +3671,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
case BTF_KIND_FWD:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
h = btf_hash_common(t);
break;
case BTF_KIND_INT:
@ -3722,6 +3768,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
@ -3983,6 +4030,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
return btf_equal_common(cand_type, canon_type);
case BTF_KIND_CONST:
@ -4479,6 +4527,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
break;
case BTF_KIND_FWD:

View File

@ -95,6 +95,7 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_array(struct btf *btf,
int index_type_id, int elem_type_id, __u32 nr_elems);
@ -294,6 +295,11 @@ static inline bool btf_is_datasec(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_DATASEC;
}
static inline bool btf_is_float(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_FLOAT;
}
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));

View File

@ -279,6 +279,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
case BTF_KIND_INT:
case BTF_KIND_ENUM:
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
break;
case BTF_KIND_VOLATILE:
@ -453,6 +454,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
tstate->order_state = ORDERED;
return 0;
@ -1133,6 +1135,7 @@ skip_mod:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FLOAT:
goto done;
default:
pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
@ -1247,6 +1250,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_FLOAT:
btf_dump_emit_mods(d, decls);
name = btf_name_of(d, t->name_off);
btf_dump_printf(d, "%s", name);

View File

@ -178,6 +178,8 @@ enum kern_feature_id {
FEAT_PROG_BIND_MAP,
/* Kernel support for module BTFs */
FEAT_MODULE_BTF,
/* BTF_KIND_FLOAT support */
FEAT_BTF_FLOAT,
__FEAT_CNT,
};
@ -188,6 +190,7 @@ enum reloc_type {
RELO_CALL,
RELO_DATA,
RELO_EXTERN,
RELO_SUBPROG_ADDR,
};
struct reloc_desc {
@ -574,6 +577,16 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
static bool is_ldimm64(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
@ -1935,6 +1948,7 @@ static const char *btf_kind_str(const struct btf_type *t)
case BTF_KIND_FUNC_PROTO: return "func_proto";
case BTF_KIND_VAR: return "var";
case BTF_KIND_DATASEC: return "datasec";
case BTF_KIND_FLOAT: return "float";
default: return "unknown";
}
}
@ -2384,15 +2398,17 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
{
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
bool has_float = kernel_supports(FEAT_BTF_FLOAT);
bool has_func = kernel_supports(FEAT_BTF_FUNC);
return !has_func || !has_datasec || !has_func_global;
return !has_func || !has_datasec || !has_func_global || !has_float;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
bool has_float = kernel_supports(FEAT_BTF_FLOAT);
bool has_func = kernel_supports(FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@ -2445,6 +2461,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
} else if (!has_func_global && btf_is_func(t)) {
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
} else if (!has_float && btf_is_float(t)) {
/* replace FLOAT with an equally-sized empty STRUCT;
* since C compilers do not accept e.g. "float" as a
* valid struct name, make it anonymous
*/
t->name_off = 0;
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
}
}
}
@ -2974,6 +2997,23 @@ static bool sym_is_extern(const GElf_Sym *sym)
GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
}
static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
{
int bind = GELF_ST_BIND(sym->st_info);
int type = GELF_ST_TYPE(sym->st_info);
/* in .text section */
if (sym->st_shndx != text_shndx)
return false;
/* local function */
if (bind == STB_LOCAL && type == STT_SECTION)
return true;
/* global function */
return bind == STB_GLOBAL && type == STT_FUNC;
}
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
@ -3395,7 +3435,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
if (!is_ldimm64(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
@ -3430,6 +3470,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return -LIBBPF_ERRNO__RELOC;
}
/* loading subprog addresses */
if (sym_is_subprog(sym, obj->efile.text_shndx)) {
/* global_func: sym->st_value = offset in the section, insn->imm = 0.
* local_func: sym->st_value = 0, insn->imm = offset in the section.
*/
if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
prog->name, sym_name, (size_t)sym->st_value, insn->imm);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_SUBPROG_ADDR;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = sym->st_value;
return 0;
}
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
@ -3882,6 +3939,18 @@ static int probe_kern_btf_datasec(void)
strs, sizeof(strs)));
}
static int probe_kern_btf_float(void)
{
static const char strs[] = "\0float";
__u32 types[] = {
/* float */
BTF_TYPE_FLOAT_ENC(1, 4),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_array_mmap(void)
{
struct bpf_create_map_attr attr = {
@ -4061,6 +4130,9 @@ static struct kern_feature_desc {
[FEAT_MODULE_BTF] = {
"module BTF support", probe_module_btf,
},
[FEAT_BTF_FLOAT] = {
"BTF_KIND_FLOAT support", probe_kern_btf_float,
},
};
static bool kernel_supports(enum kern_feature_id feat_id)
@ -5566,11 +5638,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
}
static bool is_ldimm64(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
{
switch (BPF_SIZE(insn->code)) {
@ -6172,6 +6239,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
relo->processed = true;
break;
case RELO_SUBPROG_ADDR:
insn[0].src_reg = BPF_PSEUDO_FUNC;
/* will be handled as a follow up pass */
break;
case RELO_CALL:
/* will be handled as a follow up pass */
break;
@ -6358,11 +6429,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
if (!insn_is_subprog_call(insn))
if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
continue;
relo = find_prog_insn_relo(prog, insn_idx);
if (relo && relo->type != RELO_CALL) {
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
@ -6374,8 +6445,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* call always has imm = -1, but for static functions
* relocation is against STT_SECTION and insn->imm
* points to a start of a static function
*
* for subprog addr relocation, the relo->sym_off + insn->imm is
* the byte offset in the corresponding section.
*/
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
if (relo->type == RELO_CALL)
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
else
sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
} else if (insn_is_pseudo_func(insn)) {
/*
* RELO_SUBPROG_ADDR relo is always emitted even if both
* functions are in the same section, so it shouldn't reach here.
*/
pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
prog->name, insn_idx);
return -LIBBPF_ERRNO__RELOC;
} else {
/* if subprogram call is to a static function within
* the same ELF section, there won't be any relocation

View File

@ -350,3 +350,8 @@ LIBBPF_0.3.0 {
xsk_setup_xdp_prog;
xsk_socket__update_xskmap;
} LIBBPF_0.2.0;
LIBBPF_0.4.0 {
global:
btf__add_float;
} LIBBPF_0.3.0;

View File

@ -31,6 +31,8 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
#define BTF_TYPE_FLOAT_ENC(name, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)

View File

@ -5,6 +5,7 @@
#define __LIBBPF_LIBBPF_UTIL_H
#include <stdbool.h>
#include <linux/compiler.h>
#ifdef __cplusplus
extern "C" {
@ -15,29 +16,56 @@ extern "C" {
* application that uses libbpf.
*/
#if defined(__i386__) || defined(__x86_64__)
# define libbpf_smp_rmb() asm volatile("" : : : "memory")
# define libbpf_smp_wmb() asm volatile("" : : : "memory")
# define libbpf_smp_mb() \
asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
/* Hinders stores to be observed before older loads. */
# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
# define libbpf_smp_store_release(p, v) \
do { \
asm volatile("" : : : "memory"); \
WRITE_ONCE(*p, v); \
} while (0)
# define libbpf_smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
asm volatile("" : : : "memory"); \
___p1; \
})
#elif defined(__aarch64__)
# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
# define libbpf_smp_rwmb() libbpf_smp_mb()
#elif defined(__arm__)
/* These are only valid for armv7 and above */
# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
# define libbpf_smp_rwmb() libbpf_smp_mb()
#else
/* Architecture missing native barrier functions. */
# define libbpf_smp_rmb() __sync_synchronize()
# define libbpf_smp_wmb() __sync_synchronize()
# define libbpf_smp_mb() __sync_synchronize()
# define libbpf_smp_rwmb() __sync_synchronize()
# define libbpf_smp_store_release(p, v) \
asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
# define libbpf_smp_load_acquire(p) \
({ \
typeof(*p) ___p1; \
asm volatile ("ldar %w0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
___p1; \
})
#elif defined(__riscv)
# define libbpf_smp_store_release(p, v) \
do { \
asm volatile ("fence rw,w" : : : "memory"); \
WRITE_ONCE(*p, v); \
} while (0)
# define libbpf_smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
asm volatile ("fence r,rw" : : : "memory"); \
___p1; \
})
#endif
#ifndef libbpf_smp_store_release
#define libbpf_smp_store_release(p, v) \
do { \
__sync_synchronize(); \
WRITE_ONCE(*p, v); \
} while (0)
#endif
#ifndef libbpf_smp_load_acquire
#define libbpf_smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
__sync_synchronize(); \
___p1; \
})
#endif
#ifdef __cplusplus

View File

@ -96,7 +96,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
* this function. Without this optimization it whould have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
r->cached_cons = *r->consumer + r->size;
r->cached_cons = libbpf_smp_load_acquire(r->consumer);
r->cached_cons += r->size;
return r->cached_cons - r->cached_prod;
}
@ -106,7 +107,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
__u32 entries = r->cached_prod - r->cached_cons;
if (entries == 0) {
r->cached_prod = *r->producer;
r->cached_prod = libbpf_smp_load_acquire(r->producer);
entries = r->cached_prod - r->cached_cons;
}
@ -129,9 +130,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
/* Make sure everything has been written to the ring before indicating
* this to the kernel by writing the producer pointer.
*/
libbpf_smp_wmb();
*prod->producer += nb;
libbpf_smp_store_release(prod->producer, *prod->producer + nb);
}
static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@ -139,11 +138,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
__u32 entries = xsk_cons_nb_avail(cons, nb);
if (entries > 0) {
/* Make sure we do not speculatively read the data before
* we have received the packet buffers from the ring.
*/
libbpf_smp_rmb();
*idx = cons->cached_cons;
cons->cached_cons += entries;
}
@ -161,9 +155,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
/* Make sure data has been read before indicating we are done
* with the entries by updating the consumer pointer.
*/
libbpf_smp_rwmb();
libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
*cons->consumer += nb;
}
static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)

View File

@ -20,4 +20,4 @@ tools/lib/bitmap.c
tools/lib/str_error_r.c
tools/lib/vsprintf.c
tools/lib/zalloc.c
scripts/bpf_helpers_doc.py
scripts/bpf_doc.py

View File

@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
bpf-helpers*
bpf-syscall*
test_verifier
test_maps
test_lru_map

View File

@ -68,6 +68,7 @@ TEST_PROGS := test_kmod.sh \
test_bpftool_build.sh \
test_bpftool.sh \
test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh
TEST_PROGS_EXTENDED := with_addr.sh \
@ -103,6 +104,7 @@ override define CLEAN
$(call msg,CLEAN)
$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
$(Q)$(MAKE) -C bpf_testmod clean
$(Q)$(MAKE) docs-clean
endef
include ../lib.mk
@ -180,6 +182,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
cp $(SCRATCH_DIR)/runqslower $@
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
$(TEST_GEN_FILES): docs
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@ -200,11 +203,16 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
CC=$(HOSTCC) LD=$(HOSTLD) \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
$(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
-C $(BPFTOOLDIR)/Documentation \
OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
prefix= DESTDIR=$(SCRATCH_DIR)/ install
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
-f Makefile.docs \
prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
docs-clean:
$(Q)$(MAKE) $(submake_extras) \
-f Makefile.docs \
prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
@ -382,11 +390,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
# only copy extra resources if in flavored build
# non-flavored in-srctree builds receive special treatment, in particular, we
# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
ifneq ($2,)
ifneq ($2:$(OUTPUT),:$(shell pwd))
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
$(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
@ -476,3 +485,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
.PHONY: docs docs-clean

View File

@ -0,0 +1,82 @@
# SPDX-License-Identifier: GPL-2.0-only
include ../../../scripts/Makefile.include
include ../../../scripts/utilities.mak
INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
ifeq ($(V),1)
Q =
else
Q = @
endif
prefix ?= /usr/local
mandir ?= $(prefix)/man
man2dir = $(mandir)/man2
man7dir = $(mandir)/man7
SYSCALL_RST = bpf-syscall.rst
MAN2_RST = $(SYSCALL_RST)
HELPERS_RST = bpf-helpers.rst
MAN7_RST = $(HELPERS_RST)
_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
DOCTARGETS := helpers syscall
docs: $(DOCTARGETS)
syscall: man2
helpers: man7
man2: $(DOC_MAN2)
man7: $(DOC_MAN7)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
# Configure make rules for the man page bpf-$1.$2.
# $1 - target for scripts/bpf_doc.py
# $2 - man page section to generate the troff file
define DOCS_RULES =
$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
$$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
--filename $$< > $$@
$(OUTPUT)%.$2: $(OUTPUT)%.rst
ifndef RST2MAN_DEP
$$(error "rst2man not found, but required to generate man pages")
endif
$$(QUIET_GEN)rst2man $$< > $$@
docs-clean-$1:
$$(call QUIET_CLEAN, eBPF_$1-manpage)
$(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
docs-install-$1: docs
$$(call QUIET_INSTALL, eBPF_$1-manpage)
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
$(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
docs-uninstall-$1:
$$(call QUIET_UNINST, eBPF_$1-manpage)
$(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
$(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
endef
# Create the make targets to generate manual pages by name and section
$(eval $(call DOCS_RULES,helpers,7))
$(eval $(call DOCS_RULES,syscall,2))
docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
.PHONY: docs docs-clean docs-install docs-uninstall man2 man7

View File

@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
__ https://reviews.llvm.org/D78466
bpf_verif_scale/loop6.o test failure with Clang 12
==================================================
With Clang 12, the following bpf_verif_scale test failed:
* ``bpf_verif_scale/loop6.o``
The verifier output looks like
.. code-block:: c
R1 type=ctx expected=fp
The sequence of 8193 jumps is too complex.
The reason is compiler generating the following code
.. code-block:: c
; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
15: bc 51 00 00 00 00 00 00 w1 = w5
16: 04 01 00 00 ff ff ff ff w1 += -1
17: 67 05 00 00 20 00 00 00 r5 <<= 32
18: 77 05 00 00 20 00 00 00 r5 >>= 32
19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
20: b7 05 00 00 06 00 00 00 r5 = 6
00000000000000a8 <LBB0_4>:
21: b7 02 00 00 00 00 00 00 r2 = 0
22: b7 01 00 00 00 00 00 00 r1 = 0
; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
Note that insn #15 has w1 = w5 and w1 is refined later but
r5(w5) is eventually saved on stack at insn #24 for later use.
This cause later verifier failure. The bug has been `fixed`__ in
Clang 13.
__ https://reviews.llvm.org/D97479
BPF CO-RE-based tests and Clang version
=======================================
@ -131,3 +170,12 @@ failures:
.. _2: https://reviews.llvm.org/D85174
.. _3: https://reviews.llvm.org/D83878
.. _4: https://reviews.llvm.org/D83242
Floating-point tests and Clang version
======================================
Certain selftests, e.g. core_reloc, require support for the floating-point
types, which was introduced in `Clang 13`__. The older Clang versions will
either crash when compiling these tests, or generate an incorrect BTF.
__ https://reviews.llvm.org/D83289

View File

@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
};
static const char *btf_kind_str(__u16 kind)
@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
break;
}
case BTF_KIND_FLOAT:
fprintf(out, " size=%u", t->size);
break;
default:
break;
}

View File

@ -2,6 +2,44 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
#define OP_RT_RA_MASK 0xffff0000UL
#define LIS_R2 0x3c400000UL
#define ADDIS_R2_R12 0x3c4c0000UL
#define ADDI_R2_R2 0x38420000UL
static ssize_t get_offset(ssize_t addr, ssize_t base)
{
u32 *insn = (u32 *) addr;
/*
* A PPC64 ABIv2 function may have a local and a global entry
* point. We need to use the local entry point when patching
* functions, so identify and step over the global entry point
* sequence.
*
* The global entry point sequence is always of the form:
*
* addis r2,r12,XXXX
* addi r2,r2,XXXX
*
* A linker optimisation may convert the addis to lis:
*
* lis r2,XXXX
* addi r2,r2,XXXX
*/
if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
return (ssize_t)(insn + 2) - base;
else
return addr - base;
}
#else
#define get_offset(addr, base) (addr - base)
#endif
ssize_t get_base_addr() {
size_t start, offset;
char buf[256];
@ -36,7 +74,7 @@ void test_attach_probe(void)
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
uprobe_offset = (size_t)&get_base_addr - base_addr;
uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))

View File

@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
{ "loop6.o", BPF_PROG_TYPE_KPROBE },
/* partial unroll. 19k insn in a loop.
* Total program size 20.8k insn.

View File

@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = {
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
BTF_TYPE_ENC(0, 0x10000000, 4),
BTF_TYPE_ENC(0, 0x20000000, 4),
BTF_END_RAW,
},
.str_sec = "",
@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = {
.max_entries = 1,
},
{
.descr = "float test #1, well-formed",
.raw_types = {
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
/* [1] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */
BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */
BTF_MEMBER_ENC(NAME_TBD, 2, 0),
BTF_MEMBER_ENC(NAME_TBD, 3, 32),
BTF_MEMBER_ENC(NAME_TBD, 4, 64),
BTF_MEMBER_ENC(NAME_TBD, 5, 128),
BTF_MEMBER_ENC(NAME_TBD, 6, 256),
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
"\0floats\0a\0b\0c\0d\0e"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "float_type_check_btf",
.key_size = sizeof(int),
.value_size = 48,
.key_type_id = 1,
.value_type_id = 7,
.max_entries = 1,
},
{
.descr = "float test #2, invalid vlen",
.raw_types = {
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
/* [1] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
/* [2] */
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0float"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "float_type_check_btf",
.key_size = sizeof(int),
.value_size = 4,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 1,
.btf_load_err = true,
.err_str = "vlen != 0",
},
{
.descr = "float test #3, invalid kind_flag",
.raw_types = {
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
/* [1] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
/* [2] */
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0float"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "float_type_check_btf",
.key_size = sizeof(int),
.value_size = 4,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 1,
.btf_load_err = true,
.err_str = "Invalid btf_info kind_flag",
},
{
.descr = "float test #4, member does not fit",
.raw_types = {
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
/* [1] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */
BTF_MEMBER_ENC(NAME_TBD, 2, 0),
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0float\0floats\0x"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "float_type_check_btf",
.key_size = sizeof(int),
.value_size = 4,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 1,
.btf_load_err = true,
.err_str = "Member exceeds struct_size",
},
{
.descr = "float test #5, member is not properly aligned",
.raw_types = {
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
/* [1] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */
BTF_MEMBER_ENC(NAME_TBD, 2, 8),
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0float\0floats\0x"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "float_type_check_btf",
.key_size = sizeof(int),
.value_size = 4,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 1,
.btf_load_err = true,
.err_str = "Member is not properly aligned",
},
{
.descr = "float test #6, invalid size",
.raw_types = {
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
/* [1] */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0float"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "float_type_check_btf",
.key_size = sizeof(int),
.value_size = 6,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 1,
.btf_load_err = true,
.err_str = "Invalid type_size",
},
}; /* struct btf_raw_test raw_tests[] */
static const char *get_next_str(const char *start, const char *end)
@ -6281,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = {
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */
BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */
BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
@ -6296,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = {
/* full copy of the above */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */
BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */
BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */
BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */
BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
BTF_PTR_ENC(9), /* [10] */
BTF_PTR_ENC(12), /* [11] */
BTF_CONST_ENC(7), /* [12] */
BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
},
.expect = {
.raw_types = {
/* int */
BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */
BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */
BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */
BTF_END_RAW,
},
BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
},
.opts = {
.dont_resolve_fwds = false,
@ -6449,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_END_RAW,
},
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
},
.expect = {
.raw_types = {
@ -6474,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_END_RAW,
},
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
},
.opts = {
.dont_resolve_fwds = false,
},
},
{
.descr = "dedup: no int duplicates",
.descr = "dedup: no int/float duplicates",
.input = {
.raw_types = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@ -6498,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
/* all allowed sizes */
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0some other int"),
BTF_STR_SEC("\0int\0some other int\0float"),
},
.expect = {
.raw_types = {
@ -6516,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
/* all allowed sizes */
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
BTF_STR_SEC("\0int\0some other int"),
BTF_STR_SEC("\0int\0some other int\0float"),
},
.opts = {
.dont_resolve_fwds = false,
@ -6630,6 +6779,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);

View File

@ -266,6 +266,7 @@ static int duration = 0;
.arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
.ptr_sz = 8, /* always 8-byte pointer for BPF */ \
.enum_sz = sizeof(((type *)0)->enum_field), \
.float_sz = sizeof(((type *)0)->float_field), \
}
#define SIZE_CASE(name) { \

View File

@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
static unsigned int duration;
static void test_hash_map(void)
{
int i, err, hashmap_fd, max_entries, percpu_map_fd;
struct for_each_hash_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u32 key, num_cpus, retval;
__u64 val;
skel = for_each_hash_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
return;
hashmap_fd = bpf_map__fd(skel->maps.hashmap);
max_entries = bpf_map__max_entries(skel->maps.hashmap);
for (i = 0; i < max_entries; i++) {
key = i;
val = i + 1;
err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
if (!ASSERT_OK(err, "map_update"))
goto out;
}
num_cpus = bpf_num_possible_cpus();
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
goto out;
key = 1;
for (i = 0; i < num_cpus; i++)
percpu_valbuf[i] = i + 1;
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
key = 1;
err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
ASSERT_ERR(err, "hashmap_lookup");
ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
out:
free(percpu_valbuf);
for_each_hash_map_elem__destroy(skel);
}
static void test_array_map(void)
{
__u32 key, num_cpus, max_entries, retval;
int i, arraymap_fd, percpu_map_fd, err;
struct for_each_array_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u64 val, expected_total;
skel = for_each_array_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
return;
arraymap_fd = bpf_map__fd(skel->maps.arraymap);
expected_total = 0;
max_entries = bpf_map__max_entries(skel->maps.arraymap);
for (i = 0; i < max_entries; i++) {
key = i;
val = i + 1;
/* skip the last iteration for expected total */
if (i != max_entries - 1)
expected_total += val;
err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
if (!ASSERT_OK(err, "map_update"))
goto out;
}
num_cpus = bpf_num_possible_cpus();
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
goto out;
key = 0;
for (i = 0; i < num_cpus; i++)
percpu_valbuf[i] = i + 1;
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
out:
free(percpu_valbuf);
for_each_array_map_elem__destroy(skel);
}
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
test_hash_map();
if (test__start_subtest("array_map"))
test_array_map();
}

View File

@ -2,12 +2,31 @@
#include <test_progs.h>
#include <network_helpers.h>
#include "test_pkt_access.skel.h"
static const __u32 duration;
static void check_run_cnt(int prog_fd, __u64 run_cnt)
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int err;
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
return;
CHECK(run_cnt != info.run_cnt, "run_cnt",
"incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
}
void test_prog_run_xattr(void)
{
const char *file = "./test_pkt_access.o";
struct bpf_object *obj;
char buf[10];
int err;
struct test_pkt_access *skel;
int err, stats_fd = -1;
char buf[10] = {};
__u64 run_cnt = 0;
struct bpf_prog_test_run_attr tattr = {
.repeat = 1,
.data_in = &pkt_v4,
@ -16,12 +35,15 @@ void test_prog_run_xattr(void)
.data_size_out = 5,
};
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
&tattr.prog_fd);
if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
return;
memset(buf, 0, sizeof(buf));
skel = test_pkt_access__open_and_load();
if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
goto cleanup;
tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
@ -34,8 +56,12 @@ void test_prog_run_xattr(void)
CHECK_ATTR(buf[5] != 0, "overflow",
"BPF_PROG_TEST_RUN ignored size hint\n");
run_cnt += tattr.repeat;
check_run_cnt(tattr.prog_fd, run_cnt);
tattr.data_out = NULL;
tattr.data_size_out = 0;
tattr.repeat = 2;
errno = 0;
err = bpf_prog_test_run_xattr(&tattr);
@ -46,5 +72,12 @@ void test_prog_run_xattr(void)
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
bpf_object__close(obj);
run_cnt += tattr.repeat;
check_run_cnt(tattr.prog_fd, run_cnt);
cleanup:
if (skel)
test_pkt_access__destroy(skel);
if (stats_fd != -1)
close(stats_fd);
}

View File

@ -241,6 +241,48 @@ fail:
return -1;
}
static __u64 socket_cookie(int fd)
{
__u64 cookie;
socklen_t cookie_len = sizeof(cookie);
if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
"getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
return 0;
return cookie;
}
static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
const char *remote_ip, __u16 remote_port)
{
void *local, *remote;
int err;
memset(ctx, 0, sizeof(*ctx));
ctx->local_port = local_port;
ctx->remote_port = htons(remote_port);
if (is_ipv6(local_ip)) {
ctx->family = AF_INET6;
local = &ctx->local_ip6[0];
remote = &ctx->remote_ip6[0];
} else {
ctx->family = AF_INET;
local = &ctx->local_ip4;
remote = &ctx->remote_ip4;
}
err = inet_pton(ctx->family, local_ip, local);
if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
return 1;
err = inet_pton(ctx->family, remote_ip, remote);
if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
return 1;
return 0;
}
static int send_byte(int fd)
{
ssize_t n;
@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
static void run_sk_assign(struct test_sk_lookup *skel,
struct bpf_program *lookup_prog,
const char *listen_ip, const char *connect_ip)
const char *remote_ip, const char *local_ip)
{
int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
struct bpf_link *lookup_link;
int server_fds[MAX_SERVERS] = { -1 };
struct bpf_sk_lookup ctx;
__u64 server_cookie;
int i, err;
lookup_link = attach_lookup_prog(lookup_prog);
if (!lookup_link)
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.ctx_in = &ctx,
.ctx_size_in = sizeof(ctx),
.ctx_out = &ctx,
.ctx_size_out = sizeof(ctx),
);
if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
return;
ctx.protocol = IPPROTO_TCP;
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
if (server_fds[i] < 0)
goto close_servers;
@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
goto close_servers;
}
client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
if (client_fd < 0)
server_cookie = socket_cookie(server_fds[SERVER_B]);
if (!server_cookie)
return;
err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
if (CHECK(err, "test_run", "failed with error %d\n", errno))
goto close_servers;
peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
if (CHECK(peer_fd < 0, "accept", "failed\n"))
goto close_client;
if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
goto close_servers;
CHECK(ctx.cookie != server_cookie, "ctx.cookie",
"selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
close(peer_fd);
close_client:
close(client_fd);
close_servers:
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
if (server_fds[i] != -1)
close(server_fds[i]);
}
bpf_link__destroy(lookup_link);
}
static void run_sk_assign_v4(struct test_sk_lookup *skel,

View File

@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
int parser = bpf_program__fd(skel->progs.prog_skb_parser);
int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
int parser = bpf_program__fd(skel->progs.prog_skb_parser);
int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;

View File

@ -0,0 +1,92 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <test_progs.h>
#include "task_local_storage.skel.h"
#include "task_local_storage_exit_creds.skel.h"
#include "task_ls_recursion.skel.h"
static void test_sys_enter_exit(void)
{
struct task_local_storage *skel;
int err;
skel = task_local_storage__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
skel->bss->target_pid = syscall(SYS_gettid);
err = task_local_storage__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
syscall(SYS_gettid);
syscall(SYS_gettid);
/* 3x syscalls: 1x attach and 2x gettid */
ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
out:
task_local_storage__destroy(skel);
}
static void test_exit_creds(void)
{
struct task_local_storage_exit_creds *skel;
int err;
skel = task_local_storage_exit_creds__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
err = task_local_storage_exit_creds__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
/* trigger at least one exit_creds() */
if (CHECK_FAIL(system("ls > /dev/null")))
goto out;
/* sync rcu to make sure exit_creds() is called for "ls" */
kern_sync_rcu();
ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
out:
task_local_storage_exit_creds__destroy(skel);
}
static void test_recursion(void)
{
struct task_ls_recursion *skel;
int err;
skel = task_ls_recursion__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
err = task_ls_recursion__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
/* trigger sys_enter, make sure it does not cause deadlock */
syscall(SYS_gettid);
out:
task_ls_recursion__destroy(skel);
}
void test_task_local_storage(void)
{
if (test__start_subtest("sys_enter_exit"))
test_sys_enter_exit();
if (test__start_subtest("exit_creds"))
test_exit_creds();
if (test__start_subtest("recursion"))
test_recursion();
}

View File

@ -205,6 +205,12 @@ struct struct_with_embedded_stuff {
int t[11];
};
struct float_struct {
float f;
const double *d;
volatile long double *ld;
};
struct root_struct {
enum e1 _1;
enum e2 _2;
@ -219,6 +225,7 @@ struct root_struct {
union_fwd_t *_12;
union_fwd_ptr_t _13;
struct struct_with_embedded_stuff _14;
struct float_struct _15;
};
/* ------ END-EXPECTED-OUTPUT ------ */

View File

@ -807,6 +807,7 @@ struct core_reloc_size_output {
int arr_elem_sz;
int ptr_sz;
int enum_sz;
int float_sz;
};
struct core_reloc_size {
@ -816,6 +817,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
float float_field;
};
struct core_reloc_size___diff_sz {
@ -825,6 +827,7 @@ struct core_reloc_size___diff_sz {
char arr_field[10];
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
double float_field;
};
/* Error case of two candidates with the fields (int_field) at the same
@ -839,6 +842,7 @@ struct core_reloc_size___err_ambiguous1 {
int arr_field[4];
void *ptr_field;
enum { VALUE___1 = 123 } enum_field;
float float_field;
};
struct core_reloc_size___err_ambiguous2 {
@ -850,6 +854,7 @@ struct core_reloc_size___err_ambiguous2 {
int arr_field[4];
void *ptr_field;
enum { VALUE___2 = 123 } enum_field;
float float_field;
};
/*

View File

@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} arraymap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");
struct callback_ctx {
int output;
};
static __u64
check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
data->output += *val;
if (*key == 1)
return 1; /* stop the iteration */
return 0;
}
__u32 cpu = 0;
__u64 percpu_val = 0;
static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
cpu = bpf_get_smp_processor_id();
percpu_val = *val;
return 0;
}
u32 arraymap_output = 0;
SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;
data.output = 0;
bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
arraymap_output = data.output;
bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
return 0;
}

View File

@ -0,0 +1,95 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} hashmap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");
struct callback_ctx {
struct __sk_buff *ctx;
int input;
int output;
};
static __u64
check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
struct __sk_buff *skb = data->ctx;
__u32 k;
__u64 v;
if (skb) {
k = *key;
v = *val;
if (skb->len == 10000 && k == 10 && v == 10)
data->output = 3; /* impossible path */
else
data->output = 4;
} else {
data->output = data->input;
bpf_map_delete_elem(map, key);
}
return 0;
}
__u32 cpu = 0;
__u32 percpu_called = 0;
__u32 percpu_key = 0;
__u64 percpu_val = 0;
int percpu_output = 0;
static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *unused)
{
struct callback_ctx data;
percpu_called++;
cpu = bpf_get_smp_processor_id();
percpu_key = *key;
percpu_val = *val;
data.ctx = 0;
data.input = 100;
data.output = 0;
bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
percpu_output = data.output;
return 0;
}
int hashmap_output = 0;
int hashmap_elems = 0;
int percpu_map_elems = 0;
SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;
data.ctx = skb;
data.input = 10;
data.output = 0;
hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
hashmap_output = data.output;
percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
(void *)0, 0);
return 0;
}

View File

@ -0,0 +1,99 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ptrace.h>
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
/* typically virtio scsi has max SGs of 6 */
#define VIRTIO_MAX_SGS 6
/* Verifier will fail with SG_MAX = 128. The failure can be
* workarounded with a smaller SG_MAX, e.g. 10.
*/
#define WORKAROUND
#ifdef WORKAROUND
#define SG_MAX 10
#else
/* typically virtio blk has max SEG of 128 */
#define SG_MAX 128
#endif
#define SG_CHAIN 0x01UL
#define SG_END 0x02UL
struct scatterlist {
unsigned long page_link;
unsigned int offset;
unsigned int length;
};
#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
#define sg_is_last(sg) ((sg)->page_link & SG_END)
#define sg_chain_ptr(sg) \
((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
{
struct scatterlist sg;
bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
if (sg_is_last(&sg))
return NULL;
sgp++;
bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
if (sg_is_chain(&sg))
sgp = sg_chain_ptr(&sg);
return sgp;
}
static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
{
struct scatterlist *sgp;
bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
return sgp;
}
int config = 0;
int result = 0;
SEC("kprobe/virtqueue_add_sgs")
int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
unsigned int out_sgs, unsigned int in_sgs)
{
struct scatterlist *sgp = NULL;
__u64 length1 = 0, length2 = 0;
unsigned int i, n, len;
if (config != 0)
return 0;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
length1 += len;
n++;
}
}
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
length2 += len;
n++;
}
}
config = 1;
result = length2 - length1;
return 0;
}

View File

@ -0,0 +1,64 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} enter_id SEC(".maps");
#define MAGIC_VALUE 0xabcd1234
pid_t target_pid = 0;
int mismatch_cnt = 0;
int enter_cnt = 0;
int exit_cnt = 0;
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
ptr = bpf_task_storage_get(&enter_id, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
__sync_fetch_and_add(&enter_cnt, 1);
*ptr = MAGIC_VALUE + enter_cnt;
return 0;
}
SEC("tp_btf/sys_exit")
int BPF_PROG(on_exit, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
ptr = bpf_task_storage_get(&enter_id, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
return 0;
__sync_fetch_and_add(&exit_cnt, 1);
if (*ptr != MAGIC_VALUE + exit_cnt)
__sync_fetch_and_add(&mismatch_cnt, 1);
return 0;
}

View File

@ -0,0 +1,32 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, __u64);
} task_storage SEC(".maps");
int valid_ptr_count = 0;
int null_ptr_count = 0;
SEC("fentry/exit_creds")
int BPF_PROG(trace_exit_creds, struct task_struct *task)
{
__u64 *ptr;
ptr = bpf_task_storage_get(&task_storage, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
__sync_fetch_and_add(&valid_ptr_count, 1);
else
__sync_fetch_and_add(&null_ptr_count, 1);
return 0;
}

View File

@ -0,0 +1,70 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} map_a SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} map_b SEC(".maps");
SEC("fentry/bpf_local_storage_lookup")
int BPF_PROG(on_lookup)
{
struct task_struct *task = bpf_get_current_task_btf();
bpf_task_storage_delete(&map_a, task);
bpf_task_storage_delete(&map_b, task);
return 0;
}
SEC("fentry/bpf_local_storage_update")
int BPF_PROG(on_update)
{
struct task_struct *task = bpf_get_current_task_btf();
long *ptr;
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
return 0;
}
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
long *ptr;
task = bpf_get_current_task_btf();
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 200;
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 100;
return 0;
}

View File

@ -21,6 +21,7 @@ struct core_reloc_size_output {
int arr_elem_sz;
int ptr_sz;
int enum_sz;
int float_sz;
};
struct core_reloc_size {
@ -30,6 +31,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
float float_field;
};
SEC("raw_tracepoint/sys_enter")
@ -45,6 +47,7 @@ int test_core_size(void *ctx)
out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
out->ptr_sz = bpf_core_field_size(in->ptr_field);
out->enum_sz = bpf_core_field_size(in->enum_field);
out->float_sz = bpf_core_field_size(in->float_field);
return 0;
}

View File

@ -64,6 +64,10 @@ static const int PROG_DONE = 1;
static const __u32 KEY_SERVER_A = SERVER_A;
static const __u32 KEY_SERVER_B = SERVER_B;
static const __u16 SRC_PORT = bpf_htons(8008);
static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
static const __u16 DST_PORT = 7007; /* Host byte order */
static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
return SK_DROP;
/* Narrow loads from remote_port field. Expect non-0 value. */
if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
/* Narrow loads from remote_port field. Expect SRC_PORT. */
if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
return SK_DROP;
if (LSW(ctx->remote_port, 0) == 0)
if (LSW(ctx->remote_port, 0) != SRC_PORT)
return SK_DROP;
/* Narrow loads from local_port field. Expect DST_PORT. */
@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv4 fields */
if (v4) {
/* Expect non-0.0.0.0 in remote_ip4 */
if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
/* Expect SRC_IP4 in remote_ip4 */
if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
return SK_DROP;
if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP4 in local_ip4 */
@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv6 fields */
if (!v4) {
/* Expect non-:: IP in remote_ip6 */
if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
/* Expect SRC_IP6 in remote_ip6 */
if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
return SK_DROP;
if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP6 in local_ip6 */
if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||

View File

@ -31,13 +31,13 @@ struct {
static volatile bool test_sockmap; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_skb_parser(struct __sk_buff *skb)
int prog_stream_parser(struct __sk_buff *skb)
{
return skb->len;
}
SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb)
int prog_stream_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;

Some files were not shown because too many files have changed in this diff Show More