Merge branch 'xskmap-lookup'

Jonathan Lemon says:

====================
Currently, the AF_XDP code uses a separate map in order to
determine if an xsk is bound to a queue.  Have the xskmap
lookup return a XDP_SOCK pointer on the kernel side, which
the verifier uses to extract relevant values.

Patches:
 1 - adds XSK_SOCK type
 2 - sync bpf.h with tools
 3 - add tools selftest
 4 - update lib/bpf, removing qidconf

v4->v5:
 - xskmap lookup now returns XDP_SOCK type instead of pointer to element.
 - no changes lib/bpf/xsk.c

v3->v4:
 - Clarify error handling path.

v2->v3:
 - Use correct map type.
====================

Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2019-06-10 23:31:48 -07:00
commit ab1b3a957d
10 changed files with 135 additions and 94 deletions

View File

@ -280,6 +280,7 @@ enum bpf_reg_type {
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
};
/* The information passed from prog-specific *_is_valid_access
@ -727,6 +728,13 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)

View File

@ -58,11 +58,11 @@ struct xdp_sock {
struct xdp_umem *umem;
struct list_head flush_node;
u16 queue_id;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head list;
bool zc;
/* Protects multiple processes in the control path */
struct mutex mutex;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head list;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
* in the SKB destructor callback.
*/

View File

@ -3083,6 +3083,10 @@ struct bpf_sock_tuple {
};
};
struct bpf_xdp_sock {
__u32 queue_id;
};
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.

View File

@ -334,7 +334,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET ||
type == PTR_TO_SOCK_COMMON ||
type == PTR_TO_TCP_SOCK;
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_XDP_SOCK;
}
static bool reg_type_may_be_null(enum bpf_reg_type type)
@ -406,6 +407,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
};
static char slot_type_char[] = {
@ -1363,6 +1365,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
return true;
default:
return false;
@ -1843,6 +1846,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
case PTR_TO_TCP_SOCK:
valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
break;
case PTR_TO_XDP_SOCK:
valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
break;
default:
valid = false;
}
@ -2007,6 +2013,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_TCP_SOCK:
pointer_desc = "tcp_sock ";
break;
case PTR_TO_XDP_SOCK:
pointer_desc = "xdp_sock ";
break;
default:
break;
}
@ -2905,10 +2914,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
* appear.
*/
case BPF_MAP_TYPE_CPUMAP:
case BPF_MAP_TYPE_XSKMAP:
if (func_id != BPF_FUNC_redirect_map)
goto error;
break;
case BPF_MAP_TYPE_XSKMAP:
if (func_id != BPF_FUNC_redirect_map &&
func_id != BPF_FUNC_map_lookup_elem)
goto error;
break;
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
case BPF_MAP_TYPE_HASH_OF_MAPS:
if (func_id != BPF_FUNC_map_lookup_elem)
@ -3799,6 +3812,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
@ -5038,6 +5052,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
if (reg->map_ptr->inner_map_meta) {
reg->type = CONST_PTR_TO_MAP;
reg->map_ptr = reg->map_ptr->inner_map_meta;
} else if (reg->map_ptr->map_type ==
BPF_MAP_TYPE_XSKMAP) {
reg->type = PTR_TO_XDP_SOCK;
} else {
reg->type = PTR_TO_MAP_VALUE;
}
@ -6299,6 +6316,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
@ -6693,6 +6711,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
return false;
default:
return true;
@ -7826,6 +7845,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
case PTR_TO_TCP_SOCK:
convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
break;
case PTR_TO_XDP_SOCK:
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
default:
continue;
}

View File

@ -151,6 +151,12 @@ void __xsk_map_flush(struct bpf_map *map)
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return __xsk_map_lookup_elem(map, *(u32 *)key);
}
static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
return ERR_PTR(-EOPNOTSUPP);
}
@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = {
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
.map_lookup_elem = xsk_map_lookup_elem,
.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,

View File

@ -5680,6 +5680,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
return INET_ECN_set_ce(skb);
}
bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
return false;
if (off % size != 0)
return false;
switch (off) {
default:
return size == sizeof(__u32);
}
}
u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
#define BPF_XDP_SOCK_GET(FIELD) \
do { \
BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct xdp_sock, FIELD)); \
} while (0)
switch (si->off) {
case offsetof(struct bpf_xdp_sock, queue_id):
BPF_XDP_SOCK_GET(queue_id);
break;
}
return insn - insn_buf;
}
static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
.func = bpf_skb_ecn_set_ce,
.gpl_only = false,

View File

@ -3083,6 +3083,10 @@ struct bpf_sock_tuple {
};
};
struct bpf_xdp_sock {
__u32 queue_id;
};
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.

View File

@ -60,10 +60,8 @@ struct xsk_socket {
struct xsk_umem *umem;
struct xsk_socket_config config;
int fd;
int xsks_map;
int ifindex;
int prog_fd;
int qidconf_map_fd;
int xsks_map_fd;
__u32 queue_id;
char ifname[IFNAMSIZ];
@ -265,15 +263,11 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* This is the C-program:
* SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
* {
* int *qidconf, index = ctx->rx_queue_index;
* int index = ctx->rx_queue_index;
*
* // A set entry here means that the correspnding queue_id
* // has an active AF_XDP socket bound to it.
* qidconf = bpf_map_lookup_elem(&qidconf_map, &index);
* if (!qidconf)
* return XDP_ABORTED;
*
* if (*qidconf)
* if (bpf_map_lookup_elem(&xsks_map, &index))
* return bpf_redirect_map(&xsks_map, index, 0);
*
* return XDP_PASS;
@ -286,15 +280,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd),
BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV32_IMM(BPF_REG_0, 0),
/* if r1 == 0 goto +8 */
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
BPF_MOV32_IMM(BPF_REG_0, 2),
/* r1 = *(u32 *)(r1 + 0) */
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
/* if r1 == 0 goto +5 */
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
/* r2 = *(u32 *)(r10 - 4) */
@ -366,18 +355,11 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
if (max_queues < 0)
return max_queues;
fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map",
fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
sizeof(int), sizeof(int), max_queues, 0);
if (fd < 0)
return fd;
xsk->qidconf_map_fd = fd;
fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
sizeof(int), sizeof(int), max_queues, 0);
if (fd < 0) {
close(xsk->qidconf_map_fd);
return fd;
}
xsk->xsks_map_fd = fd;
return 0;
@ -385,10 +367,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
close(xsk->qidconf_map_fd);
bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
close(xsk->xsks_map_fd);
xsk->qidconf_map_fd = -1;
xsk->xsks_map_fd = -1;
}
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@ -417,10 +397,9 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
if (err)
goto out_map_ids;
for (i = 0; i < prog_info.nr_map_ids; i++) {
if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
break;
xsk->xsks_map_fd = -1;
for (i = 0; i < prog_info.nr_map_ids; i++) {
fd = bpf_map_get_fd_by_id(map_ids[i]);
if (fd < 0)
continue;
@ -431,11 +410,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
continue;
}
if (!strcmp(map_info.name, "qidconf_map")) {
xsk->qidconf_map_fd = fd;
continue;
}
if (!strcmp(map_info.name, "xsks_map")) {
xsk->xsks_map_fd = fd;
continue;
@ -445,40 +419,18 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
err = 0;
if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
if (xsk->xsks_map_fd == -1)
err = -ENOENT;
xsk_delete_bpf_maps(xsk);
}
out_map_ids:
free(map_ids);
return err;
}
static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
{
int qid = false;
bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
}
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
int qid = true, fd = xsk->fd, err;
err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
if (err)
goto out;
err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
if (err)
goto out;
return 0;
out:
xsk_clear_bpf_maps(xsk);
return err;
return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
&xsk->fd, 0);
}
static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
@ -497,26 +449,27 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;
err = xsk_load_xdp_prog(xsk);
if (err)
goto out_maps;
if (err) {
xsk_delete_bpf_maps(xsk);
return err;
}
} else {
xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
err = xsk_lookup_bpf_maps(xsk);
if (err)
goto out_load;
if (err) {
close(xsk->prog_fd);
return err;
}
}
err = xsk_set_bpf_maps(xsk);
if (err)
goto out_load;
if (err) {
xsk_delete_bpf_maps(xsk);
close(xsk->prog_fd);
return err;
}
return 0;
out_load:
close(xsk->prog_fd);
out_maps:
xsk_delete_bpf_maps(xsk);
return err;
}
int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
@ -643,9 +596,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}
xsk->qidconf_map_fd = -1;
xsk->xsks_map_fd = -1;
xsk->prog_fd = -1;
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
if (err)
@ -708,8 +659,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)
if (!xsk)
return;
xsk_clear_bpf_maps(xsk);
xsk_delete_bpf_maps(xsk);
if (xsk->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(xsk->prog_fd);
}
optlen = sizeof(off);
err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);

View File

@ -28,21 +28,6 @@
.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
.prog_type = BPF_PROG_TYPE_SOCK_OPS,
},
{
"prevent map lookup in xskmap",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_EXIT_INSN(),
},
.fixup_map_xskmap = { 3 },
.result = REJECT,
.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
.prog_type = BPF_PROG_TYPE_XDP,
},
{
"prevent map lookup in stack trace",
.insns = {

View File

@ -498,3 +498,21 @@
.result = REJECT,
.errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
},
{
"bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
.insns = {
BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_xskmap = { 3 },
.prog_type = BPF_PROG_TYPE_XDP,
.result = ACCEPT,
},