Alexei Starovoitov says:

====================
pull-request: bpf 2021-11-09

We've added 7 non-merge commits during the last 3 day(s) which contain
a total of 10 files changed, 174 insertions(+), 48 deletions(-).

The main changes are:

1) Various sockmap fixes, from John and Jussi.

2) Fix out-of-bounds issue with bpf_pseudo_func, from Martin.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: sk_skb data_end access incorrect when src_reg = dst_reg
  bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and colliding
  bpf, sockmap: Fix race in ingress receive verdict with redirect to self
  bpf, sockmap: Remove unhash handler for BPF sockmap usage
  bpf, sockmap: Use stricter sk state checks in sk_lookup_assign
  bpf: selftest: Trigger a DCE on the whole subprog
  bpf: Stop caching subprog index in the bpf_pseudo_func insn
====================

Link: https://lore.kernel.org/r/20211109215702.38350-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit fceb07950a (Jakub Kicinski <kuba@kernel.org>, 2021-11-09 15:44:47 -08:00)
10 changed files with 174 additions and 48 deletions

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
@@ -484,6 +484,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
 	aux->ctx_field_size = size;
 }
 
+static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+	       insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 struct bpf_prog_ops {
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
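
For context on the helper being moved here: a BPF_PSEUDO_FUNC load is a ld_imm64, a 16-byte instruction occupying two consecutive struct bpf_insn slots. After JIT, the callback address is split across the two imm fields (see the jit_subprogs hunks below), which is why insn[1].imm is a poor long-term home for verifier state. A minimal userspace sketch of that split, with a made-up address and a mirrored struct layout (illustrative only, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct insn {				/* mirrors struct bpf_insn */
	uint8_t code;
	uint8_t dst_reg:4, src_reg:4;
	int16_t off;
	int32_t imm;
};

int main(void)
{
	uint64_t addr = 0xffffffffa0001234ULL;	/* made-up address */
	struct insn ld[2];

	memset(ld, 0, sizeof(ld));
	ld[0].imm = (int32_t)(uint32_t)addr;		/* low 32 bits */
	ld[1].imm = (int32_t)(uint32_t)(addr >> 32);	/* high 32 bits */
	printf("imm pair: %#x %#x\n",
	       (uint32_t)ld[0].imm, (uint32_t)ld[1].imm);
	return 0;
}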

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
@@ -507,6 +507,18 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 	return !!psock->saved_data_ready;
 }
 
+static inline bool sk_is_tcp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_DGRAM &&
+	       sk->sk_protocol == IPPROTO_UDP;
+}
+
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
 #define BPF_F_STRPARSER	(1UL << 1)

diff --git a/include/net/strparser.h b/include/net/strparser.h
@@ -54,10 +54,28 @@ struct strp_msg {
 	int offset;
 };
 
+struct _strp_msg {
+	/* Internal cb structure. struct strp_msg must be first for passing
+	 * to upper layer.
+	 */
+	struct strp_msg strp;
+	int accum_len;
+};
+
+struct sk_skb_cb {
+#define SK_SKB_CB_PRIV_LEN 20
+	unsigned char data[SK_SKB_CB_PRIV_LEN];
+	struct _strp_msg strp;
+	/* temp_reg is a temporary register used for bpf_convert_data_end_access
+	 * when dst_reg == src_reg.
+	 */
+	u64 temp_reg;
+};
+
 static inline struct strp_msg *strp_msg(struct sk_buff *skb)
 {
 	return (struct strp_msg *)((void *)skb->cb +
-		offsetof(struct qdisc_skb_cb, data));
+		offsetof(struct sk_skb_cb, strp));
 }
 
 /* Structure for an attached lower socket */
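
The collision this layout fixes: sk_skb programs may write cb[0..4] (20 bytes) of the 48-byte skb->cb scratch area, while strparser and TLS addressed their state through qdisc_skb_cb's data offset in that same area, so the two users clobbered each other. struct sk_skb_cb gives each a dedicated slice. A small userspace sketch of the arithmetic, assuming only the definitions shown in the hunk above:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct strp_msg { int full_len; int offset; };
struct _strp_msg { struct strp_msg strp; int accum_len; };

struct sk_skb_cb {
#define SK_SKB_CB_PRIV_LEN 20
	unsigned char data[SK_SKB_CB_PRIV_LEN];	/* BPF's cb[0..4] */
	struct _strp_msg strp;			/* strparser state */
	uint64_t temp_reg;			/* data_end spill slot */
};

int main(void)
{
	/* Everything must fit in the 48-byte skb->cb scratch area. */
	assert(sizeof(struct sk_skb_cb) <= 48);
	printf("cb data at 0, strp at %zu, temp_reg at %zu\n",
	       offsetof(struct sk_skb_cb, strp),
	       offsetof(struct sk_skb_cb, temp_reg));
	return 0;
}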

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
@@ -390,6 +390,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
 			i = end_new;
 			insn = prog->insnsi + end_old;
 		}
+		if (bpf_pseudo_func(insn)) {
+			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
+						   end_new, i, probe_pass);
+			if (ret)
+				return ret;
+			continue;
+		}
 		code = insn->code;
 		if ((BPF_CLASS(code) != BPF_JMP &&
 		     BPF_CLASS(code) != BPF_JMP32) ||

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 }
 
-static bool bpf_pseudo_func(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
-	       insn->src_reg == BPF_PSEUDO_FUNC;
-}
-
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
@@ -1960,16 +1954,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			return -EPERM;
 		}
 
-		if (bpf_pseudo_func(insn)) {
+		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
 			ret = add_subprog(env, i + insn->imm + 1);
-			if (ret >= 0)
-				/* remember subprog */
-				insn[1].imm = ret;
-		} else if (bpf_pseudo_call(insn)) {
-			ret = add_subprog(env, i + insn->imm + 1);
-		} else {
+		else
 			ret = add_kfunc_call(env, insn->imm, insn->off);
-		}
 
 		if (ret < 0)
 			return ret;
@@ -9387,7 +9375,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
 		struct bpf_prog_aux *aux = env->prog->aux;
-		u32 subprogno = insn[1].imm;
+		u32 subprogno = find_subprog(env,
+					     env->insn_idx + insn->imm + 1);
 
 		if (!aux->func_info) {
 			verbose(env, "missing btf func_info\n");
@@ -12557,14 +12546,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (bpf_pseudo_func(insn)) {
-			env->insn_aux_data[i].call_imm = insn->imm;
-			/* subprog is encoded in insn[1].imm */
+		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
 			continue;
-		}
-
-		if (!bpf_pseudo_call(insn))
-			continue;
 
 		/* Upon error here we cannot fall back to interpreter but
 		 * need a hard reject of the program. Thus -EFAULT is
 		 * propagated in any case.
@@ -12585,6 +12569,12 @@
 		env->insn_aux_data[i].call_imm = insn->imm;
 		/* point imm to __bpf_call_base+1 from JITs point of view */
 		insn->imm = 1;
+		if (bpf_pseudo_func(insn))
+			/* jit (e.g. x86_64) may emit fewer instructions
+			 * if it learns a u32 imm is the same as a u64 imm.
+			 * Force a non zero here.
+			 */
+			insn[1].imm = 1;
 	}
 
 	err = bpf_prog_alloc_jited_linfo(prog);
@@ -12669,7 +12659,7 @@
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
 			if (bpf_pseudo_func(insn)) {
-				subprog = insn[1].imm;
+				subprog = insn->off;
 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
 				continue;
@@ -12720,7 +12710,8 @@
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 		if (bpf_pseudo_func(insn)) {
 			insn[0].imm = env->insn_aux_data[i].call_imm;
-			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
+			insn[1].imm = insn->off;
+			insn->off = 0;
 			continue;
 		}
 		if (!bpf_pseudo_call(insn))
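
The pseudo-func instructions patched above originate from programs that pass a static subprog as a callback. A hedged sketch of such a program (map, section, and function names are illustrative): the &sum_elem reference compiles to a two-slot ld_imm64 with src_reg == BPF_PSEUDO_FUNC, which the verifier must keep resolvable even after instruction patching shifts insn indexes:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 4);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

/* Each static callback becomes a verifier subprog, referenced from the
 * caller by a BPF_PSEUDO_FUNC ld_imm64 instead of a direct call. */
static __u64 sum_elem(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
{
	*(__u64 *)ctx += *val;
	return 0;	/* 0 = keep iterating */
}

SEC("tc")
int sum_counters(struct __sk_buff *skb)
{
	__u64 sum = 0;

	bpf_for_each_map_elem(&counters, sum_elem, &sum, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";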

diff --git a/net/core/filter.c b/net/core/filter.c
@@ -9756,22 +9756,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
 						    struct bpf_insn *insn)
 {
-	/* si->dst_reg = skb->data */
+	int reg;
+	int temp_reg_off = offsetof(struct sk_buff, cb) +
+			   offsetof(struct sk_skb_cb, temp_reg);
+
+	if (si->src_reg == si->dst_reg) {
+		/* We need an extra register, choose and save a register. */
+		reg = BPF_REG_9;
+		if (si->src_reg == reg || si->dst_reg == reg)
+			reg--;
+		if (si->src_reg == reg || si->dst_reg == reg)
+			reg--;
+		*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
+	} else {
+		reg = si->dst_reg;
+	}
+
+	/* reg = skb->data */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
-			      si->dst_reg, si->src_reg,
+			      reg, si->src_reg,
 			      offsetof(struct sk_buff, data));
 	/* AX = skb->len */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
 			      BPF_REG_AX, si->src_reg,
 			      offsetof(struct sk_buff, len));
-	/* si->dst_reg = skb->data + skb->len */
-	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+	/* reg = skb->data + skb->len */
+	*insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
 	/* AX = skb->data_len */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
 			      BPF_REG_AX, si->src_reg,
 			      offsetof(struct sk_buff, data_len));
-	/* si->dst_reg = skb->data + skb->len - skb->data_len */
-	*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+	/* reg = skb->data + skb->len - skb->data_len */
+	*insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
+
+	if (si->src_reg == si->dst_reg) {
+		/* Restore the saved register */
+		*insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
+		*insn++ = BPF_MOV64_REG(si->dst_reg, reg);
+		*insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
+	}
+
 	return insn;
 }
@@ -9782,11 +9806,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 				     struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
+	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, data_end):
 		insn = bpf_convert_data_end_access(si, insn);
 		break;
+	case offsetof(struct __sk_buff, cb[0]) ...
+	     offsetofend(struct __sk_buff, cb[4]) - 1:
+		BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
+		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+			      offsetof(struct sk_skb_cb, data)) %
+			     sizeof(__u64));
+		prog->cb_access = 1;
+		off  = si->off;
+		off -= offsetof(struct __sk_buff, cb[0]);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct sk_skb_cb, data);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+					      si->src_reg, off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+					      si->src_reg, off);
+		break;
 	default:
 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
 					      target_size);
@@ -10423,8 +10469,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
 		return -EINVAL;
 	if (unlikely(sk && sk_is_refcounted(sk)))
 		return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
-	if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
-		return -ESOCKTNOSUPPORT; /* reject connected sockets */
+	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
+		return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
+	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
+		return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
 
 	/* Check if socket is suitable for packet L3/L4 protocol */
 	if (sk && sk->sk_protocol != ctx->protocol)
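
These stricter state checks are visible to sk_lookup programs that steer packets with bpf_sk_assign(). A hedged sketch modeled on the selftest pattern (map name, key, and section name are illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} redir_map SEC(".maps");

SEC("sk_lookup")
int select_sock(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *sk;
	__u32 key = 0;
	long err;

	sk = bpf_map_lookup_elem(&redir_map, &key);
	if (!sk)
		return SK_PASS;

	/* With this fix, the assignment is refused (-ESOCKTNOSUPPORT)
	 * unless the socket is TCP in TCP_LISTEN or UDP in TCP_CLOSE,
	 * instead of only rejecting TCP_ESTABLISHED. */
	err = bpf_sk_assign(ctx, sk, 0);
	bpf_sk_release(sk);
	return err ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";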

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
@@ -511,12 +511,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
 	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
 }
 
-static bool sk_is_tcp(const struct sock *sk)
-{
-	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
-}
-
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
 	if (sk_is_tcp(sk))

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
@@ -172,6 +172,41 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
 	return ret;
 }
 
+static int tcp_bpf_recvmsg_parser(struct sock *sk,
+				  struct msghdr *msg,
+				  size_t len,
+				  int nonblock,
+				  int flags,
+				  int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	lock_sock(sk);
+msg_bytes_ready:
+	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	if (!copied) {
+		long timeo;
+		int data;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = tcp_msg_wait_data(sk, psock, timeo);
+		if (data && !sk_psock_queue_empty(psock))
+			goto msg_bytes_ready;
+		copied = -EAGAIN;
+	}
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied;
+}
+
 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			   int nonblock, int flags, int *addr_len)
 {
@@ -464,6 +499,8 @@ enum {
 enum {
 	TCP_BPF_BASE,
 	TCP_BPF_TX,
+	TCP_BPF_RX,
+	TCP_BPF_TXRX,
 	TCP_BPF_NUM_CFGS,
 };
@@ -475,7 +512,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 				   struct proto *base)
 {
 	prot[TCP_BPF_BASE]			= *base;
-	prot[TCP_BPF_BASE].unhash		= sock_map_unhash;
 	prot[TCP_BPF_BASE].close		= sock_map_close;
 	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
 	prot[TCP_BPF_BASE].sock_is_readable	= sk_msg_is_readable;
@@ -483,6 +519,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
 	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
 	prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
+
+	prot[TCP_BPF_RX]			= prot[TCP_BPF_BASE];
+	prot[TCP_BPF_RX].recvmsg		= tcp_bpf_recvmsg_parser;
+
+	prot[TCP_BPF_TXRX]			= prot[TCP_BPF_TX];
+	prot[TCP_BPF_TXRX].recvmsg		= tcp_bpf_recvmsg_parser;
 }
 
 static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
@@ -520,6 +562,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
 	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
 
+	if (psock->progs.stream_verdict || psock->progs.skb_verdict) {
+		config = (config == TCP_BPF_TX) ? TCP_BPF_TXRX : TCP_BPF_RX;
+	}
+
 	if (restore) {
 		if (inet_csk_has_ulp(sk)) {
 			/* TLS does not have an unhash proto in SW cases,
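
Userspace reaches the new TCP_BPF_RX/TCP_BPF_TXRX configurations by attaching a verdict program to a sockmap; sockets subsequently added to the map then receive data through tcp_bpf_recvmsg_parser(). A hedged libbpf sketch (the two fds are assumed to come from an already-loaded program and map):

#include <bpf/bpf.h>

/* verdict_fd: a loaded sk_skb verdict program; map_fd: a sockmap.
 * After this attach, tcp_bpf_update_proto() selects TCP_BPF_RX (or
 * TCP_BPF_TXRX when a msg parser is also attached) for sockets in
 * the map. */
static int enable_verdict_rx(int verdict_fd, int map_fd)
{
	return bpf_prog_attach(verdict_fd, map_fd,
			       BPF_SK_SKB_STREAM_VERDICT, 0);
}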

diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
@@ -27,18 +27,10 @@
 
 static struct workqueue_struct *strp_wq;
 
-struct _strp_msg {
-	/* Internal cb structure. struct strp_msg must be first for passing
-	 * to upper layer.
-	 */
-	struct strp_msg strp;
-	int accum_len;
-};
-
 static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
 {
 	return (struct _strp_msg *)((void *)skb->cb +
-		offsetof(struct qdisc_skb_cb, data));
+		offsetof(struct sk_skb_cb, strp));
 }
 
 /* Lower lock held */

diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -23,6 +23,16 @@ struct callback_ctx {
 	int output;
 };
 
+const volatile int bypass_unused = 1;
+
+static __u64
+unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val,
+	       struct callback_ctx *data)
+{
+	data->output = 0;
+	return 1;
+}
+
 static __u64
 check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
 		 struct callback_ctx *data)
@@ -54,6 +64,8 @@ int test_pkt_access(struct __sk_buff *skb)
 	data.output = 0;
 	bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+	if (!bypass_unused)
+		bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0);
 	arraymap_output = data.output;
 
 	bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
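
Because bypass_unused is const volatile, it lands in the object's .rodata and its load-time value (1) is known to the verifier, which can prove the guarded call dead and eliminate unused_subprog() entirely; that dead-code elimination over a whole subprog is what exercises the bpf_pseudo_func fix above. A hedged sketch of a loader that instead keeps the subprog alive, assuming the usual generated skeleton for this object:

#include <stdio.h>
#include "for_each_array_map_elem.skel.h"	/* assumed skeleton header */

int load_with_subprog_alive(void)
{
	struct for_each_array_map_elem *skel;

	skel = for_each_array_map_elem__open();
	if (!skel)
		return -1;

	/* The default value 1 lets the verifier eliminate the guarded
	 * call together with unused_subprog(); 0 keeps the subprog. */
	skel->rodata->bypass_unused = 0;

	if (for_each_array_map_elem__load(skel)) {
		for_each_array_map_elem__destroy(skel);
		return -1;
	}
	for_each_array_map_elem__destroy(skel);
	return 0;
}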