bpf-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZg7vEAAKCRDbK58LschI
 gxAeAQD18uHqGbcCCnOVnaETdi3bQcSBpobuclThpN1WdMILcwEA+5Vz9Lxmt6BH
 qF/vbVHAOeCZt3LTOJ/jx1GRVstVWQk=
 =+QKv
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-04-04

We've added 7 non-merge commits during the last 5 day(s) which contain
a total of 9 files changed, 75 insertions(+), 24 deletions(-).

The main changes are:

1) Fix x86 BPF JIT under retbleed=stuff which causes kernel panics due to
   incorrect destination IP calculation and incorrect IP for relocations,
   from Uros Bizjak and Joan Bruguera Micó.

2) Fix BPF arena file descriptor leaks in the verifier,
   from Anton Protopopov.

3) Defer bpf_link deallocation to after RCU grace period as currently
   running multi-{kprobes,uprobes} programs might still access cookie
   information from the link, from Andrii Nakryiko.

4) Fix a BPF sockmap lock inversion deadlock in map_delete_elem reported
   by syzkaller, from Jakub Sitnicki.

5) Fix resolve_btfids build with musl libc due to missing linux/types.h
   include, from Natanael Copa.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: Prevent lock inversion deadlock in map delete elem
  x86/bpf: Fix IP for relocating call depth accounting
  x86/bpf: Fix IP after emitting call depth accounting
  bpf: fix possible file descriptor leaks in verifier
  tools/resolve_btfids: fix build with musl libc
  bpf: support deferring bpf_link dealloc to after RCU grace period
  bpf: put uprobe link's path and task in release callback
====================

Link: https://lore.kernel.org/r/20240404183258.4401-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-04-04 11:37:39 -07:00
commit 1cfa2f10f4
9 changed files with 75 additions and 24 deletions

View File

@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
void *func)
void *func, void *ip)
{
return 0;
}

View File

@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
return !bcmp(pad, insn_buff, tmpl_size);
}
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0;
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
apply_relocation(insn_buff, tmpl_size, *pprog,
apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);
memcpy(*pprog, insn_buff, tmpl_size);

View File

@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
x86_call_depth_emit_accounting(pprog, func);
ip += x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, ip, 0xE8);
}
@ -1972,20 +1972,17 @@ populate_extable:
/* call */
case BPF_JMP | BPF_CALL: {
int offs;
u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
if (!imm32)
return -EINVAL;
offs = 7 + x86_call_depth_emit_accounting(&prog, func);
} else {
if (!imm32)
return -EINVAL;
offs = x86_call_depth_emit_accounting(&prog, func);
ip += 7;
}
if (emit_call(&prog, func, image + addrs[i - 1] + offs))
if (!imm32)
return -EINVAL;
ip += x86_call_depth_emit_accounting(&prog, func, ip);
if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
x86_call_depth_emit_accounting(&prog, NULL);
x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */

View File

@ -1574,12 +1574,26 @@ struct bpf_link {
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
/* rcu is used before freeing, work can be used to schedule that
* RCU-based freeing before that, so they never overlap
*/
union {
struct rcu_head rcu;
struct work_struct work;
};
};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
/* deallocate link resources callback, called without RCU grace period
* waiting
*/
void (*dealloc)(struct bpf_link *link);
/* deallocate link resources callback, called after RCU grace period;
* if underlying BPF program is sleepable we go through tasks trace
* RCU GP and then "classic" RCU GP
*/
void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);

View File

@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}
static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
{
struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
/* free bpf_link and its containing memory */
link->ops->dealloc_deferred(link);
}
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
{
if (rcu_trace_implies_rcu_gp())
bpf_link_defer_dealloc_rcu_gp(rcu);
else
call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
}
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
bool sleepable = false;
bpf_link_free_id(link->id);
if (link->prog) {
sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
/* free bpf_link and its containing memory */
link->ops->dealloc(link);
if (link->ops->dealloc_deferred) {
/* schedule BPF link deallocation; if underlying BPF program
* is sleepable, we need to first wait for RCU tasks trace
* sync, then go through "classic" RCU grace period
*/
if (sleepable)
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
}
if (link->ops->dealloc)
link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
.dealloc = bpf_raw_tp_link_dealloc,
.dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};

View File

@ -18379,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
}
if (!env->prog->jit_requested) {
verbose(env, "JIT is required to use arena\n");
fdput(f);
return -EOPNOTSUPP;
}
if (!bpf_jit_supports_arena()) {
verbose(env, "JIT doesn't support arena\n");
fdput(f);
return -EOPNOTSUPP;
}
env->prog->aux->arena = (void *)map;
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
fdput(f);
return -EINVAL;
}
}

View File

@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
.dealloc = bpf_kprobe_multi_link_dealloc,
.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
if (umulti_link->task)
put_task_struct(umulti_link->task);
path_put(&umulti_link->path);
}
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
if (umulti_link->task)
put_task_struct(umulti_link->task);
path_put(&umulti_link->path);
kvfree(umulti_link->uprobes);
kfree(umulti_link);
}
@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
.dealloc = bpf_uprobe_multi_link_dealloc,
.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
};

View File

@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
if (irqs_disabled())
return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
if (irqs_disabled())
return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);

View File

@ -3,6 +3,8 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
#include <linux/types.h> /* for u32 */
struct btf_id_set {
u32 cnt;
u32 ids[];