bpf: Introduce may_goto instruction

Introduce the may_goto instruction, which from the verifier's point of view
is similar to the open-coded iterators bpf_for()/bpf_repeat() and the
bpf_loop() helper, but doesn't iterate over any objects.
In assembly 'may_goto' is a nop most of the time, until the bpf runtime has
to terminate the program for whatever reason. In the current implementation
may_goto has a hidden counter, but other mechanisms can be used.
For programs written in C, a later patch introduces the 'cond_break' macro,
which combines 'may_goto' with a 'break' statement and has similar semantics:
cond_break is a nop until the bpf runtime has to break out of this loop.
It can be used in any normal "for" or "while" loop, e.g.:

  for (i = zero; i < cnt; cond_break, i++) {
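
For example, a complete loop using cond_break might look like this sketch
(illustrative only; it assumes the usual vmlinux.h/bpf_helpers.h includes,
the cond_break definition from the later patch, and a hypothetical attach
point; 'zero' is a global to keep 'i' imprecise, see below):

  static int zero = 0;

  SEC("fentry/__x64_sys_getpgid")   /* hypothetical attach point */
  int iter_with_cond_break(void *ctx)
  {
      long i, sum = 0;

      for (i = zero; i < 1000000; cond_break, i++)
          sum += i;
      /* sum is discarded; the loop exists to exercise cond_break */
      return 0;
  }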

The verifier recognizes that may_goto is used in the program, reserves an
additional 8 bytes of stack, initializes them in the subprog prologue, and
replaces each may_goto instruction with:

  aux_reg = *(u64 *)(fp - 40)
  if aux_reg == 0 goto pc+off
  aux_reg -= 1
  *(u64 *)(fp - 40) = aux_reg
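
Here (fp - 40) is just an example offset; the counter lives in the 8 bytes
reserved right below the subprog's own stack. In C-like pseudocode, the
semantics of each executed may_goto are (a sketch, not literal kernel code):

  u64 budget = BPF_MAX_LOOPS;  /* written to the slot in the subprog prologue */

  /* every time may_goto executes: */
  if (budget == 0)
      goto pc_plus_off;        /* take the pseudo jump, e.g. break out */
  budget--;                    /* otherwise fall through as a nop */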

The may_goto instruction can be used by LLVM to implement __builtin_memcpy
and __builtin_strcmp.

may_goto is not a full substitute for the bpf_for() macro.
bpf_for() doesn't have an induction variable that the verifier sees,
so 'i' in bpf_for(i, 0, 100) is seen as imprecise and bounded.

But when the code is written as:
  for (i = 0; i < 100; cond_break, i++)
the verifier sees 'i' as a precise constant zero, hence
cond_break (aka may_goto) doesn't help the loop to converge.
A static or global variable can be used as a workaround:
  static int zero = 0;
  for (i = zero; i < 100; cond_break, i++) // works!

may_goto works well with arena pointers, which don't need to be bounds
checked on access. Loads/stores from the arena return imprecise unbounded
scalars, and loops with may_goto pass the verifier.
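
For instance, walking an arena-resident linked list could look like this
sketch (it assumes the __arena address-space support from the related arena
patch set; the type and field names are illustrative):

  struct node __arena *n = root;
  long sum = 0;

  while (n) {
      sum += n->value;   /* arena access: no explicit bounds check */
      n = n->next;       /* loads an imprecise unbounded scalar */
      cond_break;        /* may_goto lets the verifier converge */
  }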

Reserve the new opcode BPF_JMP | BPF_JCOND for the may_goto insn.
JCOND stands for conditional pseudo jump.
Since a goto_or_nop insn has also been proposed, it may share the same
opcode. may_goto vs goto_or_nop can be distinguished by src_reg:
  code = BPF_JMP | BPF_JCOND
  src_reg = 0 - may_goto
  src_reg = 1 - goto_or_nop
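
A raw may_goto instruction can thus be constructed as follows (a sketch;
'off' stands for the desired jump offset and must be non-zero, while dst_reg
and imm must be zero, per the verifier checks below):

  struct bpf_insn may_goto = {
      .code    = BPF_JMP | BPF_JCOND,  /* 0x05 | 0xe0 */
      .dst_reg = 0,
      .src_reg = BPF_MAY_GOTO,         /* 0; 1 would mean goto_or_nop */
      .off     = off,                  /* jump target, must not be 0 */
      .imm     = 0,
  };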

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20240306031929.42666-2-alexei.starovoitov@gmail.com

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h

@@ -449,6 +449,7 @@ struct bpf_verifier_state {
 	u32 jmp_history_cnt;
 	u32 dfs_depth;
 	u32 callback_unroll_depth;
+	u32 may_goto_depth;
 };
 
 #define bpf_get_spilled_reg(slot, frame, mask) \
@@ -619,6 +620,7 @@ struct bpf_subprog_info {
 	u32 start; /* insn idx of function entry point */
 	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
 	u16 stack_depth; /* max. stack depth used by this function */
+	u16 stack_extra;
 	bool has_tail_call: 1;
 	bool tail_call_reachable: 1;
 	bool has_ld_abs: 1;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h

@@ -42,6 +42,7 @@
 #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
 #define BPF_JSLT	0xc0	/* SLT is signed, '<' */
 #define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
+#define BPF_JCOND	0xe0	/* conditional pseudo jumps: may_goto, goto_or_nop */
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
@@ -50,6 +51,10 @@
 #define BPF_XCHG	(0xe0 | BPF_FETCH)	/* atomic exchange */
 #define BPF_CMPXCHG	(0xf0 | BPF_FETCH)	/* atomic compare-and-write */
 
+enum bpf_cond_pseudo_jmp {
+	BPF_MAY_GOTO = 0,
+};
+
 /* Register numbers */
 enum {
 	BPF_REG_0 = 0,

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c

@@ -1675,6 +1675,7 @@ bool bpf_opcode_in_insntable(u8 code)
 		[BPF_LD | BPF_IND | BPF_B] = true,
 		[BPF_LD | BPF_IND | BPF_H] = true,
 		[BPF_LD | BPF_IND | BPF_W] = true,
+		[BPF_JMP | BPF_JCOND] = true,
 	};
 #undef BPF_INSN_3_TBL
 #undef BPF_INSN_2_TBL

diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c

@@ -322,6 +322,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 	} else if (insn->code == (BPF_JMP | BPF_JA)) {
 		verbose(cbs->private_data, "(%02x) goto pc%+d\n",
 			insn->code, insn->off);
+	} else if (insn->code == (BPF_JMP | BPF_JCOND) &&
+		   insn->src_reg == BPF_MAY_GOTO) {
+		verbose(cbs->private_data, "(%02x) may_goto pc%+d\n",
+			insn->code, insn->off);
 	} else if (insn->code == (BPF_JMP32 | BPF_JA)) {
 		verbose(cbs->private_data, "(%02x) gotol pc%+d\n",
 			insn->code, insn->imm);

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c

@@ -533,6 +533,16 @@ static bool is_async_callback_calling_insn(struct bpf_insn *insn)
 	return bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm);
 }
 
+static bool is_may_goto_insn(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
+}
+
+static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
+{
+	return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
+}
+
 static bool is_storage_get_function(enum bpf_func_id func_id)
 {
 	return func_id == BPF_FUNC_sk_storage_get ||
@@ -1429,6 +1439,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	dst_state->dfs_depth = src->dfs_depth;
 	dst_state->callback_unroll_depth = src->callback_unroll_depth;
 	dst_state->used_as_loop_entry = src->used_as_loop_entry;
+	dst_state->may_goto_depth = src->may_goto_depth;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -14871,11 +14882,36 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	int err;
 
 	/* Only conditional jumps are expected to reach here. */
-	if (opcode == BPF_JA || opcode > BPF_JSLE) {
+	if (opcode == BPF_JA || opcode > BPF_JCOND) {
 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
 		return -EINVAL;
 	}
 
+	if (opcode == BPF_JCOND) {
+		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
+		int idx = *insn_idx;
+
+		if (insn->code != (BPF_JMP | BPF_JCOND) ||
+		    insn->src_reg != BPF_MAY_GOTO ||
+		    insn->dst_reg || insn->imm || insn->off == 0) {
+			verbose(env, "invalid may_goto off %d imm %d\n",
+				insn->off, insn->imm);
+			return -EINVAL;
+		}
+		prev_st = find_prev_entry(env, cur_st->parent, idx);
+
+		/* branch out 'fallthrough' insn as a new state to explore */
+		queued_st = push_stack(env, idx + 1, idx, false);
+		if (!queued_st)
+			return -ENOMEM;
+
+		queued_st->may_goto_depth++;
+		if (prev_st)
+			widen_imprecise_scalars(env, prev_st, queued_st);
+		*insn_idx += insn->off;
+		return 0;
+	}
+
 	/* check src2 operand */
 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 	if (err)
@@ -15659,6 +15695,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
 	default:
 		/* conditional jump with two edges */
 		mark_prune_point(env, t);
+		if (is_may_goto_insn(insn))
+			mark_force_checkpoint(env, t);
 
 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
 		if (ret)
@@ -17135,6 +17173,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 				}
 				goto skip_inf_loop_check;
 			}
+			if (is_may_goto_insn_at(env, insn_idx)) {
+				if (states_equal(env, &sl->state, cur, true)) {
+					update_loop_entry(cur, &sl->state);
+					goto hit;
+				}
+				goto skip_inf_loop_check;
+			}
 			if (calls_callback(env, insn_idx)) {
 				if (states_equal(env, &sl->state, cur, true))
 					goto hit;
@@ -17144,6 +17189,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			if (states_maybe_looping(&sl->state, cur) &&
 			    states_equal(env, &sl->state, cur, true) &&
 			    !iter_active_depths_differ(&sl->state, cur) &&
+			    sl->state.may_goto_depth == cur->may_goto_depth &&
 			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
 				verbose_linfo(env, insn_idx, "; ");
 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
@@ -19408,7 +19454,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 	struct bpf_insn insn_buf[16];
 	struct bpf_prog *new_prog;
 	struct bpf_map *map_ptr;
-	int i, ret, cnt, delta = 0;
+	int i, ret, cnt, delta = 0, cur_subprog = 0;
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	u16 stack_depth = subprogs[cur_subprog].stack_depth;
+	u16 stack_depth_extra = 0;
 
 	if (env->seen_exception && !env->exception_callback_subprog) {
 		struct bpf_insn patch[] = {
@@ -19428,7 +19477,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 		mark_subprog_exc_cb(env, env->exception_callback_subprog);
 	}
 
-	for (i = 0; i < insn_cnt; i++, insn++) {
+	for (i = 0; i < insn_cnt;) {
 		/* Make divide-by-zero exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
@@ -19467,7 +19516,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
@@ -19487,7 +19536,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
@@ -19502,7 +19551,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			aux = &env->insn_aux_data[i + delta];
 			if (!aux->alu_state ||
 			    aux->alu_state == BPF_ALU_NON_POINTER)
-				continue;
+				goto next_insn;
 
 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
@@ -19540,19 +19589,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
+		if (is_may_goto_insn(insn)) {
+			int stack_off = -stack_depth - 8;
+
+			stack_depth_extra = 8;
+			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
+			insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
+			cnt = 4;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
 		if (insn->code != (BPF_JMP | BPF_CALL))
-			continue;
+			goto next_insn;
 		if (insn->src_reg == BPF_PSEUDO_CALL)
-			continue;
+			goto next_insn;
 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
 			if (ret)
 				return ret;
 			if (cnt == 0)
-				continue;
+				goto next_insn;
 
 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 			if (!new_prog)
@@ -19561,7 +19630,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta	 += cnt - 1;
 			env->prog = prog = new_prog;
 			insn	  = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		if (insn->imm == BPF_FUNC_get_route_realm)
@@ -19609,11 +19678,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			}
 
 			insn->imm = ret + 1;
-			continue;
+			goto next_insn;
 		}
 
 		if (!bpf_map_ptr_unpriv(aux))
-			continue;
+			goto next_insn;
 
 		/* instead of changing every JIT dealing with tail_call
 		 * emit two extra insns:
@@ -19642,7 +19711,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		if (insn->imm == BPF_FUNC_timer_set_callback) {
@@ -19754,7 +19823,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
@@ -19785,31 +19854,31 @@ patch_map_ops_generic:
 		switch (insn->imm) {
 		case BPF_FUNC_map_lookup_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_map_update_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_update_elem);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_map_delete_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_map_push_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_push_elem);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_map_pop_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_map_peek_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_redirect_map:
 			insn->imm = BPF_CALL_IMM(ops->map_redirect);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_for_each_map_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
-			continue;
+			goto next_insn;
 		case BPF_FUNC_map_lookup_percpu_elem:
 			insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
-			continue;
+			goto next_insn;
 		}
 
 		goto patch_call_imm;
@@ -19837,7 +19906,7 @@ patch_map_ops_generic:
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Implement bpf_get_func_arg inline. */
@@ -19862,7 +19931,7 @@ patch_map_ops_generic:
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Implement bpf_get_func_ret inline. */
@@ -19890,7 +19959,7 @@ patch_map_ops_generic:
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Implement get_func_arg_cnt inline. */
@@ -19905,7 +19974,7 @@ patch_map_ops_generic:
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Implement bpf_get_func_ip inline. */
@@ -19920,7 +19989,7 @@ patch_map_ops_generic:
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 		/* Implement bpf_kptr_xchg inline */
@@ -19938,7 +20007,7 @@ patch_map_ops_generic:
 			delta    += cnt - 1;
 			env->prog = prog = new_prog;
 			insn      = new_prog->insnsi + i + delta;
-			continue;
+			goto next_insn;
 		}
 
 patch_call_imm:
 		fn = env->ops->get_func_proto(insn->imm, env->prog);
@@ -19952,6 +20021,40 @@ patch_call_imm:
 			return -EFAULT;
 		}
 		insn->imm = fn->func - __bpf_call_base;
+next_insn:
+		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+			subprogs[cur_subprog].stack_depth += stack_depth_extra;
+			subprogs[cur_subprog].stack_extra = stack_depth_extra;
+			cur_subprog++;
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			stack_depth_extra = 0;
+		}
+		i++;
+		insn++;
 	}
 
+	env->prog->aux->stack_depth = subprogs[0].stack_depth;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		int subprog_start = subprogs[i].start;
+		int stack_slots = subprogs[i].stack_extra / 8;
+
+		if (!stack_slots)
+			continue;
+		if (stack_slots > 1) {
+			verbose(env, "verifier bug: stack_slots supports may_goto only\n");
+			return -EFAULT;
+		}
+
+		/* Add ST insn to subprog prologue to init extra stack */
+		insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+					 -subprogs[i].stack_depth, BPF_MAX_LOOPS);
+		/* Copy first actual insn to preserve it */
+		insn_buf[1] = env->prog->insnsi[subprog_start];
+
+		new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2);
+		if (!new_prog)
+			return -ENOMEM;
+		env->prog = prog = new_prog;
+	}
+
 	/* Since poke tab is now finalized, publish aux to tracker. */

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h

@@ -42,6 +42,7 @@
 #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
 #define BPF_JSLT	0xc0	/* SLT is signed, '<' */
 #define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
+#define BPF_JCOND	0xe0	/* conditional pseudo jumps: may_goto, goto_or_nop */
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
@@ -50,6 +51,10 @@
 #define BPF_XCHG	(0xe0 | BPF_FETCH)	/* atomic exchange */
 #define BPF_CMPXCHG	(0xf0 | BPF_FETCH)	/* atomic compare-and-write */
 
+enum bpf_cond_pseudo_jmp {
+	BPF_MAY_GOTO = 0,
+};
+
 /* Register numbers */
 enum {
 	BPF_REG_0 = 0,