Merge branch 'complete-bpf-verifier-precision-tracking-support-for-register-spills'

Andrii Nakryiko says:

====================
Complete BPF verifier precision tracking support for register spills

Add support to the BPF verifier to track register spill/fill to/from the
stack regardless of whether it is done through the read-only R10 register
(which is the only form supported today) or through a general register after
copying R10 into it, while also potentially modifying the offset.
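
For illustration, here is a minimal sketch (not part of the series) of the two
spill/fill forms in question, written in the same inline-asm style as the
selftests further below; the function name is made up and the usual selftest
includes (<linux/bpf.h>, <bpf/bpf_helpers.h>, "bpf_misc.h") are assumed:

  SEC("raw_tp")
  __naked void spill_fill_forms_sketch(void)
  {
      asm volatile (
          "call %[bpf_get_prandom_u32];"
          /* form recognized today: spill/fill directly through the
           * read-only r10 (frame pointer) register
           */
          "*(u64 *)(r10 -8) = r0;"
          "r4 = *(u64 *)(r10 -8);"
          /* newly tracked form: same stack slot, but accessed through
           * a copy of r10 with an adjusted offset
           */
          "r3 = r10;"
          "r3 += -8;"
          "*(u64 *)(r3 +0) = r0;"
          "r4 = *(u64 *)(r3 +0);"
          "r0 = 0;"
          "exit;"
          :
          : __imm(bpf_get_prandom_u32)
          : __clobber_all);
  }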

Once we add this generic register spill/fill support to precision
backtracking, we can take advantage of it to stop doing eager STACK_ZERO
conversion on register spill. Instead we can rely on the (im)precision of the
spilled const zero register to improve verifier state pruning efficiency.
Using a const zero register to initialize stack slots is very common with
__builtin_memset() usage or just zero-initializing variables on the stack,
and it causes unnecessary state duplication, because that STACK_ZERO
knowledge is often not needed for correctness: those zero values are never
used in a precise context. Thus, relying on register imprecision helps
tremendously, especially in real-world BPF programs.
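
As a hypothetical example of that pattern, zero-initializing a stack variable
in a BPF C program is typically lowered by the compiler to a single "rX = 0"
followed by repeated spills of that const zero register (the struct and
function names below are made up):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct val { __u64 a, b, c, d; };

  SEC("raw_tp")
  int zero_init_example(void *ctx)
  {
      struct val v;

      /* commonly becomes "r1 = 0" plus a series of
       * "*(u64 *)(r10 -off) = r1" stores of the same const zero register;
       * none of those slots need precise STACK_ZERO tracking unless the
       * zeroes are later used in a precise context
       */
      __builtin_memset(&v, 0, sizeof(v));

      return (int)v.a; /* keep v alive for the example */
  }

  char _license[] SEC("license") = "GPL";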

To make a spilled const zero register behave completely equivalently to
STACK_ZERO, we need to improve a few other small pieces, which is done in the
second part of the patch set. See individual patches for details. There are
also two small bug fixes spotted during STACK_ZERO debugging.

The patch set logically consists of three changes:
  - patch #1 (and corresponding tests in patch #2) is fixing/improving precision
    propagation for stack spills/fills (see the flag-packing sketch right after
    this list). This can be landed as a stand-alone improvement;
  - patches #3 through #9 improve verification scalability by utilizing
    register (im)precision instead of eager STACK_ZERO. These changes depend
    on patch #1;
  - patch #10 is a memory efficiency improvement to how instruction/jump
    history is tracked and maintained. It depends on patch #1, but is not,
    strictly speaking, required, even though I believe it's a good long-term
    solution for keeping path-dependent per-instruction information, kind of
    like a path-dependent counterpart to the path-agnostic insn_aux array.
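
For reference, here is a small standalone sketch of the 10-bit flag packing
used by the new bpf_jmp_history_entry (the constants and the encode() helper
mirror the INSN_F_* enum and insn_stack_access_flags() added in the hunks
below; the test values are arbitrary):

  #include <assert.h>

  enum {
      INSN_F_FRAMENO_MASK = 0x7,    /* bits 0-2: frame number (MAX_CALL_FRAMES == 8) */
      INSN_F_SPI_MASK     = 0x3f,   /* bits 3-8: stack slot index (512 / 8 == 64 slots) */
      INSN_F_SPI_SHIFT    = 3,
      INSN_F_STACK_ACCESS = 1 << 9, /* bit 9: insn accessed the stack (BIT(9) in the kernel) */
  };

  static int encode(int frameno, int spi)
  {
      return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
  }

  int main(void)
  {
      int flags = encode(2 /* frameno */, 17 /* spi */);

      assert((flags & INSN_F_FRAMENO_MASK) == 2);
      assert(((flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK) == 17);
      assert(flags & INSN_F_STACK_ACCESS);
      return 0;
  }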

v3->v4:
  - fixed up Fixes tag (Alexei);
  - fixed a few more selftests to not use the BPF_ST instruction in inline asm
    directly; checked with CI, it was happy (CI);
v2->v3:
  - BPF_ST instruction workaround (Eduard);
  - force dereference in added tests to catch problems (Eduard);
  - some commit message massaging (Alexei);
v1->v2:
  - cleanups, WARN_ONCE(), insn_flags helpers added (Eduard);
  - added more selftests for STACK_ZERO/STACK_MISC cases (Eduard);
  - a bit more detailed explanation of the effect of avoiding STACK_ZERO in
    favor of register spill in patch #8's commit message (Alexei);
  - global shared instruction history refactoring moved to be the last patch
    in the series to make it easier to revert it, if applied (Alexei).
====================

Link: https://lore.kernel.org/r/20231205184248.1502704-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
commit 3aee2bf9c4 by Alexei Starovoitov, 2023-12-05 13:40:21 -08:00
5 changed files with 406 additions and 115 deletions


@ -325,12 +325,34 @@ struct bpf_func_state {
int allocated_stack;
};
struct bpf_idx_pair {
u32 prev_idx;
u32 idx;
#define MAX_CALL_FRAMES 8
/* instruction history flags, used in bpf_jmp_history_entry.flags field */
enum {
/* instruction references stack slot through PTR_TO_STACK register;
* we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
* and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512,
* 8 bytes per slot, so slot index (spi) is [0, 63])
*/
INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */
INSN_F_SPI_MASK = 0x3f, /* 6 bits */
INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
};
static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
struct bpf_jmp_history_entry {
u32 idx;
/* insn idx can't be bigger than 1 million */
u32 prev_idx : 22;
/* special flags, e.g., whether insn is doing register stack spill/load */
u32 flags : 10;
};
#define MAX_CALL_FRAMES 8
/* Maximum number of register states that can exist at once */
#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
struct bpf_verifier_state {
@ -413,7 +435,7 @@ struct bpf_verifier_state {
* For most states jmp_history_cnt is [0-3].
* For loops can go up to ~40.
*/
struct bpf_idx_pair *jmp_history;
struct bpf_jmp_history_entry *jmp_history;
u32 jmp_history_cnt;
u32 dfs_depth;
u32 callback_unroll_depth;
@ -656,6 +678,7 @@ struct bpf_verifier_env {
int cur_stack;
} cfg;
struct backtrack_state bt;
struct bpf_jmp_history_entry *cur_hist_ent;
u32 pass_cnt; /* number of times do_check() was called */
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */


@ -1144,6 +1144,21 @@ static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
stack->spilled_ptr.type == SCALAR_VALUE;
}
/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
* case they are equivalent, or it's STACK_ZERO, in which case we preserve
* more precise STACK_ZERO.
* Note, in unprivileged mode leaving STACK_INVALID is wrong, so we take
* env->allow_ptr_leaks into account and force STACK_MISC, if necessary.
*/
static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
{
if (*stype == STACK_ZERO)
return;
if (env->allow_ptr_leaks && *stype == STACK_INVALID)
return;
*stype = STACK_MISC;
}
static void scrub_spilled_slot(u8 *stype)
{
if (*stype != STACK_INVALID)
@ -1355,8 +1370,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
int i, err;
dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
GFP_USER);
src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
GFP_USER);
if (!dst_state->jmp_history)
return -ENOMEM;
dst_state->jmp_history_cnt = src->jmp_history_cnt;
@ -3221,6 +3236,21 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return __check_reg_arg(env, state->regs, regno, t);
}
static int insn_stack_access_flags(int frameno, int spi)
{
return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
}
static int insn_stack_access_spi(int insn_flags)
{
return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
}
static int insn_stack_access_frameno(int insn_flags)
{
return insn_flags & INSN_F_FRAMENO_MASK;
}
static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
{
env->insn_aux_data[idx].jmp_point = true;
@ -3232,28 +3262,51 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
}
/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env,
struct bpf_verifier_state *cur)
static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
int insn_flags)
{
u32 cnt = cur->jmp_history_cnt;
struct bpf_idx_pair *p;
struct bpf_jmp_history_entry *p;
size_t alloc_size;
if (!is_jmp_point(env, env->insn_idx))
/* combine instruction flags if we already recorded this instruction */
if (env->cur_hist_ent) {
/* atomic instructions push insn_flags twice, for READ and
* WRITE sides, but they should agree on stack slot
*/
WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
(env->cur_hist_ent->flags & insn_flags) != insn_flags,
"verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
env->insn_idx, env->cur_hist_ent->flags, insn_flags);
env->cur_hist_ent->flags |= insn_flags;
return 0;
}
cnt++;
alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
if (!p)
return -ENOMEM;
p[cnt - 1].idx = env->insn_idx;
p[cnt - 1].prev_idx = env->prev_insn_idx;
cur->jmp_history = p;
p = &cur->jmp_history[cnt - 1];
p->idx = env->insn_idx;
p->prev_idx = env->prev_insn_idx;
p->flags = insn_flags;
cur->jmp_history_cnt = cnt;
env->cur_hist_ent = p;
return 0;
}
static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
u32 hist_end, int insn_idx)
{
if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
return &st->jmp_history[hist_end - 1];
return NULL;
}
/* Backtrack one insn at a time. If idx is not at the top of recorded
* history then previous instruction came from straight line execution.
* Return -ENOENT if we exhausted all instructions within given state.
@ -3415,9 +3468,14 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
return bt->reg_masks[bt->frame] & (1 << reg);
}
static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
{
return bt->stack_masks[frame] & (1ull << slot);
}
static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
{
return bt->stack_masks[bt->frame] & (1ull << slot);
return bt_is_frame_slot_set(bt, bt->frame, slot);
}
/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
@ -3471,7 +3529,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
* - *was* processed previously during backtracking.
*/
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
struct backtrack_state *bt)
struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
{
const struct bpf_insn_cbs cbs = {
.cb_call = disasm_kfunc_name,
@ -3484,7 +3542,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
u8 mode = BPF_MODE(insn->code);
u32 dreg = insn->dst_reg;
u32 sreg = insn->src_reg;
u32 spi, i;
u32 spi, i, fr;
if (insn->code == 0)
return 0;
@ -3545,20 +3603,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* by 'precise' mark in corresponding register of this state.
* No further tracking necessary.
*/
if (insn->src_reg != BPF_REG_FP)
if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
return 0;
/* dreg = *(u64 *)[fp - off] was a fill from the stack.
* that [fp - off] slot contains scalar that needs to be
* tracked with precision
*/
spi = (-insn->off - 1) / BPF_REG_SIZE;
if (spi >= 64) {
verbose(env, "BUG spi %d\n", spi);
WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT;
}
bt_set_slot(bt, spi);
spi = insn_stack_access_spi(hist->flags);
fr = insn_stack_access_frameno(hist->flags);
bt_set_frame_slot(bt, fr, spi);
} else if (class == BPF_STX || class == BPF_ST) {
if (bt_is_reg_set(bt, dreg))
/* stx & st shouldn't be using _scalar_ dst_reg
@ -3567,17 +3620,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
*/
return -ENOTSUPP;
/* scalars can only be spilled into stack */
if (insn->dst_reg != BPF_REG_FP)
if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
return 0;
spi = (-insn->off - 1) / BPF_REG_SIZE;
if (spi >= 64) {
verbose(env, "BUG spi %d\n", spi);
WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT;
}
if (!bt_is_slot_set(bt, spi))
spi = insn_stack_access_spi(hist->flags);
fr = insn_stack_access_frameno(hist->flags);
if (!bt_is_frame_slot_set(bt, fr, spi))
return 0;
bt_clear_slot(bt, spi);
bt_clear_frame_slot(bt, fr, spi);
if (class == BPF_STX)
bt_set_reg(bt, sreg);
} else if (class == BPF_JMP || class == BPF_JMP32) {
@ -3621,10 +3670,14 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT;
}
/* we don't track register spills perfectly,
* so fallback to force-precise instead of failing */
if (bt_stack_mask(bt) != 0)
return -ENOTSUPP;
/* we are now tracking register spills correctly,
* so any instance of leftover slots is a bug
*/
if (bt_stack_mask(bt) != 0) {
verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
return -EFAULT;
}
/* propagate r1-r5 to the caller */
for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
if (bt_is_reg_set(bt, i)) {
@ -3649,8 +3702,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT;
}
if (bt_stack_mask(bt) != 0)
return -ENOTSUPP;
if (bt_stack_mask(bt) != 0) {
verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
return -EFAULT;
}
/* clear r1-r5 in callback subprog's mask */
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
bt_clear_reg(bt, i);
@ -4087,6 +4143,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
for (;;) {
DECLARE_BITMAP(mask, 64);
u32 history = st->jmp_history_cnt;
struct bpf_jmp_history_entry *hist;
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
@ -4150,7 +4207,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
err = 0;
skip_first = false;
} else {
err = backtrack_insn(env, i, subseq_idx, bt);
hist = get_jmp_hist_entry(st, history, i);
err = backtrack_insn(env, i, subseq_idx, hist, bt);
}
if (err == -ENOTSUPP) {
mark_all_scalars_precise(env, env->cur_state);
@ -4203,22 +4261,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
for_each_set_bit(i, mask, 64) {
if (i >= func->allocated_stack / BPF_REG_SIZE) {
/* the sequence of instructions:
* 2: (bf) r3 = r10
* 3: (7b) *(u64 *)(r3 -8) = r0
* 4: (79) r4 = *(u64 *)(r10 -8)
* doesn't contain jmps. It's backtracked
* as a single block.
* During backtracking insn 3 is not recognized as
* stack access, so at the end of backtracking
* stack slot fp-8 is still marked in stack_mask.
* However the parent state may not have accessed
* fp-8 and it's "unallocated" stack space.
* In such case fallback to conservative.
*/
mark_all_scalars_precise(env, env->cur_state);
bt_reset(bt);
return 0;
verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
i, func->allocated_stack / BPF_REG_SIZE);
WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
return -EFAULT;
}
if (!is_spilled_scalar_reg(&func->stack[i])) {
@ -4355,7 +4401,8 @@ static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_
dst->live = live;
}
static void save_register_state(struct bpf_func_state *state,
static void save_register_state(struct bpf_verifier_env *env,
struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg,
int size)
{
@ -4370,7 +4417,7 @@ static void save_register_state(struct bpf_func_state *state,
/* size < 8 bytes spill */
for (; i; i--)
scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
}
static bool is_bpf_st_mem(struct bpf_insn *insn)
@ -4391,7 +4438,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
struct bpf_reg_state *reg = NULL;
u32 dst_reg = insn->dst_reg;
int insn_flags = insn_stack_access_flags(state->frameno, spi);
err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
if (err)
@ -4400,7 +4447,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
* so it's aligned access and [off, off + size) are within stack limits
*/
if (!env->allow_ptr_leaks &&
state->stack[spi].slot_type[0] == STACK_SPILL &&
is_spilled_reg(&state->stack[spi]) &&
size != BPF_REG_SIZE) {
verbose(env, "attempt to corrupt spilled pointer on stack\n");
return -EACCES;
@ -4430,20 +4477,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
!register_is_null(reg) && env->bpf_capable) {
if (dst_reg != BPF_REG_FP) {
/* The backtracking logic can only recognize explicit
* stack slot address like [fp - 8]. Other spill of
* scalar via different register has to be conservative.
* Backtrack from here and mark all registers as precise
* that contributed into 'reg' being a constant.
*/
err = mark_chain_precision(env, value_regno);
if (err)
return err;
}
save_register_state(state, spi, reg, size);
if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) {
save_register_state(env, state, spi, reg, size);
/* Break the relation on a narrowing spill. */
if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
state->stack[spi].spilled_ptr.id = 0;
@ -4453,7 +4488,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
__mark_reg_known(&fake_reg, insn->imm);
fake_reg.type = SCALAR_VALUE;
save_register_state(state, spi, &fake_reg, size);
save_register_state(env, state, spi, &fake_reg, size);
insn_flags = 0; /* not a register spill */
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@ -4465,7 +4501,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
save_register_state(state, spi, reg, size);
save_register_state(env, state, spi, reg, size);
} else {
u8 type = STACK_MISC;
@ -4490,7 +4526,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
/* when we zero initialize stack slots mark them as such */
if ((reg && register_is_null(reg)) ||
(!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
/* backtracking doesn't work for STACK_ZERO yet. */
/* STACK_ZERO case happened because register spill
* wasn't properly aligned at the stack slot boundary,
* so it's not a register spill anymore; force
* originating register to be precise to make
* STACK_ZERO correct for subsequent states
*/
err = mark_chain_precision(env, value_regno);
if (err)
return err;
@ -4499,9 +4540,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
/* Mark slots affected by this stack write. */
for (i = 0; i < size; i++)
state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
type;
state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
insn_flags = 0; /* not a register spill */
}
if (insn_flags)
return push_jmp_history(env, env->cur_state, insn_flags);
return 0;
}
@ -4694,6 +4738,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
struct bpf_reg_state *reg;
u8 *stype, type;
int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
stype = reg_state->stack[spi].slot_type;
reg = &reg_state->stack[spi].spilled_ptr;
@ -4726,25 +4771,42 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
copy_register_state(&state->regs[dst_regno], reg);
state->regs[dst_regno].subreg_def = subreg_def;
} else {
int spill_cnt = 0, zero_cnt = 0;
for (i = 0; i < size; i++) {
type = stype[(slot - i) % BPF_REG_SIZE];
if (type == STACK_SPILL)
if (type == STACK_SPILL) {
spill_cnt++;
continue;
}
if (type == STACK_MISC)
continue;
if (type == STACK_ZERO) {
zero_cnt++;
continue;
}
if (type == STACK_INVALID && env->allow_uninit_stack)
continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
}
mark_reg_unknown(env, state->regs, dst_regno);
if (spill_cnt == size &&
tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
__mark_reg_const_zero(&state->regs[dst_regno]);
/* this IS register fill, so keep insn_flags */
} else if (zero_cnt == size) {
/* similarly to mark_reg_stack_read(), preserve zeroes */
__mark_reg_const_zero(&state->regs[dst_regno]);
insn_flags = 0; /* not restoring original register state */
} else {
mark_reg_unknown(env, state->regs, dst_regno);
insn_flags = 0; /* not restoring original register state */
}
}
state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
return 0;
}
if (dst_regno >= 0) {
} else if (dst_regno >= 0) {
/* restore register state from stack */
copy_register_state(&state->regs[dst_regno], reg);
/* mark reg as written since spilled pointer state likely
@ -4780,7 +4842,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
if (dst_regno >= 0)
mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
insn_flags = 0; /* we are not restoring spilled register */
}
if (insn_flags)
return push_jmp_history(env, env->cur_state, insn_flags);
return 0;
}
@ -6940,7 +7005,6 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
if (err)
return err;
return 0;
}
@ -16910,7 +16974,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* the precision needs to be propagated back in
* the current state.
*/
err = err ? : push_jmp_history(env, cur);
if (is_jmp_point(env, env->insn_idx))
err = err ? : push_jmp_history(env, cur, 0);
err = err ? : propagate_precision(env, &sl->state);
if (err)
return err;
@ -17135,6 +17200,9 @@ static int do_check(struct bpf_verifier_env *env)
u8 class;
int err;
/* reset current history entry on each new instruction */
env->cur_hist_ent = NULL;
env->prev_insn_idx = prev_insn_idx;
if (env->insn_idx >= insn_cnt) {
verbose(env, "invalid insn idx %d insn_cnt %d\n",
@ -17174,7 +17242,7 @@ static int do_check(struct bpf_verifier_env *env)
}
if (is_jmp_point(env, env->insn_idx)) {
err = push_jmp_history(env, state);
err = push_jmp_history(env, state, 0);
if (err)
return err;
}


@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include <../../../tools/include/linux/filter.h>
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
@ -450,4 +451,130 @@ l0_%=: r1 >>= 16; \
: __clobber_all);
}
SEC("raw_tp")
__log_level(2)
__success
__msg("fp-8=0m??mmmm")
__msg("fp-16=00mm??mm")
__msg("fp-24=00mm???m")
__naked void spill_subregs_preserve_stack_zero(void)
{
asm volatile (
"call %[bpf_get_prandom_u32];"
/* 32-bit subreg spill with ZERO, MISC, and INVALID */
".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */
"*(u8 *)(r10 -2) = r0;" /* MISC */
/* fp-3 and fp-4 stay INVALID */
"*(u32 *)(r10 -8) = r0;"
/* 16-bit subreg spill with ZERO, MISC, and INVALID */
".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */
"*(u16 *)(r10 -12) = r0;" /* MISC */
/* fp-13 and fp-14 stay INVALID */
"*(u16 *)(r10 -16) = r0;"
/* 8-bit subreg spill with ZERO, MISC, and INVALID */
".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -18) = 0; */
"*(u16 *)(r10 -20) = r0;" /* MISC */
/* fp-21, fp-22, and fp-23 stay INVALID */
"*(u8 *)(r10 -24) = r0;"
"r0 = 0;"
"exit;"
:
: __imm(bpf_get_prandom_u32),
__imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)),
__imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)),
__imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0))
: __clobber_all);
}
char single_byte_buf[1] SEC(".data.single_byte_buf");
SEC("raw_tp")
__log_level(2)
__success
/* make sure fp-8 is all STACK_ZERO */
__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000")
/* but fp-16 is spilled IMPRECISE zero const reg */
__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
/* and now check that precision propagation works even for such a tricky case */
__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=P0 R10=fp0 fp-16_w=0")
__msg("11: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6")
__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
__naked void partial_stack_load_preserves_zeros(void)
{
asm volatile (
/* fp-8 is all STACK_ZERO */
".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
/* fp-16 is const zero register */
"r0 = 0;"
"*(u64 *)(r10 -16) = r0;"
/* load single U8 from non-aligned STACK_ZERO slot */
"r1 = %[single_byte_buf];"
"r2 = *(u8 *)(r10 -1);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* load single U8 from non-aligned ZERO REG slot */
"r1 = %[single_byte_buf];"
"r2 = *(u8 *)(r10 -9);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* load single U16 from non-aligned STACK_ZERO slot */
"r1 = %[single_byte_buf];"
"r2 = *(u16 *)(r10 -2);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* load single U16 from non-aligned ZERO REG slot */
"r1 = %[single_byte_buf];"
"r2 = *(u16 *)(r10 -10);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* load single U32 from non-aligned STACK_ZERO slot */
"r1 = %[single_byte_buf];"
"r2 = *(u32 *)(r10 -4);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* load single U32 from non-aligned ZERO REG slot */
"r1 = %[single_byte_buf];"
"r2 = *(u32 *)(r10 -12);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* for completeness, load U64 from STACK_ZERO slot */
"r1 = %[single_byte_buf];"
"r2 = *(u64 *)(r10 -8);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
/* for completeness, load U64 from ZERO REG slot */
"r1 = %[single_byte_buf];"
"r2 = *(u64 *)(r10 -16);"
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
"r0 = 0;"
"exit;"
:
: __imm_ptr(single_byte_buf),
__imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0))
: __clobber_common);
}
char _license[] SEC("license") = "GPL";


@ -589,11 +589,24 @@ static __u64 subprog_spill_reg_precise(void)
SEC("?raw_tp")
__success __log_level(2)
/* precision backtracking can't currently handle stack access not through r10,
* so we won't be able to mark stack slot fp-8 as precise, and so will
* fallback to forcing all as precise
*/
__msg("mark_precise: frame0: falling back to forcing all scalars precise")
__msg("10: (0f) r1 += r7")
__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)")
__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)")
__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2")
__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2")
__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6")
__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6")
__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8")
__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10")
__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
__naked int subprog_spill_into_parent_stack_slot_precise(void)
{
asm volatile (
@ -628,14 +641,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void)
);
}
__naked __noinline __used
static __u64 subprog_with_checkpoint(void)
SEC("?raw_tp")
__success __log_level(2)
__msg("17: (0f) r1 += r0")
__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4")
__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)")
__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)")
__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32")
__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10")
__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8")
__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10")
__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit")
__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1")
__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15")
__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
__naked int stack_slot_aliases_precision(void)
{
asm volatile (
"r0 = 0;"
/* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */
"goto +0;"
"r6 = 1;"
/* pass r6 through r1 into subprog to get it back as r0;
* this whole chain will have to be marked as precise later
*/
"r1 = r6;"
"call identity_subprog;"
/* let's setup two registers that are aliased to r10 */
"r7 = r10;"
"r7 += -8;" /* r7 = r10 - 8 */
"r8 = r10;"
"r8 += -32;" /* r8 = r10 - 32 */
/* now spill subprog's return value (a r6 -> r1 -> r0 chain)
* a few times through different stack pointer regs, making
* sure to use r10, r7, and r8 both in LDX and STX insns, and
* *importantly* also using a combination of const var_off and
* insn->off to validate that we record final stack slot
* correctly, instead of relying on just insn->off derivation,
* which is only valid for r10-based stack offset
*/
"*(u64 *)(r10 - 16) = r0;"
"r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */
"*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */
"r0 = *(u64 *)(r8 + 16);"
"*(u64 *)(r7 - 8) = r0;"
"r0 = *(u64 *)(r10 - 16);"
/* get ready to use r0 as an index into array to force precision */
"r0 *= 4;"
"r1 = %[vals];"
/* here r0->r1->r6 chain is forced to be precise and has to be
* propagated back to the beginning, including through the
* subprog call and all the stack spills and loads
*/
"r1 += r0;"
"r0 = *(u32 *)(r1 + 0);"
"exit;"
:
: __imm_ptr(vals)
: __clobber_common, "r6"
);
}


@ -140,10 +140,11 @@
.result = REJECT,
},
{
"precise: ST insn causing spi > allocated_stack",
"precise: ST zero to stack insn is supported",
.insns = {
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
/* not a register spill, so we stop precision propagation for R4 here */
BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
BPF_MOV64_IMM(BPF_REG_0, -1),
@ -157,11 +158,11 @@
mark_precise: frame0: last_idx 4 first_idx 2\
mark_precise: frame0: regs=r4 stack= before 4\
mark_precise: frame0: regs=r4 stack= before 3\
mark_precise: frame0: regs= stack=-8 before 2\
mark_precise: frame0: falling back to forcing all scalars precise\
force_precise: frame0: forcing r0 to be precise\
mark_precise: frame0: last_idx 5 first_idx 5\
mark_precise: frame0: parent state regs= stack=:",
mark_precise: frame0: parent state regs=r0 stack=:\
mark_precise: frame0: last_idx 4 first_idx 2\
mark_precise: frame0: regs=r0 stack= before 4\
5: R0=-1 R4=0",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
@ -169,6 +170,8 @@
"precise: STX insn causing spi > allocated_stack",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
/* make later reg spill more interesting by having somewhat known scalar */
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
@ -179,18 +182,21 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
.errstr = "mark_precise: frame0: last_idx 6 first_idx 6\
.errstr = "mark_precise: frame0: last_idx 7 first_idx 7\
mark_precise: frame0: parent state regs=r4 stack=:\
mark_precise: frame0: last_idx 5 first_idx 3\
mark_precise: frame0: regs=r4 stack= before 5\
mark_precise: frame0: regs=r4 stack= before 4\
mark_precise: frame0: regs= stack=-8 before 3\
mark_precise: frame0: falling back to forcing all scalars precise\
force_precise: frame0: forcing r0 to be precise\
force_precise: frame0: forcing r0 to be precise\
force_precise: frame0: forcing r0 to be precise\
force_precise: frame0: forcing r0 to be precise\
mark_precise: frame0: last_idx 6 first_idx 6\
mark_precise: frame0: last_idx 6 first_idx 4\
mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\
mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\
mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\
mark_precise: frame0: parent state regs=r0 stack=:\
mark_precise: frame0: last_idx 3 first_idx 3\
mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\
mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\
mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\
mark_precise: frame0: parent state regs=r0 stack=:\
mark_precise: frame0: last_idx 0 first_idx 0\
mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\
mark_precise: frame0: last_idx 7 first_idx 7\
mark_precise: frame0: parent state regs= stack=:",
.result = VERBOSE_ACCEPT,
.retval = -1,