Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-07-21

The following pull-request contains BPF updates for your *net-next* tree.

We've added 46 non-merge commits during the last 6 day(s) which contain
a total of 68 files changed, 4929 insertions(+), 526 deletions(-).

The main changes are:

1) Run BPF program on socket lookup, from Jakub.

2) Introduce attaching XDP programs to cpumap entries, from Lorenzo.

3) s390 JIT fixes, from Ilya.

4) Teach riscv JIT to emit compressed insns, from Luke.

5) Use build-time computed BTF ids in bpf iter, from Yonghong.
====================

Purely independent overlapping changes in both filter.h and xdp.h

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2020-07-22 12:34:55 -07:00 in commit dee72f8a0c.
68 changed files with 4930 additions and 526 deletions

View File

@ -13,6 +13,11 @@
#include <linux/filter.h>
#include <asm/cacheflush.h>
static inline bool rvc_enabled(void)
{
return IS_ENABLED(CONFIG_RISCV_ISA_C);
}
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
@ -48,9 +53,21 @@ enum {
RV_REG_T6 = 31,
};
static inline bool is_creg(u8 reg)
{
return (1 << reg) & (BIT(RV_REG_FP) |
BIT(RV_REG_S1) |
BIT(RV_REG_A0) |
BIT(RV_REG_A1) |
BIT(RV_REG_A2) |
BIT(RV_REG_A3) |
BIT(RV_REG_A4) |
BIT(RV_REG_A5));
}
struct rv_jit_context {
struct bpf_prog *prog;
u32 *insns; /* RV insns */
u16 *insns; /* RV insns */
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
@ -58,6 +75,12 @@ struct rv_jit_context {
int stack_size;
};
/* Convert from ninsns to bytes. */
static inline int ninsns_rvoff(int ninsns)
{
return ninsns << 1;
}
struct rv_jit_data {
struct bpf_binary_header *header;
u8 *image;
@ -74,8 +97,22 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
/* Emit a 4-byte riscv instruction. */
static inline void emit(const u32 insn, struct rv_jit_context *ctx)
{
if (ctx->insns) {
ctx->insns[ctx->ninsns] = insn;
ctx->insns[ctx->ninsns + 1] = (insn >> 16);
}
ctx->ninsns += 2;
}
/* Emit a 2-byte riscv compressed instruction. */
static inline void emitc(const u16 insn, struct rv_jit_context *ctx)
{
BUILD_BUG_ON(!rvc_enabled());
if (ctx->insns)
ctx->insns[ctx->ninsns] = insn;
@ -86,7 +123,7 @@ static inline int epilogue_offset(struct rv_jit_context *ctx)
{
int to = ctx->epilogue_offset, from = ctx->ninsns;
return (to - from) << 2;
return ninsns_rvoff(to - from);
}
/* Return -1 or inverted cond. */
@ -117,6 +154,36 @@ static inline int invert_bpf_cond(u8 cond)
return -1;
}
static inline bool is_6b_int(long val)
{
return -(1L << 5) <= val && val < (1L << 5);
}
static inline bool is_7b_uint(unsigned long val)
{
return val < (1UL << 7);
}
static inline bool is_8b_uint(unsigned long val)
{
return val < (1UL << 8);
}
static inline bool is_9b_uint(unsigned long val)
{
return val < (1UL << 9);
}
static inline bool is_10b_int(long val)
{
return -(1L << 9) <= val && val < (1L << 9);
}
static inline bool is_10b_uint(unsigned long val)
{
return val < (1UL << 10);
}
static inline bool is_12b_int(long val)
{
return -(1L << 11) <= val && val < (1L << 11);
@ -149,7 +216,7 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
off++; /* BPF branch is from PC+1, RV is from PC */
from = (insn > 0) ? ctx->offset[insn - 1] : 0;
to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
return (to - from) << 2;
return ninsns_rvoff(to - from);
}
/* Instruction formats. */
@ -207,6 +274,59 @@ static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
}
/* RISC-V compressed instruction formats. */
static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op)
{
return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op;
}
static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op)
{
u32 imm;
imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
return (funct3 << 13) | (rd << 7) | op | imm;
}
static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op)
{
return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op;
}
static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op)
{
return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op;
}
static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd,
u8 op)
{
return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
(imm_lo << 5) | ((rd & 0x7) << 2) | op;
}
static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2,
u8 op)
{
return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
(imm_lo << 5) | ((rs2 & 0x7) << 2) | op;
}
static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op)
{
return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) |
((rs2 & 0x7) << 2) | op;
}
static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op)
{
u32 imm;
imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm;
}
/* Instructions shared by both RV32 and RV64. */
static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
@ -414,6 +534,135 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
/* RVC instructions. */
static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
{
u32 imm;
imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) |
((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3);
return rv_ciw_insn(0x0, imm, rd, 0x0);
}
static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1)
{
u32 imm_hi, imm_lo;
imm_hi = (imm7 & 0x38) >> 3;
imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0);
}
static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2)
{
u32 imm_hi, imm_lo;
imm_hi = (imm7 & 0x38) >> 3;
imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0);
}
static inline u16 rvc_addi(u8 rd, u32 imm6)
{
return rv_ci_insn(0, imm6, rd, 0x1);
}
static inline u16 rvc_li(u8 rd, u32 imm6)
{
return rv_ci_insn(0x2, imm6, rd, 0x1);
}
static inline u16 rvc_addi16sp(u32 imm10)
{
u32 imm;
imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) |
((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5);
return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1);
}
static inline u16 rvc_lui(u8 rd, u32 imm6)
{
return rv_ci_insn(0x3, imm6, rd, 0x1);
}
static inline u16 rvc_srli(u8 rd, u32 imm6)
{
return rv_cb_insn(0x4, imm6, 0, rd, 0x1);
}
static inline u16 rvc_srai(u8 rd, u32 imm6)
{
return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1);
}
static inline u16 rvc_andi(u8 rd, u32 imm6)
{
return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1);
}
static inline u16 rvc_sub(u8 rd, u8 rs)
{
return rv_ca_insn(0x23, rd, 0, rs, 0x1);
}
static inline u16 rvc_xor(u8 rd, u8 rs)
{
return rv_ca_insn(0x23, rd, 0x1, rs, 0x1);
}
static inline u16 rvc_or(u8 rd, u8 rs)
{
return rv_ca_insn(0x23, rd, 0x2, rs, 0x1);
}
static inline u16 rvc_and(u8 rd, u8 rs)
{
return rv_ca_insn(0x23, rd, 0x3, rs, 0x1);
}
static inline u16 rvc_slli(u8 rd, u32 imm6)
{
return rv_ci_insn(0, imm6, rd, 0x2);
}
static inline u16 rvc_lwsp(u8 rd, u32 imm8)
{
u32 imm;
imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c);
return rv_ci_insn(0x2, imm, rd, 0x2);
}
static inline u16 rvc_jr(u8 rs1)
{
return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2);
}
static inline u16 rvc_mv(u8 rd, u8 rs)
{
return rv_cr_insn(0x8, rd, rs, 0x2);
}
static inline u16 rvc_jalr(u8 rs1)
{
return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2);
}
static inline u16 rvc_add(u8 rd, u8 rs)
{
return rv_cr_insn(0x9, rd, rs, 0x2);
}
static inline u16 rvc_swsp(u32 imm8, u8 rs2)
{
u32 imm;
imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6);
return rv_css_insn(0x6, imm, rs2, 0x2);
}
/*
* RV64-only instructions.
*
@ -503,6 +752,234 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
/* RV64-only RVC instructions. */
static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1)
{
u32 imm_hi, imm_lo;
imm_hi = (imm8 & 0x38) >> 3;
imm_lo = (imm8 & 0xc0) >> 6;
return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0);
}
static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2)
{
u32 imm_hi, imm_lo;
imm_hi = (imm8 & 0x38) >> 3;
imm_lo = (imm8 & 0xc0) >> 6;
return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0);
}
static inline u16 rvc_subw(u8 rd, u8 rs)
{
return rv_ca_insn(0x27, rd, 0, rs, 0x1);
}
static inline u16 rvc_addiw(u8 rd, u32 imm6)
{
return rv_ci_insn(0x1, imm6, rd, 0x1);
}
static inline u16 rvc_ldsp(u8 rd, u32 imm9)
{
u32 imm;
imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38);
return rv_ci_insn(0x3, imm, rd, 0x2);
}
static inline u16 rvc_sdsp(u32 imm9, u8 rs2)
{
u32 imm;
imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6);
return rv_css_insn(0x7, imm, rs2, 0x2);
}
#endif /* __riscv_xlen == 64 */
/* Helper functions that emit RVC instructions when possible. */
static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd == RV_REG_RA && rs && !imm)
emitc(rvc_jalr(rs), ctx);
else if (rvc_enabled() && !rd && rs && !imm)
emitc(rvc_jr(rs), ctx);
else
emit(rv_jalr(rd, rs, imm), ctx);
}
static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd && rs)
emitc(rvc_mv(rd, rs), ctx);
else
emit(rv_addi(rd, rs, 0), ctx);
}
static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd && rd == rs1 && rs2)
emitc(rvc_add(rd, rs2), ctx);
else
emit(rv_add(rd, rs1, rs2), ctx);
}
static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf))
emitc(rvc_addi16sp(imm), ctx);
else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) &&
!(imm & 0x3) && imm)
emitc(rvc_addi4spn(rd, imm), ctx);
else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm))
emitc(rvc_addi(rd, imm), ctx);
else
emit(rv_addi(rd, rs, imm), ctx);
}
static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd && is_6b_int(imm))
emitc(rvc_li(rd, imm), ctx);
else
emit(rv_addi(rd, RV_REG_ZERO, imm), ctx);
}
static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm)
emitc(rvc_lui(rd, imm), ctx);
else
emit(rv_lui(rd, imm), ctx);
}
static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd && rd == rs && imm && (u32)imm < __riscv_xlen)
emitc(rvc_slli(rd, imm), ctx);
else
emit(rv_slli(rd, rs, imm), ctx);
}
static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm))
emitc(rvc_andi(rd, imm), ctx);
else
emit(rv_andi(rd, rs, imm), ctx);
}
static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
emitc(rvc_srli(rd, imm), ctx);
else
emit(rv_srli(rd, rs, imm), ctx);
}
static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
emitc(rvc_srai(rd, imm), ctx);
else
emit(rv_srai(rd, rs, imm), ctx);
}
static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
emitc(rvc_sub(rd, rs2), ctx);
else
emit(rv_sub(rd, rs1, rs2), ctx);
}
static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
emitc(rvc_or(rd, rs2), ctx);
else
emit(rv_or(rd, rs1, rs2), ctx);
}
static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
emitc(rvc_and(rd, rs2), ctx);
else
emit(rv_and(rd, rs1, rs2), ctx);
}
static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
emitc(rvc_xor(rd, rs2), ctx);
else
emit(rv_xor(rd, rs1, rs2), ctx);
}
static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3))
emitc(rvc_lwsp(rd, off), ctx);
else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3))
emitc(rvc_lw(rd, off, rs1), ctx);
else
emit(rv_lw(rd, off, rs1), ctx);
}
static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3))
emitc(rvc_swsp(off, rs2), ctx);
else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3))
emitc(rvc_sw(rs1, off, rs2), ctx);
else
emit(rv_sw(rs1, off, rs2), ctx);
}
/* RV64-only helper functions. */
#if __riscv_xlen == 64
static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rd && rd == rs && is_6b_int(imm))
emitc(rvc_addiw(rd, imm), ctx);
else
emit(rv_addiw(rd, rs, imm), ctx);
}
static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7))
emitc(rvc_ldsp(rd, off), ctx);
else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7))
emitc(rvc_ld(rd, off, rs1), ctx);
else
emit(rv_ld(rd, off, rs1), ctx);
}
static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7))
emitc(rvc_sdsp(off, rs2), ctx);
else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7))
emitc(rvc_sd(rs1, off, rs2), ctx);
else
emit(rv_sd(rs1, off, rs2), ctx);
}
static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
emitc(rvc_subw(rd, rs2), ctx);
else
emit(rv_subw(rd, rs1, rs2), ctx);
}
#endif /* __riscv_xlen == 64 */
void bpf_jit_build_prologue(struct rv_jit_context *ctx);
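
The key change in this header is that rv_jit_context.insns is now an array of u16 half-words: ninsns counts 16-bit units, emit() stores a full 4-byte instruction as two halves, emitc() stores one compressed half, and ninsns_rvoff() turns the count back into a byte offset for branch math. A minimal standalone sketch of that bookkeeping (userspace C, not kernel code; the buffer size and instruction values are illustrative):

#include <stdint.h>
#include <stdio.h>

struct ctx { uint16_t insns[64]; int ninsns; };

static void emit32(struct ctx *c, uint32_t insn)        /* mirrors emit() */
{
        c->insns[c->ninsns] = (uint16_t)insn;
        c->insns[c->ninsns + 1] = (uint16_t)(insn >> 16);
        c->ninsns += 2;
}

static void emit16(struct ctx *c, uint16_t insn)        /* mirrors emitc() */
{
        c->insns[c->ninsns] = insn;
        c->ninsns += 1;
}

static int rvoff(int ninsns) { return ninsns << 1; }    /* mirrors ninsns_rvoff() */

int main(void)
{
        struct ctx c = { .ninsns = 0 };

        emit32(&c, 0x00000013);  /* addi x0,x0,0 - full-size nop */
        emit16(&c, 0x0001);      /* c.nop - compressed nop       */
        /* 3 half-words emitted, so the next instruction starts 6 bytes in. */
        printf("%d half-words, %d bytes\n", c.ninsns, rvoff(c.ninsns));
        return 0;
}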

View File

@ -644,7 +644,7 @@ static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff,
e = ctx->ninsns;
/* Adjust for extra insns. */
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
return 0;
}
@ -713,7 +713,7 @@ static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx)
if (far) {
e = ctx->ninsns;
/* Adjust for extra insns. */
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
}
return 0;
@ -731,7 +731,7 @@ static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff,
e = ctx->ninsns;
/* Adjust for extra insns. */
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx))
return -1;
@ -795,7 +795,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
* if (index >= max_entries)
* goto out;
*/
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
/*
@ -804,7 +804,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
* goto out;
*/
emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
/*
@ -818,7 +818,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx);
/*
@ -1214,7 +1214,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_imm32(tmp2, imm, ctx);
src = tmp2;
e = ctx->ninsns;
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
}
if (is64)

View File

@ -132,19 +132,23 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
*
* This also means that we need to process LSB to MSB.
*/
s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff;
s64 upper = (val + (1 << 11)) >> 12;
/* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
* and addi are signed and RVC checks will perform signed comparisons.
*/
s64 lower = ((val & 0xfff) << 52) >> 52;
int shift;
if (is_32b_int(val)) {
if (upper)
emit(rv_lui(rd, upper), ctx);
emit_lui(rd, upper, ctx);
if (!upper) {
emit(rv_addi(rd, RV_REG_ZERO, lower), ctx);
emit_li(rd, lower, ctx);
return;
}
emit(rv_addiw(rd, rd, lower), ctx);
emit_addiw(rd, rd, lower, ctx);
return;
}
@ -154,9 +158,9 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
emit_imm(rd, upper, ctx);
emit(rv_slli(rd, rd, shift), ctx);
emit_slli(rd, rd, shift, ctx);
if (lower)
emit(rv_addi(rd, rd, lower), ctx);
emit_addi(rd, rd, lower, ctx);
}
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
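
Worked example of why emit_imm() now sign-extends the low 12 bits: with the old `lower = val & 0xfff`, a value such as 0x800 appears as +2048, which the signed range checks used by emit_li()/emit_addi() (is_6b_int() and friends) would misjudge; sign-extending makes lower -2048 while upper absorbs the carry, and the lui/addi pair still reconstructs the original value. A standalone arithmetic check (not kernel code; the value is chosen purely for illustration):

#include <stdio.h>

int main(void)
{
        long long val = 0x800;                             /* 2048 */
        long long upper = (val + (1 << 11)) >> 12;         /* carry moves up: 1 */
        /* Sign-extend bits [11:0]; equivalent to the shift pair in emit_imm(). */
        long long lower = ((val & 0xfff) ^ 0x800) - 0x800; /* -2048 */

        /* lui/addi-style reconstruction: (1 << 12) + (-2048) == 2048 */
        printf("upper=%lld lower=%lld rebuilt=%lld\n",
               upper, lower, (upper << 12) + lower);
        return 0;
}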
@ -164,43 +168,43 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
if (seen_reg(RV_REG_RA, ctx)) {
emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
if (seen_reg(RV_REG_S1, ctx)) {
emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S2, ctx)) {
emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S3, ctx)) {
emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S4, ctx)) {
emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S5, ctx)) {
emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S6, ctx)) {
emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx);
emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
/* Set return value. */
if (!is_tail_call)
emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
is_tail_call ? 4 : 0), /* skip TCC init */
ctx);
emit_mv(RV_REG_A0, RV_REG_A5, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
is_tail_call ? 4 : 0, /* skip TCC init */
ctx);
}
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
@ -280,8 +284,8 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
{
emit(rv_slli(reg, reg, 32), ctx);
emit(rv_srli(reg, reg, 32), ctx);
emit_slli(reg, reg, 32, ctx);
emit_srli(reg, reg, 32, ctx);
}
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
@ -304,35 +308,35 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
/* if (TCC-- < 0)
* goto out;
*/
emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
emit_addi(RV_REG_T1, tcc, -1, ctx);
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
/* prog = array->ptrs[index];
* if (!prog)
* goto out;
*/
emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx);
emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
off = offsetof(struct bpf_array, ptrs);
if (is_12b_check(off, insn))
return -1;
emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
/* goto *(prog->bpf_func + 4); */
off = offsetof(struct bpf_prog, bpf_func);
if (is_12b_check(off, insn))
return -1;
emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
__build_epilogue(true, ctx);
return 0;
}
@ -360,9 +364,9 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
emit_mv(RV_REG_T2, *rd, ctx);
emit_zext_32(RV_REG_T2, ctx);
emit(rv_addi(RV_REG_T1, *rs, 0), ctx);
emit_mv(RV_REG_T1, *rs, ctx);
emit_zext_32(RV_REG_T1, ctx);
*rd = RV_REG_T2;
*rs = RV_REG_T1;
@ -370,15 +374,15 @@ static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
emit(rv_addiw(RV_REG_T1, *rs, 0), ctx);
emit_addiw(RV_REG_T2, *rd, 0, ctx);
emit_addiw(RV_REG_T1, *rs, 0, ctx);
*rd = RV_REG_T2;
*rs = RV_REG_T1;
}
static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
{
emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
emit_mv(RV_REG_T2, *rd, ctx);
emit_zext_32(RV_REG_T2, ctx);
emit_zext_32(RV_REG_T1, ctx);
*rd = RV_REG_T2;
@ -386,7 +390,7 @@ static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
{
emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
emit_addiw(RV_REG_T2, *rd, 0, ctx);
*rd = RV_REG_T2;
}
@ -432,7 +436,7 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
if (ret)
return ret;
rd = bpf_to_rv_reg(BPF_REG_0, ctx);
emit(rv_addi(rd, RV_REG_A0, 0), ctx);
emit_mv(rd, RV_REG_A0, ctx);
return 0;
}
@ -458,7 +462,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_zext_32(rd, ctx);
break;
}
emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
emit_mv(rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@ -466,31 +470,35 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = dst OP src */
case BPF_ALU | BPF_ADD | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_X:
emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
emit_add(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
if (is64)
emit_sub(rd, rd, rs, ctx);
else
emit_subw(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
emit(rv_and(rd, rd, rs), ctx);
emit_and(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
emit(rv_or(rd, rd, rs), ctx);
emit_or(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
emit(rv_xor(rd, rd, rs), ctx);
emit_xor(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@ -534,8 +542,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = -dst */
case BPF_ALU | BPF_NEG:
case BPF_ALU64 | BPF_NEG:
emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
rv_subw(rd, RV_REG_ZERO, rd), ctx);
emit_sub(rd, RV_REG_ZERO, rd, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@ -544,8 +551,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
emit(rv_slli(rd, rd, 48), ctx);
emit(rv_srli(rd, rd, 48), ctx);
emit_slli(rd, rd, 48, ctx);
emit_srli(rd, rd, 48, ctx);
break;
case 32:
if (!aux->verifier_zext)
@ -558,51 +565,51 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
emit_li(RV_REG_T2, 0, ctx);
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
if (imm == 16)
goto out_be;
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
if (imm == 32)
goto out_be;
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
emit(rv_srli(rd, rd, 8), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
emit_srli(rd, rd, 8, ctx);
out_be:
emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
emit_andi(RV_REG_T1, rd, 0xff, ctx);
emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
emit(rv_addi(rd, RV_REG_T2, 0), ctx);
emit_mv(rd, RV_REG_T2, ctx);
break;
/* dst = imm */
@ -617,12 +624,10 @@ out_be:
case BPF_ALU | BPF_ADD | BPF_K:
case BPF_ALU64 | BPF_ADD | BPF_K:
if (is_12b_int(imm)) {
emit(is64 ? rv_addi(rd, rd, imm) :
rv_addiw(rd, rd, imm), ctx);
emit_addi(rd, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
rv_addw(rd, rd, RV_REG_T1), ctx);
emit_add(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@ -630,12 +635,10 @@ out_be:
case BPF_ALU | BPF_SUB | BPF_K:
case BPF_ALU64 | BPF_SUB | BPF_K:
if (is_12b_int(-imm)) {
emit(is64 ? rv_addi(rd, rd, -imm) :
rv_addiw(rd, rd, -imm), ctx);
emit_addi(rd, rd, -imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
rv_subw(rd, rd, RV_REG_T1), ctx);
emit_sub(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@ -643,10 +646,10 @@ out_be:
case BPF_ALU | BPF_AND | BPF_K:
case BPF_ALU64 | BPF_AND | BPF_K:
if (is_12b_int(imm)) {
emit(rv_andi(rd, rd, imm), ctx);
emit_andi(rd, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_and(rd, rd, RV_REG_T1), ctx);
emit_and(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@ -657,7 +660,7 @@ out_be:
emit(rv_ori(rd, rd, imm), ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_or(rd, rd, RV_REG_T1), ctx);
emit_or(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@ -668,7 +671,7 @@ out_be:
emit(rv_xori(rd, rd, imm), ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_xor(rd, rd, RV_REG_T1), ctx);
emit_xor(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@ -699,19 +702,28 @@ out_be:
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
emit_slli(rd, rd, imm, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU64 | BPF_RSH | BPF_K:
emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
if (is64)
emit_srli(rd, rd, imm, ctx);
else
emit(rv_srliw(rd, rd, imm), ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU64 | BPF_ARSH | BPF_K:
emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
if (is64)
emit_srai(rd, rd, imm, ctx);
else
emit(rv_sraiw(rd, rd, imm), ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@ -757,13 +769,13 @@ out_be:
e = ctx->ninsns;
/* Adjust for extra insns */
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
}
if (BPF_OP(code) == BPF_JSET) {
/* Adjust for and */
rvoff -= 4;
emit(rv_and(RV_REG_T1, rd, rs), ctx);
emit_and(RV_REG_T1, rd, rs, ctx);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
ctx);
} else {
@ -810,7 +822,7 @@ out_be:
e = ctx->ninsns;
/* Adjust for extra insns */
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
break;
@ -819,19 +831,19 @@ out_be:
rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns;
if (is_12b_int(imm)) {
emit(rv_andi(RV_REG_T1, rd, imm), ctx);
emit_andi(RV_REG_T1, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
}
/* For jset32, we should clear the upper 32 bits of t1, but
* sign-extension is sufficient here and saves one instruction,
* as t1 is used only in comparison against zero.
*/
if (!is64 && imm < 0)
emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
e = ctx->ninsns;
rvoff -= (e - s) << 2;
rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
break;
@ -887,7 +899,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
@ -899,7 +911,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
@ -911,20 +923,20 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_LDX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
emit(rv_ld(rd, off, rs), ctx);
emit_ld(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
emit(rv_ld(rd, 0, RV_REG_T1), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit_ld(rd, 0, RV_REG_T1, ctx);
break;
/* ST: *(size *)(dst + off) = imm */
@ -936,7 +948,7 @@ out_be:
}
emit_imm(RV_REG_T2, off, ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
break;
@ -948,30 +960,30 @@ out_be:
}
emit_imm(RV_REG_T2, off, ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
break;
case BPF_ST | BPF_MEM | BPF_W:
emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
emit(rv_sw(rd, off, RV_REG_T1), ctx);
emit_sw(rd, off, RV_REG_T1, ctx);
break;
}
emit_imm(RV_REG_T2, off, ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx);
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
break;
case BPF_ST | BPF_MEM | BPF_DW:
emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
emit(rv_sd(rd, off, RV_REG_T1), ctx);
emit_sd(rd, off, RV_REG_T1, ctx);
break;
}
emit_imm(RV_REG_T2, off, ctx);
emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx);
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
break;
/* STX: *(size *)(dst + off) = src */
@ -982,7 +994,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit(rv_sb(RV_REG_T1, 0, rs), ctx);
break;
case BPF_STX | BPF_MEM | BPF_H:
@ -992,28 +1004,28 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit(rv_sh(RV_REG_T1, 0, rs), ctx);
break;
case BPF_STX | BPF_MEM | BPF_W:
if (is_12b_int(off)) {
emit(rv_sw(rd, off, rs), ctx);
emit_sw(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
emit(rv_sw(RV_REG_T1, 0, rs), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit_sw(RV_REG_T1, 0, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
emit(rv_sd(rd, off, rs), ctx);
emit_sd(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
emit(rv_sd(RV_REG_T1, 0, rs), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit_sd(RV_REG_T1, 0, rs, ctx);
break;
/* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W:
@ -1021,10 +1033,10 @@ out_be:
case BPF_STX | BPF_XADD | BPF_DW:
if (off) {
if (is_12b_int(off)) {
emit(rv_addi(RV_REG_T1, rd, off), ctx);
emit_addi(RV_REG_T1, rd, off, ctx);
} else {
emit_imm(RV_REG_T1, off, ctx);
emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
}
rd = RV_REG_T1;
@ -1073,52 +1085,53 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
/* First instruction is always setting the tail-call-counter
* (TCC) register. This instruction is skipped for tail calls.
* Force using a 4-byte (non-compressed) instruction.
*/
emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);
if (seen_reg(RV_REG_RA, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
store_offset -= 8;
}
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
store_offset -= 8;
if (seen_reg(RV_REG_S1, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S2, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S3, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S4, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S5, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S6, ctx)) {
emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx);
emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
store_offset -= 8;
}
emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
if (bpf_stack_adjust)
emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx);
emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);
/* Program contains calls and tail calls, so RV_REG_TCC needs
* to be saved across calls.
*/
if (seen_tail_call(ctx) && seen_call(ctx))
emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
ctx->stack_size = stack_adjust;
}

View File

@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (ctx->offset) {
extra_pass = true;
image_size = sizeof(u32) * ctx->ninsns;
image_size = sizeof(*ctx->insns) * ctx->ninsns;
goto skip_init_ctx;
}
@ -103,7 +103,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (jit_data->header)
break;
image_size = sizeof(u32) * ctx->ninsns;
image_size = sizeof(*ctx->insns) * ctx->ninsns;
jit_data->header =
bpf_jit_binary_alloc(image_size,
&jit_data->image,
@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_offset;
}
ctx->insns = (u32 *)jit_data->image;
ctx->insns = (u16 *)jit_data->image;
/*
* Now, when the image is allocated, the image can
* potentially shrink more (auipc/jalr -> jal).

View File

@ -489,6 +489,24 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
} while (re <= last);
}
static void bpf_skip(struct bpf_jit *jit, int size)
{
if (size >= 6 && !is_valid_rel(size)) {
/* brcl 0xf,size */
EMIT6_PCREL_RIL(0xc0f4000000, size);
size -= 6;
} else if (size >= 4 && is_valid_rel(size)) {
/* brc 0xf,size */
EMIT4_PCREL(0xa7f40000, size);
size -= 4;
}
while (size >= 2) {
/* bcr 0,%0 */
_EMIT2(0x0700);
size -= 2;
}
}
/*
* Emit function prologue
*
@ -501,10 +519,11 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
/* j tail_call_start: NOP if no tail calls are used */
EMIT4_PCREL(0xa7f40000, 6);
/* bcr 0,%0 */
EMIT2(0x0700, 0, REG_0);
/*
* There are no tail calls. Insert nops in order to have
* tail_call_start at a predictable offset.
*/
bpf_skip(jit, 6);
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
@ -1268,8 +1287,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
last = (i == fp->len - 1) ? 1 : 0;
if (last)
break;
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
/* brc 0xf, <exit> */
EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
else
/* brcl 0xf, <exit> */
EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
break;
/*
* Branch relative (number of skipped instructions) to offset on
@ -1417,21 +1440,10 @@ branch_ks:
}
break;
branch_ku:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
/* clfi or clgfi %dst,imm */
EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
dst_reg, imm);
if (!is_first_pass(jit) &&
can_use_rel(jit, addrs[i + off + 1])) {
/* brc mask,off */
EMIT4_PCREL_RIC(0xa7040000,
mask >> 12, addrs[i + off + 1]);
} else {
/* brcl mask,off */
EMIT6_PCREL_RILC(0xc0040000,
mask >> 12, addrs[i + off + 1]);
}
break;
/* lgfi %w1,imm (load sign extend imm) */
src_reg = REG_1;
EMIT6_IMM(0xc0010000, src_reg, imm);
goto branch_xu;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
if (!is_first_pass(jit) &&
@ -1510,7 +1522,14 @@ static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
*/
static int bpf_set_addr(struct bpf_jit *jit, int i)
{
if (!bpf_is_new_addr_sane(jit, i))
int delta;
if (is_codegen_pass(jit)) {
delta = jit->prg - jit->addrs[i];
if (delta < 0)
bpf_skip(jit, -delta);
}
if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
return -1;
jit->addrs[i] = jit->prg;
return 0;

View File

@ -8,6 +8,7 @@
enum netns_bpf_attach_type {
NETNS_BPF_INVALID = -1,
NETNS_BPF_FLOW_DISSECTOR = 0,
NETNS_BPF_SK_LOOKUP,
MAX_NETNS_BPF_ATTACH_TYPE
};
@ -17,6 +18,8 @@ to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
switch (attach_type) {
case BPF_FLOW_DISSECTOR:
return NETNS_BPF_FLOW_DISSECTOR;
case BPF_SK_LOOKUP:
return NETNS_BPF_SK_LOOKUP;
default:
return NETNS_BPF_INVALID;
}
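
The new NETNS_BPF_SK_LOOKUP slot means SK_LOOKUP programs attach to a network namespace through a bpf_link rather than to a device or cgroup. A hedged userspace sketch of the attachment, assuming libbpf's bpf_link_create() and an already-loaded program fd (the path and error handling are illustrative):

#include <fcntl.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

int attach_sk_lookup(int prog_fd)
{
        int link_fd, netns_fd;

        netns_fd = open("/proc/self/ns/net", O_RDONLY);
        if (netns_fd < 0)
                return -1;

        /* Target of the link is the netns fd; several links may coexist and
         * their programs run in attach order via the run array.
         */
        link_fd = bpf_link_create(prog_fd, netns_fd, BPF_SK_LOOKUP, NULL);
        close(netns_fd);
        return link_fd;
}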

View File

@ -249,6 +249,7 @@ enum bpf_arg_type {
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
@ -667,6 +668,7 @@ struct bpf_jit_poke_descriptor {
struct bpf_ctx_arg_aux {
u32 offset;
enum bpf_reg_type reg_type;
u32 btf_id;
};
struct bpf_prog_aux {
@ -928,6 +930,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
struct bpf_prog *old_prog);
int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index);
int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
struct bpf_prog *prog);
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt);
@ -1272,6 +1277,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
bool cpu_map_prog_allowed(struct bpf_map *map);
/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@ -1432,6 +1438,11 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
return 0;
}
static inline bool cpu_map_prog_allowed(struct bpf_map *map)
{
return false;
}
static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
enum bpf_prog_type type)
{
@ -1531,7 +1542,6 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
void init_btf_sock_ids(struct btf *btf);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@ -1557,9 +1567,6 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
static inline void init_btf_sock_ids(struct btf *btf)
{
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_BPF_STREAM_PARSER)

View File

@ -64,6 +64,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
struct sk_reuseport_md, struct sk_reuseport_kern)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_LOOKUP, sk_lookup,
struct bpf_sk_lookup, struct bpf_sk_lookup_kern)
#endif
#if defined(CONFIG_BPF_JIT)
BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,

View File

@ -57,17 +57,20 @@ asm( \
* .zero 4
*
*/
#define __BTF_ID_LIST(name) \
#define __BTF_ID_LIST(name, scope) \
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
".local " #name "; \n" \
"." #scope " " #name "; \n" \
#name ":; \n" \
".popsection; \n"); \
#define BTF_ID_LIST(name) \
__BTF_ID_LIST(name) \
__BTF_ID_LIST(name, local) \
extern u32 name[];
#define BTF_ID_LIST_GLOBAL(name) \
__BTF_ID_LIST(name, globl)
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@ -90,7 +93,38 @@ asm( \
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
#endif /* CONFIG_DEBUG_INFO_BTF */
#ifdef CONFIG_NET
/* Define a list of socket types which can be the argument for
* skc_to_*_sock() helpers. All these sockets should have
* sock_common as the first member in their memory layout.
*/
#define BTF_SOCK_TYPE_xxx \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
enum {
#define BTF_SOCK_TYPE(name, str) name,
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
MAX_BTF_SOCK_TYPE,
};
extern u32 btf_sock_ids[];
#endif
#endif
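
The scope parameter added to __BTF_ID_LIST() exists so that btf_sock_ids[] can be emitted as a global symbol and shared across files: the same BTF_SOCK_TYPE_xxx X-macro that generates the enum above also generates one 4-byte slot per socket type inside a BTF_ID_LIST_GLOBAL, and the build-time resolve_btfids tool patches those slots with the real BTF type ids. A sketch of that instantiation (paraphrased, not the verbatim kernel call site):

/* Emit the global table; each BTF_ID() reserves a 4-byte slot that
 * resolve_btfids fills with the BTF id of the named struct at link time.
 */
BTF_ID_LIST_GLOBAL(btf_sock_ids)
#define BTF_SOCK_TYPE(name, str) BTF_ID(struct, str)
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE

/* A helper proto can then reference a resolved id directly, e.g.: */
/*   .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6]                */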

View File

@ -1278,4 +1278,151 @@ struct bpf_sockopt_kern {
int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len);
struct bpf_sk_lookup_kern {
u16 family;
u16 protocol;
struct {
__be32 saddr;
__be32 daddr;
} v4;
struct {
const struct in6_addr *saddr;
const struct in6_addr *daddr;
} v6;
__be16 sport;
u16 dport;
struct sock *selected_sk;
bool no_reuseport;
};
extern struct static_key_false bpf_sk_lookup_enabled;
/* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup.
*
* Allowed return values for a BPF SK_LOOKUP program are SK_PASS and
* SK_DROP. Their meaning is as follows:
*
* SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result
* SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup
* SK_DROP : terminate lookup with -ECONNREFUSED
*
* This macro aggregates return values and selected sockets from
* multiple BPF programs according to following rules in order:
*
* 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk,
* macro result is SK_PASS and last ctx.selected_sk is used.
* 2. If any program returned SK_DROP,
* macro result is SK_DROP.
* 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL.
*
* Caller must ensure that the prog array is non-NULL, and that the
* array as well as the programs it contains remain valid.
*/
#define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \
({ \
struct bpf_sk_lookup_kern *_ctx = &(ctx); \
struct bpf_prog_array_item *_item; \
struct sock *_selected_sk = NULL; \
bool _no_reuseport = false; \
struct bpf_prog *_prog; \
bool _all_pass = true; \
u32 _ret; \
\
migrate_disable(); \
_item = &(array)->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
/* restore most recent selection */ \
_ctx->selected_sk = _selected_sk; \
_ctx->no_reuseport = _no_reuseport; \
\
_ret = func(_prog, _ctx); \
if (_ret == SK_PASS && _ctx->selected_sk) { \
/* remember last non-NULL socket */ \
_selected_sk = _ctx->selected_sk; \
_no_reuseport = _ctx->no_reuseport; \
} else if (_ret == SK_DROP && _all_pass) { \
_all_pass = false; \
} \
_item++; \
} \
_ctx->selected_sk = _selected_sk; \
_ctx->no_reuseport = _no_reuseport; \
migrate_enable(); \
_all_pass || _selected_sk ? SK_PASS : SK_DROP; \
})
static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 dport,
struct sock **psk)
{
struct bpf_prog_array *run_array;
struct sock *selected_sk = NULL;
bool no_reuseport = false;
rcu_read_lock();
run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
if (run_array) {
struct bpf_sk_lookup_kern ctx = {
.family = AF_INET,
.protocol = protocol,
.v4.saddr = saddr,
.v4.daddr = daddr,
.sport = sport,
.dport = dport,
};
u32 act;
act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
if (act == SK_PASS) {
selected_sk = ctx.selected_sk;
no_reuseport = ctx.no_reuseport;
} else {
selected_sk = ERR_PTR(-ECONNREFUSED);
}
}
rcu_read_unlock();
*psk = selected_sk;
return no_reuseport;
}
#if IS_ENABLED(CONFIG_IPV6)
static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
const u16 dport,
struct sock **psk)
{
struct bpf_prog_array *run_array;
struct sock *selected_sk = NULL;
bool no_reuseport = false;
rcu_read_lock();
run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
if (run_array) {
struct bpf_sk_lookup_kern ctx = {
.family = AF_INET6,
.protocol = protocol,
.v6.saddr = saddr,
.v6.daddr = daddr,
.sport = sport,
.dport = dport,
};
u32 act;
act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
if (act == SK_PASS) {
selected_sk = ctx.selected_sk;
no_reuseport = ctx.no_reuseport;
} else {
selected_sk = ERR_PTR(-ECONNREFUSED);
}
}
rcu_read_unlock();
*psk = selected_sk;
return no_reuseport;
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* __LINUX_FILTER_H__ */
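
For context, a protocol's listener lookup is expected to consult this hook before falling back to its hash tables. A simplified, hypothetical caller (kernel context assumed; the function and variable names are illustrative, not the exact TCP/UDP call sites):

static struct sock *lookup_with_bpf(struct net *net, __be32 saddr, __be16 sport,
                                    __be32 daddr, u16 dport)
{
        struct sock *sk = NULL;
        bool no_reuseport;

        if (!static_branch_unlikely(&bpf_sk_lookup_enabled))
                return NULL;            /* no programs attached anywhere */

        no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
                                            daddr, dport, &sk);
        if (IS_ERR(sk))
                return sk;              /* SK_DROP: fail lookup with -ECONNREFUSED */
        if (sk && !no_reuseport && sk->sk_reuseport) {
                /* A real caller would run reuseport group selection here. */
        }
        return sk;                      /* NULL: continue with the regular htable lookup */
}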

View File

@ -104,6 +104,7 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */
};
static inline struct skb_shared_info *
xdp_get_shared_info_from_frame(struct xdp_frame *frame)
{
@ -113,6 +114,12 @@ xdp_get_shared_info_from_frame(struct xdp_frame *frame)
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
}
struct xdp_cpumap_stats {
unsigned int redirect;
unsigned int pass;
unsigned int drop;
};
/* Clear kernel pointers in xdp_frame */
static inline void xdp_scrub_frame(struct xdp_frame *frame)
{
@ -136,39 +143,48 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
xdp->frame_sz = frame->frame_sz;
}
/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
int xdp_update_frame_from_buff(struct xdp_buff *xdp,
struct xdp_frame *xdp_frame)
{
struct xdp_frame *xdp_frame;
int metasize;
int headroom;
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
return xdp_convert_zc_to_xdp_frame(xdp);
int metasize, headroom;
/* Assure headroom is available for storing info */
headroom = xdp->data - xdp->data_hard_start;
metasize = xdp->data - xdp->data_meta;
metasize = metasize > 0 ? metasize : 0;
if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
return NULL;
return -ENOSPC;
/* Catch if driver didn't reserve tailroom for skb_shared_info */
if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
XDP_WARN("Driver BUG: missing reserved tailroom");
return NULL;
return -ENOSPC;
}
/* Store info in top of packet */
xdp_frame = xdp->data_hard_start;
xdp_frame->data = xdp->data;
xdp_frame->len = xdp->data_end - xdp->data;
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
xdp_frame->frame_sz = xdp->frame_sz;
return 0;
}
/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
{
struct xdp_frame *xdp_frame;
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
return xdp_convert_zc_to_xdp_frame(xdp);
/* Store info in top of packet */
xdp_frame = xdp->data_hard_start;
if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
return NULL;
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
xdp_frame->mem = xdp->rxq->mem;

View File

@ -177,9 +177,9 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
TRACE_EVENT(xdp_cpumap_kthread,
TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
int sched),
int sched, struct xdp_cpumap_stats *xdp_stats),
TP_ARGS(map_id, processed, drops, sched),
TP_ARGS(map_id, processed, drops, sched, xdp_stats),
TP_STRUCT__entry(
__field(int, map_id)
@ -188,6 +188,9 @@ TRACE_EVENT(xdp_cpumap_kthread,
__field(unsigned int, drops)
__field(unsigned int, processed)
__field(int, sched)
__field(unsigned int, xdp_pass)
__field(unsigned int, xdp_drop)
__field(unsigned int, xdp_redirect)
),
TP_fast_assign(
@ -197,16 +200,21 @@ TRACE_EVENT(xdp_cpumap_kthread,
__entry->drops = drops;
__entry->processed = processed;
__entry->sched = sched;
__entry->xdp_pass = xdp_stats->pass;
__entry->xdp_drop = xdp_stats->drop;
__entry->xdp_redirect = xdp_stats->redirect;
),
TP_printk("kthread"
" cpu=%d map_id=%d action=%s"
" processed=%u drops=%u"
" sched=%d",
" sched=%d"
" xdp_pass=%u xdp_drop=%u xdp_redirect=%u",
__entry->cpu, __entry->map_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->processed, __entry->drops,
__entry->sched)
__entry->sched,
__entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect)
);
TRACE_EVENT(xdp_cpumap_enqueue,

View File

@ -189,6 +189,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
};
enum bpf_attach_type {
@ -227,6 +228,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
BPF_CGROUP_INET_SOCK_RELEASE,
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
__MAX_BPF_ATTACH_TYPE
};
@ -2419,7 +2422,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@ -2456,7 +2459,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@ -3068,6 +3071,10 @@ union bpf_attr {
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
* **BPF_PROG_TYPE_SCHED_ACT** programs.
*
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
* will cause *skb* to be delivered to the specified socket.
@ -3093,6 +3100,56 @@ union bpf_attr {
* **-ESOCKTNOSUPPORT** if the socket type is not supported
* (reuseport).
*
* long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
*
* Select the *sk* as a result of a socket lookup.
*
* For the operation to succeed, the passed socket must be compatible
* with the packet description provided by the *ctx* object.
*
* The L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
* be an exact match, while the IP family (**AF_INET** or
* **AF_INET6**) must be compatible; that is, IPv6 sockets
* that are not v6-only can be selected for IPv4 packets.
*
* Only TCP listeners and UDP unconnected sockets can be
* selected. *sk* can also be NULL to reset any previous
* selection.
*
* The *flags* argument can be a combination of the following values:
*
* * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
* socket selection, potentially done by a BPF program
* that ran before us.
*
* * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
* load-balancing within reuseport group for the socket
* being selected.
*
* On success *ctx->sk* will point to the selected socket.
*
* Return
* 0 on success, or a negative errno in case of failure.
*
* * **-EAFNOSUPPORT** if socket family (*sk->family*) is
* not compatible with packet family (*ctx->family*).
*
* * **-EEXIST** if a socket has already been selected,
* potentially by another program, and
* **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
*
* * **-EINVAL** if unsupported flags were specified.
*
* * **-EPROTOTYPE** if socket L4 protocol
* (*sk->protocol*) doesn't match packet protocol
* (*ctx->protocol*).
*
* * **-ESOCKTNOSUPPORT** if socket is not in allowed
* state (TCP listening or UDP unconnected).
*
* u64 bpf_ktime_get_boot_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
@ -3606,6 +3663,12 @@ enum {
BPF_RINGBUF_HDR_SZ = 8,
};
/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
enum {
BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0),
BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1),
};
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@ -3849,6 +3912,19 @@ struct bpf_devmap_val {
} bpf_prog;
};
/* CPUMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
* New members can only be added to the end of this structure.
*/
struct bpf_cpumap_val {
__u32 qsize; /* queue size to remote target CPU */
union {
int fd; /* prog fd on map write */
__u32 id; /* prog id on map read */
} bpf_prog;
};
enum sk_action {
SK_DROP = 0,
SK_PASS,
@ -3986,7 +4062,7 @@ struct bpf_link_info {
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
* attach attach type).
* attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
@ -4335,4 +4411,19 @@ struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
__u32 remote_ip4; /* Network byte order */
__u32 remote_ip6[4]; /* Network byte order */
__u32 remote_port; /* Network byte order */
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
};
#endif /* _UAPI__LINUX_BPF_H__ */
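/* Illustrative sketch, not part of this patch: attaching an SK_LOOKUP
 * program from user space. The program is bound to a network namespace by
 * creating a BPF link with the netns fd as the target; prog_fd is assumed
 * to come from earlier loading, and the /proc path simply refers to the
 * current task's netns.
 */
#include <fcntl.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int attach_sk_lookup(int prog_fd)
{
	int netns_fd, link_fd;

	netns_fd = open("/proc/self/ns/net", O_RDONLY);
	if (netns_fd < 0)
		return -1;

	/* The returned link fd keeps the attachment alive; dropping the
	 * last reference to the link detaches the program.
	 */
	link_fd = bpf_link_create(prog_fd, netns_fd, BPF_SK_LOOKUP, NULL);
	close(netns_fd);

	return link_fd;
}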

View File

@ -3672,7 +3672,6 @@ struct btf *btf_parse_vmlinux(void)
goto errout;
bpf_struct_ops_init(btf, log);
init_btf_sock_ids(btf);
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
@ -3818,16 +3817,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return true;
/* this is a pointer to another type */
info->reg_type = PTR_TO_BTF_ID;
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
if (ctx_arg_info->offset == off) {
info->reg_type = ctx_arg_info->reg_type;
break;
info->btf_id = ctx_arg_info->btf_id;
return true;
}
}
info->reg_type = PTR_TO_BTF_ID;
if (tgt_prog) {
ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
if (ret > 0) {

View File

@ -1958,6 +1958,61 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
}
}
/**
* bpf_prog_array_delete_safe_at() - Replaces the program at the given
* index into the program array with
* a dummy no-op program.
* @array: a bpf_prog_array
* @index: the index of the program to replace
*
* Skips over dummy programs, by not counting them, when calculating
* the position of the program to replace.
*
* Return:
* * 0 - Success
* * -EINVAL - Invalid index value. Must be a non-negative integer.
* * -ENOENT - Index out of range
*/
int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
{
return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
}
/**
* bpf_prog_array_update_at() - Updates the program at the given index
* into the program array.
* @array: a bpf_prog_array
* @index: the index of the program to update
* @prog: the program to insert into the array
*
* Skips over dummy programs, by not counting them, when calculating
* the position of the program to update.
*
* Return:
* * 0 - Success
* * -EINVAL - Invalid index value. Must be a non-negative integer.
* * -ENOENT - Index out of range
*/
int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
struct bpf_prog *prog)
{
struct bpf_prog_array_item *item;
if (unlikely(index < 0))
return -EINVAL;
for (item = array->items; item->prog; item++) {
if (item->prog == &dummy_bpf_prog.prog)
continue;
if (!index) {
WRITE_ONCE(item->prog, prog);
return 0;
}
index--;
}
return -ENOENT;
}
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,

View File

@ -52,7 +52,6 @@ struct xdp_bulk_queue {
struct bpf_cpu_map_entry {
u32 cpu; /* kthread CPU and map index */
int map_id; /* Back reference to map */
u32 qsize; /* Queue size placeholder for map lookup */
/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
struct xdp_bulk_queue __percpu *bulkq;
@ -62,10 +61,14 @@ struct bpf_cpu_map_entry {
/* Queue with potential multi-producers, and single-consumer kthread */
struct ptr_ring *queue;
struct task_struct *kthread;
struct work_struct kthread_stop_wq;
struct bpf_cpumap_val value;
struct bpf_prog *prog;
atomic_t refcnt; /* Control when this struct can be free'ed */
struct rcu_head rcu;
struct work_struct kthread_stop_wq;
};
struct bpf_cpu_map {
@ -80,6 +83,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
u64 cost;
@ -90,7 +94,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
(value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) ||
attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
cmap = kzalloc(sizeof(*cmap), GFP_USER);
@ -212,6 +218,8 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
{
if (atomic_dec_and_test(&rcpu->refcnt)) {
if (rcpu->prog)
bpf_prog_put(rcpu->prog);
/* The queue should be empty at this point */
__cpu_map_ring_cleanup(rcpu->queue);
ptr_ring_cleanup(rcpu->queue, NULL);
@ -220,6 +228,75 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
void **frames, int n,
struct xdp_cpumap_stats *stats)
{
struct xdp_rxq_info rxq;
struct xdp_buff xdp;
int i, nframes = 0;
if (!rcpu->prog)
return n;
rcu_read_lock_bh();
xdp_set_return_frame_no_direct();
xdp.rxq = &rxq;
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
u32 act;
int err;
rxq.dev = xdpf->dev_rx;
rxq.mem = xdpf->mem;
/* TODO: report queue_index to xdp_rxq_info */
xdp_convert_frame_to_buff(xdpf, &xdp);
act = bpf_prog_run_xdp(rcpu->prog, &xdp);
switch (act) {
case XDP_PASS:
err = xdp_update_frame_from_buff(&xdp, xdpf);
if (err < 0) {
xdp_return_frame(xdpf);
stats->drop++;
} else {
frames[nframes++] = xdpf;
stats->pass++;
}
break;
case XDP_REDIRECT:
err = xdp_do_redirect(xdpf->dev_rx, &xdp,
rcpu->prog);
if (unlikely(err)) {
xdp_return_frame(xdpf);
stats->drop++;
} else {
stats->redirect++;
}
break;
default:
bpf_warn_invalid_xdp_action(act);
/* fallthrough */
case XDP_DROP:
xdp_return_frame(xdpf);
stats->drop++;
break;
}
}
if (stats->redirect)
xdp_do_flush_map();
xdp_clear_return_frame_no_direct();
rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
return nframes;
}
#define CPUMAP_BATCH 8
static int cpu_map_kthread_run(void *data)
@ -234,11 +311,12 @@ static int cpu_map_kthread_run(void *data)
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
struct xdp_cpumap_stats stats = {}; /* zero stats */
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
unsigned int drops = 0, sched = 0;
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
int i, n, m;
int i, n, m, nframes;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@ -259,8 +337,8 @@ static int cpu_map_kthread_run(void *data)
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
n = __ptr_ring_consume_batched(rcpu->queue, frames,
CPUMAP_BATCH);
for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page = virt_to_page(f);
@ -272,15 +350,19 @@ static int cpu_map_kthread_run(void *data)
prefetchw(page);
}
m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
if (unlikely(m == 0)) {
for (i = 0; i < n; i++)
skbs[i] = NULL; /* effect: xdp_return_frame */
drops = n;
/* Support running another XDP prog on this CPU */
nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
if (nframes) {
m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
if (unlikely(m == 0)) {
for (i = 0; i < nframes; i++)
skbs[i] = NULL; /* effect: xdp_return_frame */
drops += nframes;
}
}
local_bh_disable();
for (i = 0; i < n; i++) {
for (i = 0; i < nframes; i++) {
struct xdp_frame *xdpf = frames[i];
struct sk_buff *skb = skbs[i];
int ret;
@ -297,7 +379,7 @@ static int cpu_map_kthread_run(void *data)
drops++;
}
/* Feedback loop via tracepoint */
trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
local_bh_enable(); /* resched point, may call do_softirq() */
}
@ -307,13 +389,38 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
int map_id)
bool cpu_map_prog_allowed(struct bpf_map *map)
{
return map->map_type == BPF_MAP_TYPE_CPUMAP &&
map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
}
static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
{
struct bpf_prog *prog;
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
bpf_prog_put(prog);
return -EINVAL;
}
rcpu->value.bpf_prog.id = prog->aux->id;
rcpu->prog = prog;
return 0;
}
static struct bpf_cpu_map_entry *
__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
{
int numa, err, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
struct xdp_bulk_queue *bq;
int numa, err, i;
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
@ -338,19 +445,22 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
if (!rcpu->queue)
goto free_bulkq;
err = ptr_ring_init(rcpu->queue, qsize, gfp);
err = ptr_ring_init(rcpu->queue, value->qsize, gfp);
if (err)
goto free_queue;
rcpu->cpu = cpu;
rcpu->map_id = map_id;
rcpu->qsize = qsize;
rcpu->value.qsize = value->qsize;
if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
goto free_ptr_ring;
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
"cpumap/%d/map:%d", cpu, map_id);
if (IS_ERR(rcpu->kthread))
goto free_ptr_ring;
goto free_prog;
get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
@ -361,6 +471,9 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
return rcpu;
free_prog:
if (rcpu->prog)
bpf_prog_put(rcpu->prog);
free_ptr_ring:
ptr_ring_cleanup(rcpu->queue, NULL);
free_queue:
@ -437,12 +550,12 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
struct bpf_cpumap_val cpumap_value = {};
struct bpf_cpu_map_entry *rcpu;
/* Array index key corresponds to CPU number */
u32 key_cpu = *(u32 *)key;
/* Value is the queue size */
u32 qsize = *(u32 *)value;
memcpy(&cpumap_value, value, map->value_size);
if (unlikely(map_flags > BPF_EXIST))
return -EINVAL;
@ -450,18 +563,18 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
if (unlikely(qsize > 16384)) /* sanity limit on qsize */
if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */
return -EOVERFLOW;
/* Make sure CPU is a valid possible cpu */
if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
return -ENODEV;
if (qsize == 0) {
if (cpumap_value.qsize == 0) {
rcpu = NULL; /* Same as deleting */
} else {
/* Updating qsize causes re-allocation of bpf_cpu_map_entry */
rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
if (!rcpu)
return -ENOMEM;
rcpu->cmap = cmap;
@ -523,7 +636,7 @@ static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
struct bpf_cpu_map_entry *rcpu =
__cpu_map_lookup_elem(map, *(u32 *)key);
return rcpu ? &rcpu->qsize : NULL;
return rcpu ? &rcpu->value : NULL;
}
static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)

View File

@ -4,6 +4,7 @@
#include <linux/fs.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/btf_ids.h>
struct bpf_iter_seq_map_info {
u32 mid;
@ -81,7 +82,10 @@ static const struct seq_operations bpf_map_seq_ops = {
.show = bpf_map_seq_show,
};
static const struct bpf_iter_reg bpf_map_reg_info = {
BTF_ID_LIST(btf_bpf_map_id)
BTF_ID(struct, bpf_map)
static struct bpf_iter_reg bpf_map_reg_info = {
.target = "bpf_map",
.seq_ops = &bpf_map_seq_ops,
.init_seq_private = NULL,
@ -96,6 +100,7 @@ static const struct bpf_iter_reg bpf_map_reg_info = {
static int __init bpf_map_iter_init(void)
{
bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id;
return bpf_iter_reg_target(&bpf_map_reg_info);
}

View File

@ -25,6 +25,32 @@ struct bpf_netns_link {
/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
{
switch (type) {
#ifdef CONFIG_INET
case NETNS_BPF_SK_LOOKUP:
static_branch_dec(&bpf_sk_lookup_enabled);
break;
#endif
default:
break;
}
}
static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
{
switch (type) {
#ifdef CONFIG_INET
case NETNS_BPF_SK_LOOKUP:
static_branch_inc(&bpf_sk_lookup_enabled);
break;
#endif
default:
break;
}
}
/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
enum netns_bpf_attach_type type)
@ -36,12 +62,50 @@ static void netns_bpf_run_array_detach(struct net *net,
bpf_prog_array_free(run_array);
}
static int link_index(struct net *net, enum netns_bpf_attach_type type,
struct bpf_netns_link *link)
{
struct bpf_netns_link *pos;
int i = 0;
list_for_each_entry(pos, &net->bpf.links[type], node) {
if (pos == link)
return i;
i++;
}
return -ENOENT;
}
static int link_count(struct net *net, enum netns_bpf_attach_type type)
{
struct list_head *pos;
int i = 0;
list_for_each(pos, &net->bpf.links[type])
i++;
return i;
}
static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
struct bpf_prog_array *prog_array)
{
struct bpf_netns_link *pos;
unsigned int i = 0;
list_for_each_entry(pos, &net->bpf.links[type], node) {
prog_array->items[i].prog = pos->link.prog;
i++;
}
}
static void bpf_netns_link_release(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
enum netns_bpf_attach_type type = net_link->netns_type;
struct bpf_prog_array *old_array, *new_array;
struct net *net;
int cnt, idx;
mutex_lock(&netns_bpf_mutex);
@ -53,9 +117,30 @@ static void bpf_netns_link_release(struct bpf_link *link)
if (!net)
goto out_unlock;
netns_bpf_run_array_detach(net, type);
/* Mark attach point as unused */
netns_bpf_attach_type_unneed(type);
/* Remember link position in case of safe delete */
idx = link_index(net, type, net_link);
list_del(&net_link->node);
cnt = link_count(net, type);
if (!cnt) {
netns_bpf_run_array_detach(net, type);
goto out_unlock;
}
old_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
if (!new_array) {
WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
goto out_unlock;
}
fill_prog_array(net, type, new_array);
rcu_assign_pointer(net->bpf.run_array[type], new_array);
bpf_prog_array_free(old_array);
out_unlock:
mutex_unlock(&netns_bpf_mutex);
}
@ -77,7 +162,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
enum netns_bpf_attach_type type = net_link->netns_type;
struct bpf_prog_array *run_array;
struct net *net;
int ret = 0;
int idx, ret;
if (old_prog && old_prog != link->prog)
return -EPERM;
@ -95,7 +180,10 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
run_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
WRITE_ONCE(run_array->items[0].prog, new_prog);
idx = link_index(net, type, net_link);
ret = bpf_prog_array_update_at(run_array, idx, new_prog);
if (ret)
goto out_unlock;
old_prog = xchg(&link->prog, new_prog);
bpf_prog_put(old_prog);
@ -309,18 +397,30 @@ int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
return ret;
}
static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
{
switch (type) {
case NETNS_BPF_FLOW_DISSECTOR:
return 1;
case NETNS_BPF_SK_LOOKUP:
return 64;
default:
return 0;
}
}
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
enum netns_bpf_attach_type type)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
struct bpf_prog_array *run_array;
int err;
int cnt, err;
mutex_lock(&netns_bpf_mutex);
/* Allow attaching only one prog or link for now */
if (!list_empty(&net->bpf.links[type])) {
cnt = link_count(net, type);
if (cnt >= netns_bpf_max_progs(type)) {
err = -E2BIG;
goto out_unlock;
}
@ -334,6 +434,9 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
case NETNS_BPF_FLOW_DISSECTOR:
err = flow_dissector_bpf_prog_attach_check(net, link->prog);
break;
case NETNS_BPF_SK_LOOKUP:
err = 0; /* nothing to check */
break;
default:
err = -EINVAL;
break;
@ -341,16 +444,22 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
if (err)
goto out_unlock;
run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
if (!run_array) {
err = -ENOMEM;
goto out_unlock;
}
run_array->items[0].prog = link->prog;
rcu_assign_pointer(net->bpf.run_array[type], run_array);
list_add_tail(&net_link->node, &net->bpf.links[type]);
fill_prog_array(net, type, run_array);
run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
lockdep_is_held(&netns_bpf_mutex));
bpf_prog_array_free(run_array);
/* Mark attach point as used */
netns_bpf_attach_type_need(type);
out_unlock:
mutex_unlock(&netns_bpf_mutex);
return err;
@ -426,8 +535,10 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
mutex_lock(&netns_bpf_mutex);
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
netns_bpf_run_array_detach(net, type);
list_for_each_entry(net_link, &net->bpf.links[type], node)
list_for_each_entry(net_link, &net->bpf.links[type], node) {
net_link->net = NULL; /* auto-detach link */
netns_bpf_attach_type_unneed(type);
}
if (net->bpf.progs[type])
bpf_prog_put(net->bpf.progs[type]);
}

View File

@ -2022,6 +2022,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
case BPF_PROG_TYPE_SK_LOOKUP:
if (expected_attach_type == BPF_SK_LOOKUP)
return 0;
return -EINVAL;
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
return -EINVAL;
@ -2756,6 +2760,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_SK_LOOKUP:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
if (!capable(CAP_NET_ADMIN))
@ -2817,6 +2822,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
case BPF_TRACE_ITER:
return BPF_PROG_TYPE_TRACING;
case BPF_SK_LOOKUP:
return BPF_PROG_TYPE_SK_LOOKUP;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@ -2953,6 +2960,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
case BPF_FLOW_DISSECTOR:
case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
@ -3891,6 +3899,7 @@ static int link_create(union bpf_attr *attr)
ret = tracing_bpf_link_attach(attr, prog);
break;
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_SK_LOOKUP:
ret = netns_bpf_link_create(attr, prog);
break;
default:

View File

@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/btf_ids.h>
struct bpf_iter_seq_task_common {
struct pid_namespace *ns;
@ -312,7 +313,11 @@ static const struct seq_operations task_file_seq_ops = {
.show = task_file_seq_show,
};
static const struct bpf_iter_reg task_reg_info = {
BTF_ID_LIST(btf_task_file_ids)
BTF_ID(struct, task_struct)
BTF_ID(struct, file)
static struct bpf_iter_reg task_reg_info = {
.target = "task",
.seq_ops = &task_seq_ops,
.init_seq_private = init_seq_pidns,
@ -325,7 +330,7 @@ static const struct bpf_iter_reg task_reg_info = {
},
};
static const struct bpf_iter_reg task_file_reg_info = {
static struct bpf_iter_reg task_file_reg_info = {
.target = "task_file",
.seq_ops = &task_file_seq_ops,
.init_seq_private = init_seq_pidns,
@ -344,10 +349,13 @@ static int __init task_iter_init(void)
{
int ret;
task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
ret = bpf_iter_reg_target(&task_reg_info);
if (ret)
return ret;
task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
return bpf_iter_reg_target(&task_file_reg_info);
}
late_initcall(task_iter_init);

View File

@ -3878,10 +3878,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
meta->ref_obj_id = reg->ref_obj_id;
}
} else if (arg_type == ARG_PTR_TO_SOCKET) {
} else if (arg_type == ARG_PTR_TO_SOCKET ||
arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
expected_type = PTR_TO_SOCKET;
if (type != expected_type)
goto err_type;
if (!(register_is_null(reg) &&
arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
if (type != expected_type)
goto err_type;
}
} else if (arg_type == ARG_PTR_TO_BTF_ID) {
expected_type = PTR_TO_BTF_ID;
if (type != expected_type)
@ -7354,6 +7358,9 @@ static int check_return_code(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
break;
case BPF_PROG_TYPE_SK_LOOKUP:
range = tnum_range(SK_DROP, SK_PASS);
break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.

View File

@ -5275,31 +5275,21 @@ static struct bpf_test tests[] = {
{ /* Mainly checking JIT here. */
"BPF_MAXINSNS: Ctx heavy transformations",
{ },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_EXPECTED_FAIL,
#else
CLASSIC,
#endif
{ },
{
{ 1, SKB_VLAN_PRESENT },
{ 10, SKB_VLAN_PRESENT }
},
.fill_helper = bpf_fill_maxinsns6,
.expected_errcode = -ENOTSUPP,
},
{ /* Mainly checking JIT here. */
"BPF_MAXINSNS: Call heavy transformations",
{ },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
#else
CLASSIC | FLAG_NO_DATA,
#endif
{ },
{ { 1, 0 }, { 10, 0 } },
.fill_helper = bpf_fill_maxinsns7,
.expected_errcode = -ENOTSUPP,
},
{ /* Mainly checking JIT here. */
"BPF_MAXINSNS: Jump heavy test",
@ -5350,28 +5340,18 @@ static struct bpf_test tests[] = {
{
"BPF_MAXINSNS: exec all MSH",
{ },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_EXPECTED_FAIL,
#else
CLASSIC,
#endif
{ 0xfa, 0xfb, 0xfc, 0xfd, },
{ { 4, 0xababab83 } },
.fill_helper = bpf_fill_maxinsns13,
.expected_errcode = -ENOTSUPP,
},
{
"BPF_MAXINSNS: ld_abs+get_processor_id",
{ },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_EXPECTED_FAIL,
#else
CLASSIC,
#endif
{ },
{ { 1, 0xbee } },
.fill_helper = bpf_fill_ld_abs_get_processor_id,
.expected_errcode = -ENOTSUPP,
},
/*
* LD_IND / LD_ABS on fragmented SKBs

View File

@ -5449,6 +5449,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
if (cpu_map_prog_allowed(new->aux->used_maps[i]))
return -EINVAL;
}
}
@ -8880,6 +8882,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
NL_SET_ERR_MSG(extack,
"BPF_XDP_CPUMAP programs can not be attached to a device");
bpf_prog_put(prog);
return -EINVAL;
}
/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);

View File

@ -9252,6 +9252,189 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
EXPORT_SYMBOL(bpf_sk_lookup_enabled);
BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
struct sock *, sk, u64, flags)
{
if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
BPF_SK_LOOKUP_F_NO_REUSEPORT)))
return -EINVAL;
if (unlikely(sk && sk_is_refcounted(sk)))
return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
return -ESOCKTNOSUPPORT; /* reject connected sockets */
/* Check if socket is suitable for packet L3/L4 protocol */
if (sk && sk->sk_protocol != ctx->protocol)
return -EPROTOTYPE;
if (sk && sk->sk_family != ctx->family &&
(sk->sk_family == AF_INET || ipv6_only_sock(sk)))
return -EAFNOSUPPORT;
if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
return -EEXIST;
/* Select socket as lookup result */
ctx->selected_sk = sk;
ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
return 0;
}
static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
.func = bpf_sk_lookup_assign,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
case BPF_FUNC_sk_assign:
return &bpf_sk_lookup_assign_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
return bpf_base_func_proto(func_id);
}
}
static bool sk_lookup_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
return false;
if (off % size != 0)
return false;
if (type != BPF_READ)
return false;
switch (off) {
case offsetof(struct bpf_sk_lookup, sk):
info->reg_type = PTR_TO_SOCKET_OR_NULL;
return size == sizeof(__u64);
case bpf_ctx_range(struct bpf_sk_lookup, family):
case bpf_ctx_range(struct bpf_sk_lookup, protocol):
case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
bpf_ctx_record_field_size(info, sizeof(__u32));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
default:
return false;
}
}
static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct bpf_sk_lookup, sk):
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
offsetof(struct bpf_sk_lookup_kern, selected_sk));
break;
case offsetof(struct bpf_sk_lookup, family):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
family, 2, target_size));
break;
case offsetof(struct bpf_sk_lookup, protocol):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
protocol, 2, target_size));
break;
case offsetof(struct bpf_sk_lookup, remote_ip4):
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
v4.saddr, 4, target_size));
break;
case offsetof(struct bpf_sk_lookup, local_ip4):
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
v4.daddr, 4, target_size));
break;
case bpf_ctx_range_till(struct bpf_sk_lookup,
remote_ip6[0], remote_ip6[3]): {
#if IS_ENABLED(CONFIG_IPV6)
int off = si->off;
off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
offsetof(struct bpf_sk_lookup_kern, v6.saddr));
*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
#else
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
break;
}
case bpf_ctx_range_till(struct bpf_sk_lookup,
local_ip6[0], local_ip6[3]): {
#if IS_ENABLED(CONFIG_IPV6)
int off = si->off;
off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
offsetof(struct bpf_sk_lookup_kern, v6.daddr));
*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
#else
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
break;
}
case offsetof(struct bpf_sk_lookup, remote_port):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
sport, 2, target_size));
break;
case offsetof(struct bpf_sk_lookup, local_port):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
dport, 2, target_size));
break;
}
return insn - insn_buf;
}
const struct bpf_prog_ops sk_lookup_prog_ops = {
};
const struct bpf_verifier_ops sk_lookup_verifier_ops = {
.get_func_proto = sk_lookup_func_proto,
.is_valid_access = sk_lookup_is_valid_access,
.convert_ctx_access = sk_lookup_convert_ctx_access,
};
#endif /* CONFIG_INET */
DEFINE_BPF_DISPATCHER(xdp)
@ -9261,52 +9444,13 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}
/* Define a list of socket types which can be the argument for
* skc_to_*_sock() helpers. All these sockets should have
* sock_common as the first argument in its memory layout.
*/
#define BTF_SOCK_TYPE_xxx \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
enum {
#define BTF_SOCK_TYPE(name, str) name,
#ifdef CONFIG_DEBUG_INFO_BTF
BTF_ID_LIST_GLOBAL(btf_sock_ids)
#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
MAX_BTF_SOCK_TYPE,
};
static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
#ifdef CONFIG_BPF_SYSCALL
static const char *bpf_sock_types[] = {
#define BTF_SOCK_TYPE(name, str) str,
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
};
void init_btf_sock_ids(struct btf *btf)
{
int i, btf_id;
for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
BTF_KIND_STRUCT);
if (btf_id > 0)
btf_sock_ids[i] = btf_id;
}
}
#else
u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
#endif
static bool check_arg_btf_id(u32 btf_id, u32 arg)

View File

@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
/*
* Here are some nice properties to exploit here. The BSD API
* does not allow a listening sock to specify the remote port nor the
@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
if (sk->sk_reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
}
result = lookup_reuseport(net, sk, skb, doff,
saddr, sport, daddr, hnum);
if (result)
return result;
result = sk;
hiscore = score;
}
@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
static inline struct sock *inet_lookup_run_bpf(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, u16 hnum)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
if (hashinfo != &tcp_hashinfo)
return NULL; /* only TCP is supported */
no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
saddr, sport, daddr, hnum, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
saddr, sport, daddr, hnum);
if (result)
goto done;
}
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);

View File

@ -76,6 +76,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <linux/btf_ids.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@ -2946,7 +2947,7 @@ static void bpf_iter_fini_tcp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
static const struct bpf_iter_reg tcp_reg_info = {
static struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
@ -2961,6 +2962,7 @@ static const struct bpf_iter_reg tcp_reg_info = {
static void __init bpf_iter_register(void)
{
tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
if (bpf_iter_reg_target(&tcp_reg_info))
pr_warn("Warning: could not register bpf iterator tcp\n");
}

View File

@ -106,6 +106,7 @@
#include <net/xfrm.h>
#include <trace/events/udp.h>
#include <linux/static_key.h>
#include <linux/btf_ids.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"
@ -408,6 +409,25 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
struct sk_buff *skb,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum)
{
struct sock *reuse_sk = NULL;
u32 hash;
if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
/* Fall back to scoring if group has connections */
if (reuseport_has_conns(sk, false))
return NULL;
}
return reuse_sk;
}
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@ -418,7 +438,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
u32 hash = 0;
result = NULL;
badness = 0;
@ -426,15 +445,11 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result && !reuseport_has_conns(sk, false))
return result;
}
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
if (result)
return result;
badness = score;
result = sk;
}
@ -442,6 +457,29 @@ static struct sock *udp4_lib_lookup2(struct net *net,
return result;
}
static inline struct sock *udp4_lookup_run_bpf(struct net *net,
struct udp_table *udptable,
struct sk_buff *skb,
__be32 saddr, __be16 sport,
__be32 daddr, u16 hnum)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
if (udptable != &udp_table)
return NULL; /* only UDP is supported */
no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
saddr, sport, daddr, hnum, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@ -449,27 +487,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *result;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
/* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
if (!result) {
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
goto done;
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
hslot2, skb);
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
sk = udp4_lookup_run_bpf(net, udptable, skb,
saddr, sport, daddr, hnum);
if (sk) {
result = sk;
goto done;
}
}
/* Got non-wildcard socket or error on first lookup */
if (result)
goto done;
/* Lookup wildcard sockets */
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
hslot2, skb);
done:
if (IS_ERR(result))
return NULL;
return result;
@ -3153,7 +3209,7 @@ static void bpf_iter_fini_udp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
static const struct bpf_iter_reg udp_reg_info = {
static struct bpf_iter_reg udp_reg_info = {
.target = "udp",
.seq_ops = &bpf_iter_udp_seq_ops,
.init_seq_private = bpf_iter_init_udp,
@ -3168,6 +3224,7 @@ static const struct bpf_iter_reg udp_reg_info = {
static void __init bpf_iter_register(void)
{
udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
if (bpf_iter_reg_target(&udp_reg_info))
pr_warn("Warning: could not register bpf iterator udp\n");
}

View File

@ -21,6 +21,8 @@
#include <net/ip.h>
#include <net/sock_reuseport.h>
extern struct inet_hashinfo tcp_hashinfo;
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
@ -111,6 +113,23 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
__be16 sport,
const struct in6_addr *daddr,
unsigned short hnum)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_listen_hashbucket *ilb2,
@ -123,21 +142,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr, dif, sdif,
exact_dif);
if (score > hiscore) {
if (sk->sk_reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
}
result = lookup_reuseport(net, sk, skb, doff,
saddr, sport, daddr, hnum);
if (result)
return result;
result = sk;
hiscore = score;
}
@ -146,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
static inline struct sock *inet6_lookup_run_bpf(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
if (hashinfo != &tcp_hashinfo)
return NULL; /* only TCP is supported */
no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
saddr, sport, daddr, hnum, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@ -157,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
saddr, sport, daddr, hnum);
if (result)
goto done;
}
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);

View File

@ -61,6 +61,7 @@
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
#include <linux/btf_ids.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
static const struct bpf_iter_reg ipv6_route_reg_info = {
BTF_ID_LIST(btf_fib6_info_id)
BTF_ID(struct, fib6_info)
static struct bpf_iter_reg ipv6_route_reg_info = {
.target = "ipv6_route",
.seq_ops = &ipv6_route_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = {
static int __init bpf_iter_register(void)
{
ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
return bpf_iter_reg_target(&ipv6_route_reg_info);
}

View File

@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
struct sk_buff *skb,
const struct in6_addr *saddr,
__be16 sport,
const struct in6_addr *daddr,
unsigned int hnum)
{
struct sock *reuse_sk = NULL;
u32 hash;
if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
/* Fall back to scoring if group has connections */
if (reuseport_has_conns(sk, false))
return NULL;
}
return reuse_sk;
}
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
u32 hash = 0;
result = NULL;
badness = -1;
@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
if (result)
return result;
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result && !reuseport_has_conns(sk, false))
return result;
}
result = sk;
badness = score;
}
@ -175,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net,
return result;
}
static inline struct sock *udp6_lookup_run_bpf(struct net *net,
struct udp_table *udptable,
struct sk_buff *skb,
const struct in6_addr *saddr,
__be16 sport,
const struct in6_addr *daddr,
u16 hnum)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
if (udptable != &udp_table)
return NULL; /* only UDP is supported */
no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
saddr, sport, daddr, hnum, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@ -185,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result;
struct sock *result, *sk;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
/* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
if (!result) {
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
goto done;
hslot2 = &udptable->hash2[slot2];
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
hslot2, skb);
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
sk = udp6_lookup_run_bpf(net, udptable, skb,
saddr, sport, daddr, hnum);
if (sk) {
result = sk;
goto done;
}
}
/* Got non-wildcard socket or error on first lookup */
if (result)
goto done;
/* Lookup wildcard sockets */
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
hslot2, skb);
done:
if (IS_ERR(result))
return NULL;
return result;

View File

@ -60,6 +60,7 @@
#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@ -2803,7 +2804,10 @@ static const struct rhashtable_params netlink_rhashtable_params = {
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
static const struct bpf_iter_reg netlink_reg_info = {
BTF_ID_LIST(btf_netlink_sock_id)
BTF_ID(struct, netlink_sock)
static struct bpf_iter_reg netlink_reg_info = {
.target = "netlink",
.seq_ops = &netlink_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
@ -2818,6 +2822,7 @@ static const struct bpf_iter_reg netlink_reg_info = {
static int __init bpf_iter_register(void)
{
netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
return bpf_iter_reg_target(&netlink_reg_info);
}
#endif

View File

@ -12,7 +12,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
#define _(P) \
({ \
typeof(P) val; \
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
val; \
})
#define MINBLOCK_US 1

View File

@ -10,7 +10,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
#define _(P) \
({ \
typeof(P) val = 0; \
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
val; \
})
SEC("kprobe/__set_task_comm")
int prog(struct pt_regs *ctx)
@ -25,8 +30,9 @@ int prog(struct pt_regs *ctx)
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
bpf_probe_read_kernel(newcomm, sizeof(newcomm),
(void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;

View File

@ -11,7 +11,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
#define _(P) \
({ \
typeof(P) val = 0; \
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
val; \
})
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@ -34,7 +39,7 @@ int bpf_prog1(struct pt_regs *ctx)
dev = _(skb->dev);
len = _(skb->len);
bpf_probe_read(devname, sizeof(devname), dev->name);
bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";

View File

@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd;
bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd;
bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),

View File

@ -21,7 +21,7 @@
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u32));
__uint(value_size, sizeof(struct bpf_cpumap_val));
__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
@ -30,6 +30,9 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 issue;
__u64 xdp_pass;
__u64 xdp_drop;
__u64 xdp_redirect;
};
/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
@ -692,13 +695,16 @@ int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
* Code in: kernel/include/trace/events/xdp.h
*/
struct cpumap_kthread_ctx {
u64 __pad; // First 8 bytes are not accessible by bpf code
int map_id; // offset:8; size:4; signed:1;
u32 act; // offset:12; size:4; signed:0;
int cpu; // offset:16; size:4; signed:1;
unsigned int drops; // offset:20; size:4; signed:0;
unsigned int processed; // offset:24; size:4; signed:0;
int sched; // offset:28; size:4; signed:1;
u64 __pad; // First 8 bytes are not accessible
int map_id; // offset:8; size:4; signed:1;
u32 act; // offset:12; size:4; signed:0;
int cpu; // offset:16; size:4; signed:1;
unsigned int drops; // offset:20; size:4; signed:0;
unsigned int processed; // offset:24; size:4; signed:0;
int sched; // offset:28; size:4; signed:1;
unsigned int xdp_pass; // offset:32; size:4; signed:0;
unsigned int xdp_drop; // offset:36; size:4; signed:0;
unsigned int xdp_redirect; // offset:40; size:4; signed:0;
};
SEC("tracepoint/xdp/xdp_cpumap_kthread")
@ -712,6 +718,9 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
return 0;
rec->processed += ctx->processed;
rec->dropped += ctx->drops;
rec->xdp_pass += ctx->xdp_pass;
rec->xdp_drop += ctx->xdp_drop;
rec->xdp_redirect += ctx->xdp_redirect;
/* Count times kthread yielded CPU via schedule call */
if (ctx->sched)

View File

@ -70,6 +70,11 @@ static const struct option long_options[] = {
{"stress-mode", no_argument, NULL, 'x' },
{"no-separators", no_argument, NULL, 'z' },
{"force", no_argument, NULL, 'F' },
{"mprog-disable", no_argument, NULL, 'n' },
{"mprog-name", required_argument, NULL, 'e' },
{"mprog-filename", required_argument, NULL, 'f' },
{"redirect-device", required_argument, NULL, 'r' },
{"redirect-map", required_argument, NULL, 'm' },
{0, 0, NULL, 0 }
};
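/* Example invocation (hypothetical device name, not part of this patch):
 * run the load balancer on eth0, add CPU 2 as a redirect target, and let
 * the cpumap entries run the default second-stage program from
 * xdp_redirect_kern.o:
 *
 *   ./xdp_redirect_cpu --dev eth0 --cpu 2 \
 *           --mprog-filename xdp_redirect_kern.o \
 *           --mprog-name xdp_redirect_dummy
 */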
@ -156,6 +161,9 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 issue;
__u64 xdp_pass;
__u64 xdp_drop;
__u64 xdp_redirect;
};
struct record {
__u64 timestamp;
@ -175,6 +183,9 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec values[nr_cpus];
__u64 sum_xdp_redirect = 0;
__u64 sum_xdp_pass = 0;
__u64 sum_xdp_drop = 0;
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_issue = 0;
@ -196,10 +207,19 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
sum_dropped += values[i].dropped;
rec->cpu[i].issue = values[i].issue;
sum_issue += values[i].issue;
rec->cpu[i].xdp_pass = values[i].xdp_pass;
sum_xdp_pass += values[i].xdp_pass;
rec->cpu[i].xdp_drop = values[i].xdp_drop;
sum_xdp_drop += values[i].xdp_drop;
rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
sum_xdp_redirect += values[i].xdp_redirect;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.issue = sum_issue;
rec->total.xdp_pass = sum_xdp_pass;
rec->total.xdp_drop = sum_xdp_drop;
rec->total.xdp_redirect = sum_xdp_redirect;
return true;
}
@ -300,17 +320,33 @@ static __u64 calc_errs_pps(struct datarec *r,
return pps;
}
static void calc_xdp_pps(struct datarec *r, struct datarec *p,
double *xdp_pass, double *xdp_drop,
double *xdp_redirect, double period_)
{
*xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
if (period_ > 0) {
*xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
*xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
*xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
}
}
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
char *prog_name)
char *prog_name, char *mprog_name, int mprog_fd)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, drop = 0, err = 0;
bool mprog_enabled = false;
struct record *rec, *prev;
int to_cpu;
double t;
int i;
if (mprog_fd > 0)
mprog_enabled = true;
/* Header */
printf("Running XDP/eBPF prog_name:%s\n", prog_name);
printf("%-15s %-7s %-14s %-11s %-9s\n",
@ -455,6 +491,34 @@ static void stats_print(struct stats_record *stats_rec,
printf(fm2_err, "xdp_exception", "total", pps, drop);
}
/* CPUMAP attached XDP program that runs on remote/destination CPU */
if (mprog_enabled) {
char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
double xdp_pass, xdp_drop, xdp_redirect;
printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
printf("%-15s %-7s %-14s %-11s %-9s\n",
"XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
rec = &stats_rec->kthread;
prev = &stats_prev->kthread;
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
&xdp_redirect, t);
if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
xdp_redirect);
}
calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
&xdp_redirect, t);
printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
}
printf("\n");
fflush(stdout);
}
@ -491,7 +555,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
*b = tmp;
}
static int create_cpu_entry(__u32 cpu, __u32 queue_size,
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
__u32 avail_idx, bool new)
{
__u32 curr_cpus_count = 0;
@ -501,7 +565,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
/* Add a CPU entry to cpumap, as this allocates a cpu entry in
* the kernel for the cpu.
*/
ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0);
if (ret) {
fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
exit(EXIT_FAIL_BPF);
@ -532,9 +596,9 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
}
}
/* map_fd[7] = cpus_iterator */
printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
new ? "Add-new":"Replace", cpu, avail_idx,
queue_size, curr_cpus_count);
value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
}
@ -558,21 +622,26 @@ static void mark_cpus_unavailable(void)
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
static void stress_cpumap(void)
static void stress_cpumap(struct bpf_cpumap_val *value)
{
/* Changing qsize will cause the kernel to free and allocate a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
*/
create_cpu_entry(1, 1024, 0, false);
create_cpu_entry(1, 8, 0, false);
create_cpu_entry(1, 16000, 0, false);
value->qsize = 1024;
create_cpu_entry(1, value, 0, false);
value->qsize = 8;
create_cpu_entry(1, value, 0, false);
value->qsize = 16000;
create_cpu_entry(1, value, 0, false);
}
static void stats_poll(int interval, bool use_separators, char *prog_name,
char *mprog_name, struct bpf_cpumap_val *value,
bool stress_mode)
{
struct stats_record *record, *prev;
int mprog_fd;
record = alloc_stats_record();
prev = alloc_stats_record();
@ -584,11 +653,12 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
while (1) {
swap(&prev, &record);
mprog_fd = value->bpf_prog.fd;
stats_collect(record);
stats_print(record, prev, prog_name);
stats_print(record, prev, prog_name, mprog_name, mprog_fd);
sleep(interval);
if (stress_mode)
stress_cpumap();
stress_cpumap(value);
}
free_stats_record(record);
@ -661,15 +731,66 @@ static int init_map_fds(struct bpf_object *obj)
return 0;
}
static int load_cpumap_prog(char *file_name, char *prog_name,
char *redir_interface, char *redir_map)
{
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
.expected_attach_type = BPF_XDP_CPUMAP,
.file = file_name,
};
struct bpf_program *prog;
struct bpf_object *obj;
int fd;
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
return -1;
if (fd < 0) {
fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
strerror(errno));
return fd;
}
if (redir_interface && redir_map) {
int err, map_fd, ifindex_out, key = 0;
map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
if (map_fd < 0)
return map_fd;
ifindex_out = if_nametoindex(redir_interface);
if (!ifindex_out)
return -1;
err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
if (err < 0)
return err;
}
prog = bpf_object__find_program_by_title(obj, prog_name);
if (!prog) {
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
return EXIT_FAIL;
}
return bpf_program__fd(prog);
}
int main(int argc, char **argv)
{
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
char *mprog_filename = "xdp_redirect_kern.o";
char *redir_interface = NULL, *redir_map = NULL;
char *mprog_name = "xdp_redirect_dummy";
bool mprog_disable = false;
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
struct bpf_cpumap_val value;
bool use_separators = true;
bool stress_mode = false;
struct bpf_program *prog;
@ -681,6 +802,7 @@ int main(int argc, char **argv)
int add_cpu = -1;
int opt, err;
int prog_fd;
int *cpu, i;
__u32 qsize;
n_cpus = get_nprocs_conf();
@ -716,8 +838,15 @@ int main(int argc, char **argv)
}
mark_cpus_unavailable();
cpu = malloc(n_cpus * sizeof(int));
if (!cpu) {
fprintf(stderr, "failed to allocate cpu array\n");
return EXIT_FAIL;
}
memset(cpu, 0, n_cpus * sizeof(int));
/* Parse command line args */
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
@ -751,6 +880,21 @@ int main(int argc, char **argv)
/* Selecting eBPF prog to load */
prog_name = optarg;
break;
case 'n':
mprog_disable = true;
break;
case 'f':
mprog_filename = optarg;
break;
case 'e':
mprog_name = optarg;
break;
case 'r':
redir_interface = optarg;
break;
case 'm':
redir_map = optarg;
break;
case 'c':
/* Add multiple CPUs */
add_cpu = strtoul(optarg, NULL, 0);
@ -760,8 +904,7 @@ int main(int argc, char **argv)
errno, strerror(errno));
goto error;
}
create_cpu_entry(add_cpu, qsize, added_cpus, true);
added_cpus++;
cpu[added_cpus++] = add_cpu;
break;
case 'q':
qsize = atoi(optarg);
@ -772,6 +915,7 @@ int main(int argc, char **argv)
case 'h':
error:
default:
free(cpu);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
@ -784,16 +928,31 @@ int main(int argc, char **argv)
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
usage(argv, obj);
return EXIT_FAIL_OPTION;
err = EXIT_FAIL_OPTION;
goto out;
}
/* Required option */
if (add_cpu == -1) {
fprintf(stderr, "ERR: required option --cpu missing\n");
fprintf(stderr, " Specify multiple --cpu option to add more\n");
usage(argv, obj);
return EXIT_FAIL_OPTION;
err = EXIT_FAIL_OPTION;
goto out;
}
value.bpf_prog.fd = 0;
if (!mprog_disable)
value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
redir_interface, redir_map);
if (value.bpf_prog.fd < 0) {
err = value.bpf_prog.fd;
goto out;
}
value.qsize = qsize;
for (i = 0; i < added_cpus; i++)
create_cpu_entry(cpu[i], &value, i, true);
/* Remove XDP program when program is interrupted or killed */
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
@ -801,27 +960,33 @@ int main(int argc, char **argv)
prog = bpf_object__find_program_by_title(obj, prog_name);
if (!prog) {
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
return EXIT_FAIL;
err = EXIT_FAIL;
goto out;
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
fprintf(stderr, "bpf_program__fd failed\n");
return EXIT_FAIL;
err = EXIT_FAIL;
goto out;
}
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
err = EXIT_FAIL_XDP;
goto out;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
goto out;
}
prog_id = info.id;
stats_poll(interval, use_separators, prog_name, stress_mode);
return EXIT_OK;
stats_poll(interval, use_separators, prog_name, mprog_name,
&value, stress_mode);
out:
free(cpu);
return err;
}
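For reference, a minimal stand-alone sketch of the CPUMAP value interface the sample now drives; the map fd, CPU index, queue size and program fd are placeholders supplied by the caller, and error handling is reduced to the raw return code:

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Install a BPF_XDP_CPUMAP program into one cpumap slot.  Writing the
 * value allocates the kernel-side cpu entry (and its kthread) and
 * attaches the program that will run on the remote CPU.
 */
static int install_cpumap_prog(int cpu_map_fd, __u32 cpu, __u32 qsize, int prog_fd)
{
	struct bpf_cpumap_val val = {
		.qsize = qsize,
		.bpf_prog.fd = prog_fd,
	};

	return bpf_map_update_elem(cpu_map_fd, &cpu, &val, 0);
}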

View File

@ -404,6 +404,7 @@ class PrinterHelpers(Printer):
type_fwds = [
'struct bpf_fib_lookup',
'struct bpf_sk_lookup',
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
@ -450,6 +451,7 @@ class PrinterHelpers(Printer):
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
'struct bpf_sk_lookup',
'struct bpf_sock',
'struct bpf_sock_addr',
'struct bpf_sock_ops',
@ -487,6 +489,11 @@ class PrinterHelpers(Printer):
'struct sk_msg_buff': 'struct sk_msg_md',
'struct xdp_buff': 'struct xdp_md',
}
# Helpers overloaded for different context types.
overloaded_helpers = [
'bpf_get_socket_cookie',
'bpf_sk_assign',
]
def print_header(self):
header = '''\
@ -543,7 +550,7 @@ class PrinterHelpers(Printer):
for i, a in enumerate(proto['args']):
t = a['type']
n = a['name']
if proto['name'] == 'bpf_get_socket_cookie' and i == 0:
if proto['name'] in self.overloaded_helpers and i == 0:
t = 'void'
n = 'ctx'
one_arg = '{}{}'.format(comma, self.map_type(t))

View File

@ -45,7 +45,7 @@ PROG COMMANDS
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** |
| **struct_ops** | **fentry** | **fexit** | **freplace**
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**

View File

@ -479,7 +479,7 @@ _bpftool()
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt struct_ops \
fentry fexit freplace" -- \
fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
;;

View File

@ -1,10 +1,11 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <fts.h>
#include <ftw.h>
#include <libgen.h>
#include <mntent.h>
#include <stdbool.h>
@ -64,6 +65,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_TRACE_FEXIT] = "fexit",
[BPF_MODIFY_RETURN] = "mod_ret",
[BPF_LSM_MAC] = "lsm_mac",
[BPF_SK_LOOKUP] = "sk_lookup",
};
void p_err(const char *fmt, ...)
@ -160,24 +162,35 @@ int mount_tracefs(const char *target)
return err;
}
int open_obj_pinned(char *path, bool quiet)
int open_obj_pinned(const char *path, bool quiet)
{
int fd;
char *pname;
int fd = -1;
fd = bpf_obj_get(path);
if (fd < 0) {
pname = strdup(path);
if (!pname) {
if (!quiet)
p_err("bpf obj get (%s): %s", path,
errno == EACCES && !is_bpffs(dirname(path)) ?
"directory not in bpf file system (bpffs)" :
strerror(errno));
return -1;
p_err("mem alloc failed");
goto out_ret;
}
fd = bpf_obj_get(pname);
if (fd < 0) {
if (!quiet)
p_err("bpf obj get (%s): %s", pname,
errno == EACCES && !is_bpffs(dirname(pname)) ?
"directory not in bpf file system (bpffs)" :
strerror(errno));
goto out_free;
}
out_free:
free(pname);
out_ret:
return fd;
}
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
{
enum bpf_obj_type type;
int fd;
@ -367,71 +380,82 @@ void print_hex_data_json(uint8_t *data, size_t len)
jsonw_end_array(json_wtr);
}
/* extra params for nftw cb */
static struct pinned_obj_table *build_fn_table;
static enum bpf_obj_type build_fn_type;
static int do_build_table_cb(const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftwbuf)
{
struct bpf_prog_info pinned_info;
__u32 len = sizeof(pinned_info);
struct pinned_obj *obj_node;
enum bpf_obj_type objtype;
int fd, err = 0;
if (typeflag != FTW_F)
goto out_ret;
fd = open_obj_pinned(fpath, true);
if (fd < 0)
goto out_ret;
objtype = get_fd_type(fd);
if (objtype != build_fn_type)
goto out_close;
memset(&pinned_info, 0, sizeof(pinned_info));
if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
goto out_close;
obj_node = calloc(1, sizeof(*obj_node));
if (!obj_node) {
err = -1;
goto out_close;
}
obj_node->id = pinned_info.id;
obj_node->path = strdup(fpath);
if (!obj_node->path) {
err = -1;
free(obj_node);
goto out_close;
}
hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
out_close:
close(fd);
out_ret:
return err;
}
int build_pinned_obj_table(struct pinned_obj_table *tab,
enum bpf_obj_type type)
{
struct bpf_prog_info pinned_info = {};
struct pinned_obj *obj_node = NULL;
__u32 len = sizeof(pinned_info);
struct mntent *mntent = NULL;
enum bpf_obj_type objtype;
FILE *mntfile = NULL;
FTSENT *ftse = NULL;
FTS *fts = NULL;
int fd, err;
int flags = FTW_PHYS;
int nopenfd = 16;
int err = 0;
mntfile = setmntent("/proc/mounts", "r");
if (!mntfile)
return -1;
build_fn_table = tab;
build_fn_type = type;
while ((mntent = getmntent(mntfile))) {
char *path[] = { mntent->mnt_dir, NULL };
char *path = mntent->mnt_dir;
if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
continue;
fts = fts_open(path, 0, NULL);
if (!fts)
continue;
while ((ftse = fts_read(fts))) {
if (!(ftse->fts_info & FTS_F))
continue;
fd = open_obj_pinned(ftse->fts_path, true);
if (fd < 0)
continue;
objtype = get_fd_type(fd);
if (objtype != type) {
close(fd);
continue;
}
memset(&pinned_info, 0, sizeof(pinned_info));
err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
if (err) {
close(fd);
continue;
}
obj_node = malloc(sizeof(*obj_node));
if (!obj_node) {
close(fd);
fts_close(fts);
fclose(mntfile);
return -1;
}
memset(obj_node, 0, sizeof(*obj_node));
obj_node->id = pinned_info.id;
obj_node->path = strdup(ftse->fts_path);
hash_add(tab->table, &obj_node->hash, obj_node->id);
close(fd);
}
fts_close(fts);
err = nftw(path, do_build_table_cb, nopenfd, flags);
if (err)
break;
}
fclose(mntfile);
return 0;
return err;
}
void delete_pinned_obj_table(struct pinned_obj_table *tab)

View File

@ -302,8 +302,11 @@ static int do_skeleton(int argc, char **argv)
opts.object_name = obj_name;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
if (IS_ERR(obj)) {
char err_buf[256];
libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
obj = NULL;
p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
goto out;
}

View File

@ -152,8 +152,8 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(char *path, bool quiet);
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
int open_obj_pinned(const char *path, bool quiet);
int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);

View File

@ -59,6 +59,7 @@ const char * const prog_type_name[] = {
[BPF_PROG_TYPE_TRACING] = "tracing",
[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_PROG_TYPE_EXT] = "ext",
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
};
const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
@ -1905,7 +1906,7 @@ static int do_help(int argc, char **argv)
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt |\n"
" struct_ops | fentry | fexit | freplace }\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"

View File

@ -71,7 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
task->group_leader->comm);
bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
return 0;

View File

@ -3,6 +3,8 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
/*
@ -21,7 +23,7 @@
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
".local " #symbol " ; \n" \
".type " #symbol ", @object; \n" \
".type " #symbol ", STT_OBJECT; \n" \
".size " #symbol ", 4; \n" \
#symbol ": \n" \
".zero 4 \n" \
@ -55,17 +57,20 @@ asm( \
* .zero 4
*
*/
#define __BTF_ID_LIST(name) \
#define __BTF_ID_LIST(name, scope) \
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
".local " #name "; \n" \
"." #scope " " #name "; \n" \
#name ":; \n" \
".popsection; \n"); \
#define BTF_ID_LIST(name) \
__BTF_ID_LIST(name) \
__BTF_ID_LIST(name, local) \
extern u32 name[];
#define BTF_ID_LIST_GLOBAL(name) \
__BTF_ID_LIST(name, globl)
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@ -83,5 +88,43 @@ asm( \
".zero 4 \n" \
".popsection; \n");
#else
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
#endif /* CONFIG_DEBUG_INFO_BTF */
#ifdef CONFIG_NET
/* Define a list of socket types which can be the argument for
* skc_to_*_sock() helpers. All these sockets should have
* sock_common as the first member in their memory layout.
*/
#define BTF_SOCK_TYPE_xxx \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
enum {
#define BTF_SOCK_TYPE(name, str) name,
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
MAX_BTF_SOCK_TYPE,
};
extern u32 btf_sock_ids[];
#endif
#endif
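A sketch of how a kernel-side consumer might instantiate this list so that resolve_btfids can patch the IDs at build time; the actual in-tree user is not part of this excerpt, this only illustrates the macro pattern:

#include <linux/btf_ids.h>

/* Emit one BTF_ID entry per socket type into the .BTF_ids section,
 * under the global symbol btf_sock_ids.
 */
#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
BTF_ID_LIST_GLOBAL(btf_sock_ids)
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE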

View File

@ -189,6 +189,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
};
enum bpf_attach_type {
@ -227,6 +228,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
BPF_CGROUP_INET_SOCK_RELEASE,
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
__MAX_BPF_ATTACH_TYPE
};
@ -2419,7 +2422,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@ -2456,7 +2459,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
* socket lookup table in the netns associated with the *ctx* will
* socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@ -3068,6 +3071,10 @@ union bpf_attr {
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
* **BPF_PROG_TYPE_SCHED_ACT** programs.
*
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
* will cause *skb* to be delivered to the specified socket.
@ -3093,6 +3100,56 @@ union bpf_attr {
* **-ESOCKTNOSUPPORT** if the socket type is not supported
* (reuseport).
*
* long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
*
* Select the *sk* as a result of a socket lookup.
*
* For the operation to succeed, the passed socket must be compatible
* with the packet description provided by the *ctx* object.
*
* L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
* be an exact match. While IP family (**AF_INET** or
* **AF_INET6**) must be compatible, that is IPv6 sockets
* that are not v6-only can be selected for IPv4 packets.
*
* Only TCP listeners and UDP unconnected sockets can be
* selected. *sk* can also be NULL to reset any previous
* selection.
*
* *flags* argument can be a combination of the following values:
*
* * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
* socket selection, potentially done by a BPF program
* that ran before us.
*
* * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
* load-balancing within reuseport group for the socket
* being selected.
*
* On success *ctx->sk* will point to the selected socket.
*
* Return
* 0 on success, or a negative errno in case of failure.
*
* * **-EAFNOSUPPORT** if socket family (*sk->family*) is
* not compatible with packet family (*ctx->family*).
*
* * **-EEXIST** if socket has been already selected,
* potentially by another program, and
* **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
*
* * **-EINVAL** if unsupported flags were specified.
*
* * **-EPROTOTYPE** if socket L4 protocol
* (*sk->protocol*) doesn't match packet protocol
* (*ctx->protocol*).
*
* * **-ESOCKTNOSUPPORT** if socket is not in allowed
* state (TCP listening or UDP unconnected).
*
* u64 bpf_ktime_get_boot_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
@ -3606,6 +3663,12 @@ enum {
BPF_RINGBUF_HDR_SZ = 8,
};
/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
enum {
BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0),
BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1),
};
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@ -3849,6 +3912,19 @@ struct bpf_devmap_val {
} bpf_prog;
};
/* CPUMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
* New members can only be added to the end of this structure.
*/
struct bpf_cpumap_val {
__u32 qsize; /* queue size to remote target CPU */
union {
int fd; /* prog fd on map write */
__u32 id; /* prog id on map read */
} bpf_prog;
};
enum sk_action {
SK_DROP = 0,
SK_PASS,
@ -3986,7 +4062,7 @@ struct bpf_link_info {
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
* attach attach type).
* attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
@ -4335,4 +4411,19 @@ struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
__u32 remote_ip4; /* Network byte order */
__u32 remote_ip6[4]; /* Network byte order */
__u32 remote_port; /* Network byte order */
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
};
#endif /* _UAPI__LINUX_BPF_H__ */
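A rough user-space sketch tying the new UAPI pieces together: a BPF_PROG_TYPE_SK_LOOKUP program attached to a network namespace through a bpf_link. The object path and section title passed by the caller are hypothetical; libbpf resolves the program type and BPF_SK_LOOKUP expected attach type from the "sk_lookup/" section prefix added in this series.

#include <fcntl.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Load an object containing an sk_lookup program and attach it to the
 * current network namespace.  Returns the link fd, or -1 on error.
 */
static int attach_sk_lookup(const char *obj_path, const char *title)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	int netns_fd, link_fd = -1;

	obj = bpf_object__open_file(obj_path, NULL);
	if (libbpf_get_error(obj))
		return -1;
	if (bpf_object__load(obj))
		goto out;

	prog = bpf_object__find_program_by_title(obj, title);
	if (!prog)
		goto out;

	netns_fd = open("/proc/self/ns/net", O_RDONLY);
	if (netns_fd < 0)
		goto out;

	link_fd = bpf_link_create(bpf_program__fd(prog), netns_fd,
				  BPF_SK_LOOKUP, NULL);
	close(netns_fd);
out:
	return link_fd;
}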

View File

@ -40,7 +40,7 @@
* Helper macro to manipulate data structures
*/
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
#endif
#ifndef container_of
#define container_of(ptr, type, member) \

View File

@ -6799,6 +6799,7 @@ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
bpf_program__get_expected_attach_type(struct bpf_program *prog)
@ -6912,6 +6913,8 @@ static const struct bpf_sec_def section_defs[] = {
.attach_fn = attach_iter),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
BPF_XDP_CPUMAP),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@ -6979,6 +6982,8 @@ static const struct bpf_sec_def section_defs[] = {
BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
BPF_CGROUP_SETSOCKOPT),
BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
BPF_SK_LOOKUP),
};
#undef BPF_PROG_SEC_IMPL

View File

@ -350,6 +350,7 @@ LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@ -377,6 +378,7 @@ LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
/*
* No need for __attribute__((packed)), all members of 'bpf_map_def'

View File

@ -287,6 +287,8 @@ LIBBPF_0.1.0 {
bpf_map__type;
bpf_map__value_size;
bpf_program__autoload;
bpf_program__is_sk_lookup;
bpf_program__set_autoload;
bpf_program__set_sk_lookup;
btf__set_fd;
} LIBBPF_0.0.9;

View File

@ -78,6 +78,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
break;
case BPF_PROG_TYPE_SK_LOOKUP:
xattr.expected_attach_type = BPF_SK_LOOKUP;
break;
case BPF_PROG_TYPE_KPROBE:
xattr.kern_version = get_kernel_version();
break;

View File

@ -73,29 +73,8 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
socklen_t len;
int fd;
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)&addr;
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
log_err("inet_pton(AF_INET, %s)", addr_str);
return -1;
}
len = sizeof(*sin);
} else {
struct sockaddr_in6 *sin6 = (void *)&addr;
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
log_err("inet_pton(AF_INET6, %s)", addr_str);
return -1;
}
len = sizeof(*sin6);
}
if (make_sockaddr(family, addr_str, port, &addr, &len))
return -1;
fd = socket(family, type, 0);
if (fd < 0) {
@ -194,3 +173,36 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return 0;
}
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len)
{
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)addr;
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
log_err("inet_pton(AF_INET, %s)", addr_str);
return -1;
}
if (len)
*len = sizeof(*sin);
return 0;
} else if (family == AF_INET6) {
struct sockaddr_in6 *sin6 = (void *)addr;
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
log_err("inet_pton(AF_INET6, %s)", addr_str);
return -1;
}
if (len)
*len = sizeof(*sin6);
return 0;
}
return -1;
}
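A small usage sketch of the new helper; the loopback address and port are arbitrary, and including "network_helpers.h" assumes the code is built inside the selftests tree:

#include <sys/socket.h>
#include "network_helpers.h"

/* Fill a sockaddr_storage for 127.0.0.1:7007 and connect a socket to it. */
static int connect_loopback(int fd)
{
	struct sockaddr_storage addr;
	socklen_t len;

	if (make_sockaddr(AF_INET, "127.0.0.1", 7007, &addr, &len))
		return -1;
	return connect(fd, (struct sockaddr *)&addr, len);
}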

View File

@ -37,5 +37,7 @@ int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
#endif

View File

@ -6,6 +6,7 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/kernel.h>
#define CONFIG_DEBUG_INFO_BTF
#include <linux/btf_ids.h>
#include "test_progs.h"
@ -27,7 +28,17 @@ struct symbol test_symbols[] = {
{ "func", BTF_KIND_FUNC, -1 },
};
BTF_ID_LIST(test_list)
BTF_ID_LIST(test_list_local)
BTF_ID_UNUSED
BTF_ID(typedef, S)
BTF_ID(typedef, T)
BTF_ID(typedef, U)
BTF_ID(struct, S)
BTF_ID(union, U)
BTF_ID(func, func)
extern __u32 test_list_global[];
BTF_ID_LIST_GLOBAL(test_list_global)
BTF_ID_UNUSED
BTF_ID(typedef, S)
BTF_ID(typedef, T)
@ -93,18 +104,25 @@ static int resolve_symbols(void)
int test_resolve_btfids(void)
{
unsigned int i;
__u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
unsigned int i, j;
int ret = 0;
if (resolve_symbols())
return -1;
/* Check BTF_ID_LIST(test_list) IDs */
for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
ret = CHECK(test_list[i] != test_symbols[i].id,
"id_check",
"wrong ID for %s (%d != %d)\n", test_symbols[i].name,
test_list[i], test_symbols[i].id);
/* Check BTF_ID_LIST(test_list_local) and
* BTF_ID_LIST_GLOBAL(test_list_global) IDs
*/
for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
test_list = test_lists[j];
for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
ret = CHECK(test_list[i] != test_symbols[i].id,
"id_check",
"wrong ID for %s (%d != %d)\n",
test_symbols[i].name,
test_list[i], test_symbols[i].id);
}
}
return ret;

File diff suppressed because it is too large

View File

@ -193,11 +193,10 @@ static void run_test(int cgroup_fd)
if (CHECK_FAIL(server_fd < 0))
goto close_bpf_object;
pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
goto close_server_fd;
pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);

View File

@ -0,0 +1,70 @@
// SPDX-License-Identifier: GPL-2.0
#include <uapi/linux/bpf.h>
#include <linux/if_link.h>
#include <test_progs.h>
#include "test_xdp_with_cpumap_helpers.skel.h"
#define IFINDEX_LO 1
void test_xdp_with_cpumap_helpers(void)
{
struct test_xdp_with_cpumap_helpers *skel;
struct bpf_prog_info info = {};
struct bpf_cpumap_val val = {
.qsize = 192,
};
__u32 duration = 0, idx = 0;
__u32 len = sizeof(info);
int err, prog_fd, map_fd;
skel = test_xdp_with_cpumap_helpers__open_and_load();
if (CHECK_FAIL(!skel)) {
perror("test_xdp_with_cpumap_helpers__open_and_load");
return;
}
/* a program using a CPUMAP that allows programs can not be attached
* in XDP generic (skb) mode
*/
prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
"should have failed\n");
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
map_fd = bpf_map__fd(skel->maps.cpu_map);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
if (CHECK_FAIL(err))
goto out_close;
val.bpf_prog.fd = prog_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
err, errno);
err = bpf_map_lookup_elem(map_fd, &idx, &val);
CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
"expected %u read %u\n", info.id, val.bpf_prog.id);
/* can not attach BPF_XDP_CPUMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
"should have failed\n");
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
"should have failed\n");
out_close:
test_xdp_with_cpumap_helpers__destroy(skel);
}
void test_xdp_cpumap_attach(void)
{
if (test__start_subtest("cpumap_with_progs"))
test_xdp_with_cpumap_helpers();
}

View File

@ -36,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
if (!nlk->groups) {
group = 0;
} else {
/* FIXME: temporary use bpf_probe_read here, needs
/* FIXME: temporary use bpf_probe_read_kernel here, needs
* verifier support to do direct access.
*/
bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
}
BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
nlk->portid, (u32)group,
@ -56,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
* with current verifier.
*/
inode = SOCK_INODE(sk);
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
}
BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);

View File

@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}

View File

@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}

View File

@ -18,7 +18,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}

View File

@ -25,7 +25,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}

View File

@ -0,0 +1,641 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
// Copyright (c) 2020 Cloudflare
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <sys/socket.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#define IP4(a, b, c, d) \
bpf_htonl((((__u32)(a) & 0xffU) << 24) | \
(((__u32)(b) & 0xffU) << 16) | \
(((__u32)(c) & 0xffU) << 8) | \
(((__u32)(d) & 0xffU) << 0))
#define IP6(aaaa, bbbb, cccc, dddd) \
{ bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
#define MAX_SOCKS 32
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, MAX_SOCKS);
__type(key, __u32);
__type(value, __u64);
} redir_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 2);
__type(key, int);
__type(value, int);
} run_map SEC(".maps");
enum {
PROG1 = 0,
PROG2,
};
enum {
SERVER_A = 0,
SERVER_B,
};
/* Addressable key/value constants for convenience */
static const int KEY_PROG1 = PROG1;
static const int KEY_PROG2 = PROG2;
static const int PROG_DONE = 1;
static const __u32 KEY_SERVER_A = SERVER_A;
static const __u32 KEY_SERVER_B = SERVER_B;
static const __u16 DST_PORT = 7007; /* Host byte order */
static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
SEC("sk_lookup/lookup_pass")
int lookup_pass(struct bpf_sk_lookup *ctx)
{
return SK_PASS;
}
SEC("sk_lookup/lookup_drop")
int lookup_drop(struct bpf_sk_lookup *ctx)
{
return SK_DROP;
}
SEC("sk_reuseport/reuse_pass")
int reuseport_pass(struct sk_reuseport_md *ctx)
{
return SK_PASS;
}
SEC("sk_reuseport/reuse_drop")
int reuseport_drop(struct sk_reuseport_md *ctx)
{
return SK_DROP;
}
/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
SEC("sk_lookup/redir_port")
int redir_port(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err;
if (ctx->local_port != DST_PORT)
return SK_PASS;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
return SK_PASS;
err = bpf_sk_assign(ctx, sk, 0);
bpf_sk_release(sk);
return err ? SK_DROP : SK_PASS;
}
/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
SEC("sk_lookup/redir_ip4")
int redir_ip4(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err;
if (ctx->family != AF_INET)
return SK_PASS;
if (ctx->local_port != DST_PORT)
return SK_PASS;
if (ctx->local_ip4 != DST_IP4)
return SK_PASS;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
return SK_PASS;
err = bpf_sk_assign(ctx, sk, 0);
bpf_sk_release(sk);
return err ? SK_DROP : SK_PASS;
}
/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
SEC("sk_lookup/redir_ip6")
int redir_ip6(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err;
if (ctx->family != AF_INET6)
return SK_PASS;
if (ctx->local_port != DST_PORT)
return SK_PASS;
if (ctx->local_ip6[0] != DST_IP6[0] ||
ctx->local_ip6[1] != DST_IP6[1] ||
ctx->local_ip6[2] != DST_IP6[2] ||
ctx->local_ip6[3] != DST_IP6[3])
return SK_PASS;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
return SK_PASS;
err = bpf_sk_assign(ctx, sk, 0);
bpf_sk_release(sk);
return err ? SK_DROP : SK_PASS;
}
SEC("sk_lookup/select_sock_a")
int select_sock_a(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
return SK_PASS;
err = bpf_sk_assign(ctx, sk, 0);
bpf_sk_release(sk);
return err ? SK_DROP : SK_PASS;
}
SEC("sk_lookup/select_sock_a_no_reuseport")
int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
return SK_DROP;
err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
bpf_sk_release(sk);
return err ? SK_DROP : SK_PASS;
}
SEC("sk_reuseport/select_sock_b")
int select_sock_b(struct sk_reuseport_md *ctx)
{
__u32 key = KEY_SERVER_B;
int err;
err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
return err ? SK_DROP : SK_PASS;
}
/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
SEC("sk_lookup/sk_assign_eexist")
int sk_assign_eexist(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err, ret;
ret = SK_DROP;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (!sk)
goto out;
err = bpf_sk_assign(ctx, sk, 0);
if (err)
goto out;
bpf_sk_release(sk);
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
goto out;
err = bpf_sk_assign(ctx, sk, 0);
if (err != -EEXIST) {
bpf_printk("sk_assign returned %d, expected %d\n",
err, -EEXIST);
goto out;
}
ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
out:
if (sk)
bpf_sk_release(sk);
return ret;
}
/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
SEC("sk_lookup/sk_assign_replace_flag")
int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err, ret;
ret = SK_DROP;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
goto out;
err = bpf_sk_assign(ctx, sk, 0);
if (err)
goto out;
bpf_sk_release(sk);
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (!sk)
goto out;
err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
if (err) {
bpf_printk("sk_assign returned %d, expected 0\n", err);
goto out;
}
ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
out:
if (sk)
bpf_sk_release(sk);
return ret;
}
/* Check that bpf_sk_assign(sk=NULL) is accepted. */
SEC("sk_lookup/sk_assign_null")
int sk_assign_null(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk = NULL;
int err, ret;
ret = SK_DROP;
err = bpf_sk_assign(ctx, NULL, 0);
if (err) {
bpf_printk("sk_assign returned %d, expected 0\n", err);
goto out;
}
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (!sk)
goto out;
err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
if (err) {
bpf_printk("sk_assign returned %d, expected 0\n", err);
goto out;
}
if (ctx->sk != sk)
goto out;
err = bpf_sk_assign(ctx, NULL, 0);
if (err != -EEXIST)
goto out;
err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
out:
if (sk)
bpf_sk_release(sk);
return ret;
}
/* Check that selected sk is accessible through context. */
SEC("sk_lookup/access_ctx_sk")
int access_ctx_sk(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk1 = NULL, *sk2 = NULL;
int err, ret;
ret = SK_DROP;
/* Try accessing unassigned (NULL) ctx->sk field */
if (ctx->sk && ctx->sk->family != AF_INET)
goto out;
/* Assign a value to ctx->sk */
sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk1)
goto out;
err = bpf_sk_assign(ctx, sk1, 0);
if (err)
goto out;
if (ctx->sk != sk1)
goto out;
/* Access ctx->sk fields */
if (ctx->sk->family != AF_INET ||
ctx->sk->type != SOCK_STREAM ||
ctx->sk->state != BPF_TCP_LISTEN)
goto out;
/* Reset selection */
err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
if (ctx->sk)
goto out;
/* Assign another socket */
sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (!sk2)
goto out;
err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
if (ctx->sk != sk2)
goto out;
/* Access reassigned ctx->sk fields */
if (ctx->sk->family != AF_INET ||
ctx->sk->type != SOCK_STREAM ||
ctx->sk->state != BPF_TCP_LISTEN)
goto out;
ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
out:
if (sk1)
bpf_sk_release(sk1);
if (sk2)
bpf_sk_release(sk2);
return ret;
}
/* Check narrow loads from ctx fields that support them.
*
* Narrow loads of size >= target field size from a non-zero offset
* are not covered because they give bogus results, that is the
* verifier ignores the offset.
*/
SEC("sk_lookup/ctx_narrow_access")
int ctx_narrow_access(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err, family;
__u16 *half;
__u8 *byte;
bool v4;
v4 = (ctx->family == AF_INET);
/* Narrow loads from family field */
byte = (__u8 *)&ctx->family;
half = (__u16 *)&ctx->family;
if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
return SK_DROP;
if (half[0] != (v4 ? AF_INET : AF_INET6))
return SK_DROP;
byte = (__u8 *)&ctx->protocol;
if (byte[0] != IPPROTO_TCP ||
byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
return SK_DROP;
half = (__u16 *)&ctx->protocol;
if (half[0] != IPPROTO_TCP)
return SK_DROP;
/* Narrow loads from remote_port field. Expect non-0 value. */
byte = (__u8 *)&ctx->remote_port;
if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
return SK_DROP;
half = (__u16 *)&ctx->remote_port;
if (half[0] == 0)
return SK_DROP;
/* Narrow loads from local_port field. Expect DST_PORT. */
byte = (__u8 *)&ctx->local_port;
if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
byte[1] != ((DST_PORT >> 8) & 0xff) ||
byte[2] != 0 || byte[3] != 0)
return SK_DROP;
half = (__u16 *)&ctx->local_port;
if (half[0] != DST_PORT)
return SK_DROP;
/* Narrow loads from IPv4 fields */
if (v4) {
/* Expect non-0.0.0.0 in remote_ip4 */
byte = (__u8 *)&ctx->remote_ip4;
if (byte[0] == 0 && byte[1] == 0 &&
byte[2] == 0 && byte[3] == 0)
return SK_DROP;
half = (__u16 *)&ctx->remote_ip4;
if (half[0] == 0 && half[1] == 0)
return SK_DROP;
/* Expect DST_IP4 in local_ip4 */
byte = (__u8 *)&ctx->local_ip4;
if (byte[0] != ((DST_IP4 >> 0) & 0xff) ||
byte[1] != ((DST_IP4 >> 8) & 0xff) ||
byte[2] != ((DST_IP4 >> 16) & 0xff) ||
byte[3] != ((DST_IP4 >> 24) & 0xff))
return SK_DROP;
half = (__u16 *)&ctx->local_ip4;
if (half[0] != ((DST_IP4 >> 0) & 0xffff) ||
half[1] != ((DST_IP4 >> 16) & 0xffff))
return SK_DROP;
} else {
/* Expect 0.0.0.0 IPs when family != AF_INET */
byte = (__u8 *)&ctx->remote_ip4;
if (byte[0] != 0 || byte[1] != 0 ||
byte[2] != 0 || byte[3] != 0)
return SK_DROP;
half = (__u16 *)&ctx->remote_ip4;
if (half[0] != 0 || half[1] != 0)
return SK_DROP;
byte = (__u8 *)&ctx->local_ip4;
if (byte[0] != 0 || byte[1] != 0 ||
byte[2] != 0 || byte[3] != 0)
return SK_DROP;
half = (__u16 *)&ctx->local_ip4;
if (half[0] != 0 || half[1] != 0)
return SK_DROP;
}
/* Narrow loads from IPv6 fields */
if (!v4) {
/* Expect non-:: IP in remote_ip6 */
byte = (__u8 *)&ctx->remote_ip6;
if (byte[0] == 0 && byte[1] == 0 &&
byte[2] == 0 && byte[3] == 0 &&
byte[4] == 0 && byte[5] == 0 &&
byte[6] == 0 && byte[7] == 0 &&
byte[8] == 0 && byte[9] == 0 &&
byte[10] == 0 && byte[11] == 0 &&
byte[12] == 0 && byte[13] == 0 &&
byte[14] == 0 && byte[15] == 0)
return SK_DROP;
half = (__u16 *)&ctx->remote_ip6;
if (half[0] == 0 && half[1] == 0 &&
half[2] == 0 && half[3] == 0 &&
half[4] == 0 && half[5] == 0 &&
half[6] == 0 && half[7] == 0)
return SK_DROP;
/* Expect DST_IP6 in local_ip6 */
byte = (__u8 *)&ctx->local_ip6;
if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) ||
byte[1] != ((DST_IP6[0] >> 8) & 0xff) ||
byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
byte[4] != ((DST_IP6[1] >> 0) & 0xff) ||
byte[5] != ((DST_IP6[1] >> 8) & 0xff) ||
byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
byte[8] != ((DST_IP6[2] >> 0) & 0xff) ||
byte[9] != ((DST_IP6[2] >> 8) & 0xff) ||
byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
byte[12] != ((DST_IP6[3] >> 0) & 0xff) ||
byte[13] != ((DST_IP6[3] >> 8) & 0xff) ||
byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
byte[15] != ((DST_IP6[3] >> 24) & 0xff))
return SK_DROP;
half = (__u16 *)&ctx->local_ip6;
if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) ||
half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
half[2] != ((DST_IP6[1] >> 0) & 0xffff) ||
half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
half[4] != ((DST_IP6[2] >> 0) & 0xffff) ||
half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
half[6] != ((DST_IP6[3] >> 0) & 0xffff) ||
half[7] != ((DST_IP6[3] >> 16) & 0xffff))
return SK_DROP;
} else {
/* Expect :: IPs when family != AF_INET6 */
byte = (__u8 *)&ctx->remote_ip6;
if (byte[0] != 0 || byte[1] != 0 ||
byte[2] != 0 || byte[3] != 0 ||
byte[4] != 0 || byte[5] != 0 ||
byte[6] != 0 || byte[7] != 0 ||
byte[8] != 0 || byte[9] != 0 ||
byte[10] != 0 || byte[11] != 0 ||
byte[12] != 0 || byte[13] != 0 ||
byte[14] != 0 || byte[15] != 0)
return SK_DROP;
half = (__u16 *)&ctx->remote_ip6;
if (half[0] != 0 || half[1] != 0 ||
half[2] != 0 || half[3] != 0 ||
half[4] != 0 || half[5] != 0 ||
half[6] != 0 || half[7] != 0)
return SK_DROP;
byte = (__u8 *)&ctx->local_ip6;
if (byte[0] != 0 || byte[1] != 0 ||
byte[2] != 0 || byte[3] != 0 ||
byte[4] != 0 || byte[5] != 0 ||
byte[6] != 0 || byte[7] != 0 ||
byte[8] != 0 || byte[9] != 0 ||
byte[10] != 0 || byte[11] != 0 ||
byte[12] != 0 || byte[13] != 0 ||
byte[14] != 0 || byte[15] != 0)
return SK_DROP;
half = (__u16 *)&ctx->local_ip6;
if (half[0] != 0 || half[1] != 0 ||
half[2] != 0 || half[3] != 0 ||
half[4] != 0 || half[5] != 0 ||
half[6] != 0 || half[7] != 0)
return SK_DROP;
}
/* Success, redirect to KEY_SERVER_B */
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (sk) {
bpf_sk_assign(ctx, sk, 0);
bpf_sk_release(sk);
}
return SK_PASS;
}
/* Check that sk_assign rejects SERVER_A socket with -ESOCKTNOSUPPORT */
SEC("sk_lookup/sk_assign_esocknosupport")
int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err, ret;
ret = SK_DROP;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
goto out;
err = bpf_sk_assign(ctx, sk, 0);
if (err != -ESOCKTNOSUPPORT) {
bpf_printk("sk_assign returned %d, expected %d\n",
err, -ESOCKTNOSUPPORT);
goto out;
}
ret = SK_PASS; /* Success, pass to regular lookup */
out:
if (sk)
bpf_sk_release(sk);
return ret;
}
SEC("sk_lookup/multi_prog_pass1")
int multi_prog_pass1(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
SEC("sk_lookup/multi_prog_pass2")
int multi_prog_pass2(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
SEC("sk_lookup/multi_prog_drop1")
int multi_prog_drop1(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_DROP;
}
SEC("sk_lookup/multi_prog_drop2")
int multi_prog_drop2(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_DROP;
}
static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
int err;
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk)
return SK_DROP;
err = bpf_sk_assign(ctx, sk, 0);
bpf_sk_release(sk);
if (err)
return SK_DROP;
return SK_PASS;
}
SEC("sk_lookup/multi_prog_redir1")
int multi_prog_redir1(struct bpf_sk_lookup *ctx)
{
int ret;
ret = select_server_a(ctx);
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
SEC("sk_lookup/multi_prog_redir2")
int multi_prog_redir2(struct bpf_sk_lookup *ctx)
{
int ret;
ret = select_server_a(ctx);
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
char _license[] SEC("license") = "Dual BSD/GPL";
__u32 _version SEC("version") = 1;

View File

@ -0,0 +1,36 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#define IFINDEX_LO 1
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(struct bpf_cpumap_val));
__uint(max_entries, 4);
} cpu_map SEC(".maps");
SEC("xdp_redir")
int xdp_redir_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&cpu_map, 1, 0);
}
SEC("xdp_dummy")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
}
SEC("xdp_cpumap/dummy_cm")
int xdp_dummy_cm(struct xdp_md *ctx)
{
if (ctx->ingress_ifindex == IFINDEX_LO)
return XDP_DROP;
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then
exit $ksft_skip
fi
SRC_TREE=../../../../
if [ "$building_out_of_srctree" ]; then
# We are in linux-build/kselftest/bpf
OUTPUT=../../
else
# We are in linux/tools/testing/selftests/bpf
OUTPUT=../../../../
fi
test_run()
{
@ -19,8 +25,8 @@ test_run()
echo "[ JIT enabled:$1 hardened:$2 ]"
dmesg -C
if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
if [ $? -ne 0 ]; then
rc=1
fi

View File

@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
kill -INT $!
kill -TERM $!
if [[ $(< $TMP_FILE) != "foobar" ]]; then
exit 1

View File

@ -0,0 +1,492 @@
{
"valid 1,2,4,8-byte reads from bpf_sk_lookup",
.insns = {
/* 1-byte read from family field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family) + 3),
/* 2-byte read from family field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family) + 2),
/* 4-byte read from family field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family)),
/* 1-byte read from protocol field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol) + 3),
/* 2-byte read from protocol field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol) + 2),
/* 4-byte read from protocol field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol)),
/* 1-byte read from remote_ip4 field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
/* 2-byte read from remote_ip4 field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
/* 4-byte read from remote_ip4 field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4)),
/* 1-byte read from remote_ip6 field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
/* 2-byte read from remote_ip6 field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
/* 4-byte read from remote_ip6 field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6)),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
/* 1-byte read from remote_port field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port) + 3),
/* 2-byte read from remote_port field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port) + 2),
/* 4-byte read from remote_port field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port)),
/* 1-byte read from local_ip4 field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4) + 3),
/* 2-byte read from local_ip4 field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4) + 2),
/* 4-byte read from local_ip4 field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4)),
/* 1-byte read from local_ip6 field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 3),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 4),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 5),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 6),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 7),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 8),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 9),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 10),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 11),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 12),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 13),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 14),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 15),
/* 2-byte read from local_ip6 field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 2),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 4),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 6),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 8),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 10),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 12),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 14),
/* 4-byte read from local_ip6 field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6)),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 4),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 8),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6) + 12),
/* 1-byte read from local_port field */
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port)),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port) + 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port) + 2),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port) + 3),
/* 2-byte read from local_port field */
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port)),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port) + 2),
/* 4-byte read from local_port field */
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port)),
/* 8-byte read from sk field */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, sk)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
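/* The case above confirms the accepted access patterns: each 4-byte
 * context field can be read with aligned 1-, 2- and 4-byte loads, and
 * the sk pointer only with a full 8-byte load. The cases below cover
 * the access patterns that the verifier must reject.
 */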
/* invalid 8-byte reads from 4-byte fields in bpf_sk_lookup */
{
"invalid 8-byte read from bpf_sk_lookup family field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, family)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup protocol field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, protocol)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip4)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_ip6)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup remote_port field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, remote_port)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup local_ip4 field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip4)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup local_ip6 field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_ip6)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 8-byte read from bpf_sk_lookup local_port field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
{
"invalid 4-byte read from bpf_sk_lookup sk field",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, sk)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 2-byte read from bpf_sk_lookup sk field",
.insns = {
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, sk)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 1-byte read from bpf_sk_lookup sk field",
.insns = {
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, sk)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
/* out of bounds and unaligned reads from bpf_sk_lookup */
{
"invalid 4-byte read past end of bpf_sk_lookup",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
sizeof(struct bpf_sk_lookup)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
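/* The two cases above show that 4-byte context reads at offsets 1 and 2,
 * i.e. not aligned to the access size, are rejected even though they fall
 * within the struct.
 */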
/* in-bounds and out-of-bounds writes to bpf_sk_lookup */
{
"invalid 8-byte write to bpf_sk_lookup",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 4-byte write to bpf_sk_lookup",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 2-byte write to bpf_sk_lookup",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 1-byte write to bpf_sk_lookup",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},
{
"invalid 4-byte write past end of bpf_sk_lookup",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
sizeof(struct bpf_sk_lookup)),
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
},