Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-03-13

The following pull-request contains BPF updates for your *net-next* tree.

We've added 86 non-merge commits during the last 12 day(s) which contain
a total of 107 files changed, 5771 insertions(+), 1700 deletions(-).

The main changes are:

1) Add modify_return attach type which allows to attach to a function via
   BPF trampoline and is run after the fentry and before the fexit programs
   and can pass a return code to the original caller, from KP Singh.

2) Generalize BPF's kallsyms handling and add BPF trampoline and dispatcher
   objects to be visible in /proc/kallsyms so they can be annotated in
   stack traces, from Jiri Olsa.

3) Extend BPF sockmap to allow for UDP next to existing TCP support in order
   in order to enable this for BPF based socket dispatch, from Lorenz Bauer.

4) Introduce a new bpftool 'prog profile' command which attaches to existing
   BPF programs via fentry and fexit hooks and reads out hardware counters
   during that period, from Song Liu. Example usage:

   bpftool prog profile id 337 duration 3 cycles instructions llc_misses

        4228 run_cnt
     3403698 cycles                                              (84.08%)
     3525294 instructions   #  1.04 insn per cycle               (84.05%)
          13 llc_misses     #  3.69 LLC misses per million isns  (83.50%)

5) Batch of improvements to libbpf, bpftool and BPF selftests. Also addition
   of a new bpf_link abstraction to keep in particular BPF tracing programs
   attached even when the applicaion owning them exits, from Andrii Nakryiko.

6) New bpf_get_current_pid_tgid() helper for tracing to perform PID filtering
   and which returns the PID as seen by the init namespace, from Carlos Neira.

7) Refactor of RISC-V JIT code to move out common pieces and addition of a
   new RV32G BPF JIT compiler, from Luke Nelson.

8) Add gso_size context member to __sk_buff in order to be able to know whether
   a given skb is GSO or not, from Willem de Bruijn.

9) Add a new bpf_xdp_output() helper which reuses XDP's existing perf RB output
   implementation but can be called from tracepoint programs, from Eelco Chaudron.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-03-13 20:52:03 -07:00
commit 44ef976ab3
107 changed files with 5779 additions and 1708 deletions

View File

@ -67,7 +67,8 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
- sparc64
- mips64
- s390x
- riscv
- riscv64
- riscv32
And the older cBPF JIT supported on the following archs:

View File

@ -606,7 +606,7 @@ before a conversion to the new layout is being done behind the scenes!
Currently, the classic BPF format is being used for JITing on most
32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF
sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF
instruction set.
Some core changes of the new internal format:

View File

@ -3213,11 +3213,22 @@ L: bpf@vger.kernel.org
S: Maintained
F: arch/powerpc/net/
BPF JIT for RISC-V (RV64G)
M: Björn Töpel <bjorn.topel@gmail.com>
BPF JIT for RISC-V (32-bit)
M: Luke Nelson <luke.r.nels@gmail.com>
M: Xi Wang <xi.wang@gmail.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: arch/riscv/net/
X: arch/riscv/net/bpf_jit_comp64.c
BPF JIT for RISC-V (64-bit)
M: Björn Töpel <bjorn.topel@gmail.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: arch/riscv/net/
X: arch/riscv/net/bpf_jit_comp32.c
BPF JIT for S390
M: Ilya Leoshkevich <iii@linux.ibm.com>
@ -9350,6 +9361,8 @@ F: include/net/l3mdev.h
L7 BPF FRAMEWORK
M: John Fastabend <john.fastabend@gmail.com>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Jakub Sitnicki <jakub@cloudflare.com>
M: Lorenz Bauer <lmb@cloudflare.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
@ -9357,6 +9370,7 @@ F: include/linux/skmsg.h
F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
LANTIQ / INTEL Ethernet drivers
M: Hauke Mehrtens <hauke@hauke-m.de>

View File

@ -56,7 +56,7 @@ config RISCV
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MMIOWB
select ARCH_HAS_DEBUG_VIRTUAL
select HAVE_EBPF_JIT if 64BIT
select HAVE_EBPF_JIT
select EDAC_SUPPORT
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT

View File

@ -1,2 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o
ifeq ($(CONFIG_ARCH_RV64I),y)
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
else
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
endif

514
arch/riscv/net/bpf_jit.h Normal file
View File

@ -0,0 +1,514 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common functionality for RV32 and RV64 BPF JIT compilers
*
* Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com>
*
*/
#ifndef _BPF_JIT_H
#define _BPF_JIT_H
#include <linux/bpf.h>
#include <linux/filter.h>
#include <asm/cacheflush.h>
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
RV_REG_SP = 2, /* Stack pointer */
RV_REG_GP = 3, /* Global pointer */
RV_REG_TP = 4, /* Thread pointer */
RV_REG_T0 = 5, /* Temporaries */
RV_REG_T1 = 6,
RV_REG_T2 = 7,
RV_REG_FP = 8, /* Saved register/frame pointer */
RV_REG_S1 = 9, /* Saved register */
RV_REG_A0 = 10, /* Function argument/return values */
RV_REG_A1 = 11, /* Function arguments */
RV_REG_A2 = 12,
RV_REG_A3 = 13,
RV_REG_A4 = 14,
RV_REG_A5 = 15,
RV_REG_A6 = 16,
RV_REG_A7 = 17,
RV_REG_S2 = 18, /* Saved registers */
RV_REG_S3 = 19,
RV_REG_S4 = 20,
RV_REG_S5 = 21,
RV_REG_S6 = 22,
RV_REG_S7 = 23,
RV_REG_S8 = 24,
RV_REG_S9 = 25,
RV_REG_S10 = 26,
RV_REG_S11 = 27,
RV_REG_T3 = 28, /* Temporaries */
RV_REG_T4 = 29,
RV_REG_T5 = 30,
RV_REG_T6 = 31,
};
struct rv_jit_context {
struct bpf_prog *prog;
u32 *insns; /* RV insns */
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
unsigned long flags;
int stack_size;
};
struct rv_jit_data {
struct bpf_binary_header *header;
u8 *image;
struct rv_jit_context ctx;
};
static inline void bpf_fill_ill_insns(void *area, unsigned int size)
{
memset(area, 0, size);
}
static inline void bpf_flush_icache(void *start, void *end)
{
flush_icache_range((unsigned long)start, (unsigned long)end);
}
static inline void emit(const u32 insn, struct rv_jit_context *ctx)
{
if (ctx->insns)
ctx->insns[ctx->ninsns] = insn;
ctx->ninsns++;
}
static inline int epilogue_offset(struct rv_jit_context *ctx)
{
int to = ctx->epilogue_offset, from = ctx->ninsns;
return (to - from) << 2;
}
/* Return -1 or inverted cond. */
static inline int invert_bpf_cond(u8 cond)
{
switch (cond) {
case BPF_JEQ:
return BPF_JNE;
case BPF_JGT:
return BPF_JLE;
case BPF_JLT:
return BPF_JGE;
case BPF_JGE:
return BPF_JLT;
case BPF_JLE:
return BPF_JGT;
case BPF_JNE:
return BPF_JEQ;
case BPF_JSGT:
return BPF_JSLE;
case BPF_JSLT:
return BPF_JSGE;
case BPF_JSGE:
return BPF_JSLT;
case BPF_JSLE:
return BPF_JSGT;
}
return -1;
}
static inline bool is_12b_int(long val)
{
return -(1L << 11) <= val && val < (1L << 11);
}
static inline int is_12b_check(int off, int insn)
{
if (!is_12b_int(off)) {
pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
insn, (int)off);
return -1;
}
return 0;
}
static inline bool is_13b_int(long val)
{
return -(1L << 12) <= val && val < (1L << 12);
}
static inline bool is_21b_int(long val)
{
return -(1L << 20) <= val && val < (1L << 20);
}
static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
{
int from, to;
off++; /* BPF branch is from PC+1, RV is from PC */
from = (insn > 0) ? ctx->offset[insn - 1] : 0;
to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
return (to - from) << 2;
}
/* Instruction formats. */
static inline u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd,
u8 opcode)
{
return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
(rd << 7) | opcode;
}
static inline u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode)
{
return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) |
opcode;
}
static inline u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
{
u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f;
return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
(imm4_0 << 7) | opcode;
}
static inline u32 rv_b_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
{
u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4);
u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10);
return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
(imm4_1 << 7) | opcode;
}
static inline u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode)
{
return (imm31_12 << 12) | (rd << 7) | opcode;
}
static inline u32 rv_j_insn(u32 imm20_1, u8 rd, u8 opcode)
{
u32 imm;
imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) |
((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11);
return (imm << 12) | (rd << 7) | opcode;
}
static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
u8 funct3, u8 rd, u8 opcode)
{
u8 funct7 = (funct5 << 2) | (aq << 1) | rl;
return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
}
/* Instructions shared by both RV32 and RV64. */
static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 0, rd, 0x13);
}
static inline u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 7, rd, 0x13);
}
static inline u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 6, rd, 0x13);
}
static inline u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 4, rd, 0x13);
}
static inline u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 1, rd, 0x13);
}
static inline u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 5, rd, 0x13);
}
static inline u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13);
}
static inline u32 rv_lui(u8 rd, u32 imm31_12)
{
return rv_u_insn(imm31_12, rd, 0x37);
}
static inline u32 rv_auipc(u8 rd, u32 imm31_12)
{
return rv_u_insn(imm31_12, rd, 0x17);
}
static inline u32 rv_add(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 0, rd, 0x33);
}
static inline u32 rv_sub(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33);
}
static inline u32 rv_sltu(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 3, rd, 0x33);
}
static inline u32 rv_and(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 7, rd, 0x33);
}
static inline u32 rv_or(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 6, rd, 0x33);
}
static inline u32 rv_xor(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 4, rd, 0x33);
}
static inline u32 rv_sll(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 1, rd, 0x33);
}
static inline u32 rv_srl(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 5, rd, 0x33);
}
static inline u32 rv_sra(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33);
}
static inline u32 rv_mul(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 0, rd, 0x33);
}
static inline u32 rv_mulhu(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 3, rd, 0x33);
}
static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 5, rd, 0x33);
}
static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 7, rd, 0x33);
}
static inline u32 rv_jal(u8 rd, u32 imm20_1)
{
return rv_j_insn(imm20_1, rd, 0x6f);
}
static inline u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 0, rd, 0x67);
}
static inline u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_b_insn(imm12_1, rs2, rs1, 0, 0x63);
}
static inline u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_b_insn(imm12_1, rs2, rs1, 1, 0x63);
}
static inline u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_b_insn(imm12_1, rs2, rs1, 6, 0x63);
}
static inline u32 rv_bgtu(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_bltu(rs2, rs1, imm12_1);
}
static inline u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_b_insn(imm12_1, rs2, rs1, 7, 0x63);
}
static inline u32 rv_bleu(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_bgeu(rs2, rs1, imm12_1);
}
static inline u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_b_insn(imm12_1, rs2, rs1, 4, 0x63);
}
static inline u32 rv_bgt(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_blt(rs2, rs1, imm12_1);
}
static inline u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_b_insn(imm12_1, rs2, rs1, 5, 0x63);
}
static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_bge(rs2, rs1, imm12_1);
}
static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 2, rd, 0x03);
}
static inline u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 4, rd, 0x03);
}
static inline u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 5, rd, 0x03);
}
static inline u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23);
}
static inline u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23);
}
static inline u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23);
}
static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
{
return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
/*
* RV64-only instructions.
*
* These instructions are not available on RV32. Wrap them below a #if to
* ensure that the RV32 JIT doesn't emit any of these instructions.
*/
#if __riscv_xlen == 64
static inline u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b);
}
static inline u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b);
}
static inline u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b);
}
static inline u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b);
}
static inline u32 rv_addw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b);
}
static inline u32 rv_subw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b);
}
static inline u32 rv_sllw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b);
}
static inline u32 rv_srlw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b);
}
static inline u32 rv_sraw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b);
}
static inline u32 rv_mulw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b);
}
static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b);
}
static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b);
}
static inline u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 3, rd, 0x03);
}
static inline u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 6, rd, 0x03);
}
static inline u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23);
}
static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
{
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
#endif /* __riscv_xlen == 64 */
void bpf_jit_build_prologue(struct rv_jit_context *ctx);
void bpf_jit_build_epilogue(struct rv_jit_context *ctx);
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass);
#endif /* _BPF_JIT_H */

File diff suppressed because it is too large Load Diff

View File

@ -7,42 +7,7 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <asm/cacheflush.h>
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
RV_REG_SP = 2, /* Stack pointer */
RV_REG_GP = 3, /* Global pointer */
RV_REG_TP = 4, /* Thread pointer */
RV_REG_T0 = 5, /* Temporaries */
RV_REG_T1 = 6,
RV_REG_T2 = 7,
RV_REG_FP = 8,
RV_REG_S1 = 9, /* Saved registers */
RV_REG_A0 = 10, /* Function argument/return values */
RV_REG_A1 = 11, /* Function arguments */
RV_REG_A2 = 12,
RV_REG_A3 = 13,
RV_REG_A4 = 14,
RV_REG_A5 = 15,
RV_REG_A6 = 16,
RV_REG_A7 = 17,
RV_REG_S2 = 18, /* Saved registers */
RV_REG_S3 = 19,
RV_REG_S4 = 20,
RV_REG_S5 = 21,
RV_REG_S6 = 22,
RV_REG_S7 = 23,
RV_REG_S8 = 24,
RV_REG_S9 = 25,
RV_REG_S10 = 26,
RV_REG_S11 = 27,
RV_REG_T3 = 28, /* Temporaries */
RV_REG_T4 = 29,
RV_REG_T5 = 30,
RV_REG_T6 = 31,
};
#include "bpf_jit.h"
#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */
@ -73,22 +38,6 @@ enum {
RV_CTX_F_SEEN_S6 = RV_REG_S6,
};
struct rv_jit_context {
struct bpf_prog *prog;
u32 *insns; /* RV insns */
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
unsigned long flags;
int stack_size;
};
struct rv_jit_data {
struct bpf_binary_header *header;
u8 *image;
struct rv_jit_context ctx;
};
static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
{
u8 reg = regmap[bpf_reg];
@ -156,346 +105,11 @@ static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
return RV_REG_A6;
}
static void emit(const u32 insn, struct rv_jit_context *ctx)
{
if (ctx->insns)
ctx->insns[ctx->ninsns] = insn;
ctx->ninsns++;
}
static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode)
{
return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
(rd << 7) | opcode;
}
static u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode)
{
return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) |
opcode;
}
static u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
{
u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f;
return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
(imm4_0 << 7) | opcode;
}
static u32 rv_sb_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
{
u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4);
u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10);
return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
(imm4_1 << 7) | opcode;
}
static u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode)
{
return (imm31_12 << 12) | (rd << 7) | opcode;
}
static u32 rv_uj_insn(u32 imm20_1, u8 rd, u8 opcode)
{
u32 imm;
imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) |
((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11);
return (imm << 12) | (rd << 7) | opcode;
}
static u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
u8 funct3, u8 rd, u8 opcode)
{
u8 funct7 = (funct5 << 2) | (aq << 1) | rl;
return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
}
static u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b);
}
static u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 0, rd, 0x13);
}
static u32 rv_addw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b);
}
static u32 rv_add(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 0, rd, 0x33);
}
static u32 rv_subw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b);
}
static u32 rv_sub(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33);
}
static u32 rv_and(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 7, rd, 0x33);
}
static u32 rv_or(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 6, rd, 0x33);
}
static u32 rv_xor(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 4, rd, 0x33);
}
static u32 rv_mulw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b);
}
static u32 rv_mul(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 0, rd, 0x33);
}
static u32 rv_divuw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b);
}
static u32 rv_divu(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 5, rd, 0x33);
}
static u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b);
}
static u32 rv_remu(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(1, rs2, rs1, 7, rd, 0x33);
}
static u32 rv_sllw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b);
}
static u32 rv_sll(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 1, rd, 0x33);
}
static u32 rv_srlw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b);
}
static u32 rv_srl(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0, rs2, rs1, 5, rd, 0x33);
}
static u32 rv_sraw(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b);
}
static u32 rv_sra(u8 rd, u8 rs1, u8 rs2)
{
return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33);
}
static u32 rv_lui(u8 rd, u32 imm31_12)
{
return rv_u_insn(imm31_12, rd, 0x37);
}
static u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 1, rd, 0x13);
}
static u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 7, rd, 0x13);
}
static u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 6, rd, 0x13);
}
static u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 4, rd, 0x13);
}
static u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b);
}
static u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b);
}
static u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 5, rd, 0x13);
}
static u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b);
}
static u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13);
}
static u32 rv_jal(u8 rd, u32 imm20_1)
{
return rv_uj_insn(imm20_1, rd, 0x6f);
}
static u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0)
{
return rv_i_insn(imm11_0, rs1, 0, rd, 0x67);
}
static u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_sb_insn(imm12_1, rs2, rs1, 0, 0x63);
}
static u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_sb_insn(imm12_1, rs2, rs1, 6, 0x63);
}
static u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_sb_insn(imm12_1, rs2, rs1, 7, 0x63);
}
static u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_sb_insn(imm12_1, rs2, rs1, 1, 0x63);
}
static u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_sb_insn(imm12_1, rs2, rs1, 4, 0x63);
}
static u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1)
{
return rv_sb_insn(imm12_1, rs2, rs1, 5, 0x63);
}
static u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23);
}
static u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23);
}
static u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23);
}
static u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2)
{
return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23);
}
static u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 4, rd, 0x03);
}
static u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 5, rd, 0x03);
}
static u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 6, rd, 0x03);
}
static u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1)
{
return rv_i_insn(imm11_0, rs1, 3, rd, 0x03);
}
static u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
{
return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
{
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
static u32 rv_auipc(u8 rd, u32 imm31_12)
{
return rv_u_insn(imm31_12, rd, 0x17);
}
static bool is_12b_int(s64 val)
{
return -(1 << 11) <= val && val < (1 << 11);
}
static bool is_13b_int(s64 val)
{
return -(1 << 12) <= val && val < (1 << 12);
}
static bool is_21b_int(s64 val)
{
return -(1L << 20) <= val && val < (1L << 20);
}
static bool is_32b_int(s64 val)
{
return -(1L << 31) <= val && val < (1L << 31);
}
static int is_12b_check(int off, int insn)
{
if (!is_12b_int(off)) {
pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
insn, (int)off);
return -1;
}
return 0;
}
static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
{
/* Note that the immediate from the add is sign-extended,
@ -535,23 +149,6 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
emit(rv_addi(rd, rd, lower), ctx);
}
static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
{
int from, to;
off++; /* BPF branch is from PC+1, RV is from PC */
from = (insn > 0) ? ctx->offset[insn - 1] : 0;
to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
return (to - from) << 2;
}
static int epilogue_offset(struct rv_jit_context *ctx)
{
int to = ctx->epilogue_offset, from = ctx->ninsns;
return (to - from) << 2;
}
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
{
int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
@ -596,34 +193,6 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
ctx);
}
/* return -1 or inverted cond */
static int invert_bpf_cond(u8 cond)
{
switch (cond) {
case BPF_JEQ:
return BPF_JNE;
case BPF_JGT:
return BPF_JLE;
case BPF_JLT:
return BPF_JGE;
case BPF_JGE:
return BPF_JLT;
case BPF_JLE:
return BPF_JGT;
case BPF_JNE:
return BPF_JEQ;
case BPF_JSGT:
return BPF_JSLE;
case BPF_JSLT:
return BPF_JSGE;
case BPF_JSGE:
return BPF_JSLT;
case BPF_JSLE:
return BPF_JSGT;
}
return -1;
}
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
struct rv_jit_context *ctx)
{
@ -855,8 +424,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
return 0;
}
static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
BPF_CLASS(insn->code) == BPF_JMP;
@ -1434,7 +1003,7 @@ out_be:
return 0;
}
static void build_prologue(struct rv_jit_context *ctx)
void bpf_jit_build_prologue(struct rv_jit_context *ctx)
{
int stack_adjust = 0, store_offset, bpf_stack_adjust;
@ -1515,175 +1084,11 @@ static void build_prologue(struct rv_jit_context *ctx)
ctx->stack_size = stack_adjust;
}
static void build_epilogue(struct rv_jit_context *ctx)
void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
__build_epilogue(false, ctx);
}
static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
{
const struct bpf_prog *prog = ctx->prog;
int i;
for (i = 0; i < prog->len; i++) {
const struct bpf_insn *insn = &prog->insnsi[i];
int ret;
ret = emit_insn(insn, ctx, extra_pass);
if (ret > 0) {
i++;
if (offset)
offset[i] = ctx->ninsns;
continue;
}
if (offset)
offset[i] = ctx->ninsns;
if (ret)
return ret;
}
return 0;
}
static void bpf_fill_ill_insns(void *area, unsigned int size)
{
memset(area, 0, size);
}
static void bpf_flush_icache(void *start, void *end)
{
flush_icache_range((unsigned long)start, (unsigned long)end);
}
bool bpf_jit_needs_zext(void)
{
return true;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
unsigned int image_size = 0;
struct rv_jit_context *ctx;
if (!prog->jit_requested)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
if (IS_ERR(tmp))
return orig_prog;
if (tmp != prog) {
tmp_blinded = true;
prog = tmp;
}
jit_data = prog->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
prog = orig_prog;
goto out;
}
prog->aux->jit_data = jit_data;
}
ctx = &jit_data->ctx;
if (ctx->offset) {
extra_pass = true;
image_size = sizeof(u32) * ctx->ninsns;
goto skip_init_ctx;
}
ctx->prog = prog;
ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
if (!ctx->offset) {
prog = orig_prog;
goto out_offset;
}
for (i = 0; i < prog->len; i++) {
prev_ninsns += 32;
ctx->offset[i] = prev_ninsns;
}
for (i = 0; i < 16; i++) {
pass++;
ctx->ninsns = 0;
if (build_body(ctx, extra_pass, ctx->offset)) {
prog = orig_prog;
goto out_offset;
}
build_prologue(ctx);
ctx->epilogue_offset = ctx->ninsns;
build_epilogue(ctx);
if (ctx->ninsns == prev_ninsns) {
if (jit_data->header)
break;
image_size = sizeof(u32) * ctx->ninsns;
jit_data->header =
bpf_jit_binary_alloc(image_size,
&jit_data->image,
sizeof(u32),
bpf_fill_ill_insns);
if (!jit_data->header) {
prog = orig_prog;
goto out_offset;
}
ctx->insns = (u32 *)jit_data->image;
/* Now, when the image is allocated, the image
* can potentially shrink more (auipc/jalr ->
* jal).
*/
}
prev_ninsns = ctx->ninsns;
}
if (i == 16) {
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
skip_init_ctx:
pass++;
ctx->ninsns = 0;
build_prologue(ctx);
if (build_body(ctx, extra_pass, NULL)) {
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
build_epilogue(ctx);
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
prog->bpf_func = (void *)ctx->insns;
prog->jited = 1;
prog->jited_len = image_size;
bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
if (!prog->is_func || extra_pass) {
out_offset:
kfree(ctx->offset);
kfree(jit_data);
prog->aux->jit_data = NULL;
}
out:
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ?
tmp : orig_prog);
return prog;
}
void *bpf_jit_alloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,

View File

@ -0,0 +1,166 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Common functionality for RV32 and RV64 BPF JIT compilers
*
* Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com>
*
*/
#include <linux/bpf.h>
#include <linux/filter.h>
#include "bpf_jit.h"
/* Number of iterations to try until offsets converge. */
#define NR_JIT_ITERATIONS 16
static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
{
const struct bpf_prog *prog = ctx->prog;
int i;
for (i = 0; i < prog->len; i++) {
const struct bpf_insn *insn = &prog->insnsi[i];
int ret;
ret = bpf_jit_emit_insn(insn, ctx, extra_pass);
/* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. */
if (ret > 0)
i++;
if (offset)
offset[i] = ctx->ninsns;
if (ret < 0)
return ret;
}
return 0;
}
bool bpf_jit_needs_zext(void)
{
return true;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
struct rv_jit_context *ctx;
unsigned int image_size = 0;
if (!prog->jit_requested)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
if (IS_ERR(tmp))
return orig_prog;
if (tmp != prog) {
tmp_blinded = true;
prog = tmp;
}
jit_data = prog->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
prog = orig_prog;
goto out;
}
prog->aux->jit_data = jit_data;
}
ctx = &jit_data->ctx;
if (ctx->offset) {
extra_pass = true;
image_size = sizeof(u32) * ctx->ninsns;
goto skip_init_ctx;
}
ctx->prog = prog;
ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
if (!ctx->offset) {
prog = orig_prog;
goto out_offset;
}
for (i = 0; i < prog->len; i++) {
prev_ninsns += 32;
ctx->offset[i] = prev_ninsns;
}
for (i = 0; i < NR_JIT_ITERATIONS; i++) {
pass++;
ctx->ninsns = 0;
if (build_body(ctx, extra_pass, ctx->offset)) {
prog = orig_prog;
goto out_offset;
}
bpf_jit_build_prologue(ctx);
ctx->epilogue_offset = ctx->ninsns;
bpf_jit_build_epilogue(ctx);
if (ctx->ninsns == prev_ninsns) {
if (jit_data->header)
break;
image_size = sizeof(u32) * ctx->ninsns;
jit_data->header =
bpf_jit_binary_alloc(image_size,
&jit_data->image,
sizeof(u32),
bpf_fill_ill_insns);
if (!jit_data->header) {
prog = orig_prog;
goto out_offset;
}
ctx->insns = (u32 *)jit_data->image;
/*
* Now, when the image is allocated, the image can
* potentially shrink more (auipc/jalr -> jal).
*/
}
prev_ninsns = ctx->ninsns;
}
if (i == NR_JIT_ITERATIONS) {
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
skip_init_ctx:
pass++;
ctx->ninsns = 0;
bpf_jit_build_prologue(ctx);
if (build_body(ctx, extra_pass, NULL)) {
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
bpf_jit_build_epilogue(ctx);
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
prog->bpf_func = (void *)ctx->insns;
prog->jited = 1;
prog->jited_len = image_size;
bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
if (!prog->is_func || extra_pass) {
out_offset:
kfree(ctx->offset);
kfree(jit_data);
prog->aux->jit_data = NULL;
}
out:
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ?
tmp : orig_prog);
return prog;
}

View File

@ -238,7 +238,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
}
}
static inline int is_kernel_text(unsigned long addr)
/*
* The <linux/kallsyms.h> already defines is_kernel_text,
* using '__' prefix not to get in conflict.
*/
static inline int __is_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
return 1;
@ -328,8 +332,8 @@ repeat:
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
PAGE_OFFSET + PAGE_SIZE-1;
if (is_kernel_text(addr) ||
is_kernel_text(addr2))
if (__is_kernel_text(addr) ||
__is_kernel_text(addr2))
prot = PAGE_KERNEL_LARGE_EXEC;
pages_2m++;
@ -354,7 +358,7 @@ repeat:
*/
pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
if (is_kernel_text(addr))
if (__is_kernel_text(addr))
prot = PAGE_KERNEL_EXEC;
pages_4k++;
@ -881,7 +885,7 @@ static void mark_nxdata_nx(void)
*/
unsigned long start = PFN_ALIGN(_etext);
/*
* This comes from is_kernel_text upper limit. Also HPAGE where used:
* This comes from __is_kernel_text upper limit. Also HPAGE where used:
*/
unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

View File

@ -1361,41 +1361,144 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
-(stack_size - i * 8));
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_prog **progs, int prog_cnt, int stack_size)
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_prog *p, int stack_size, bool mod_ret)
{
u8 *prog = *pprog;
int cnt = 0, i;
int cnt = 0;
for (i = 0; i < prog_cnt; i++) {
if (emit_call(&prog, __bpf_prog_enter, prog))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
if (emit_call(&prog, __bpf_prog_enter, prog))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
/* arg2: progs[i]->insnsi for interpreter */
if (!progs[i]->jited)
emit_mov_imm64(&prog, BPF_REG_2,
(long) progs[i]->insnsi >> 32,
(u32) (long) progs[i]->insnsi);
/* call JITed bpf program or interpreter */
if (emit_call(&prog, progs[i]->bpf_func, prog))
return -EINVAL;
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
/* arg2: progs[i]->insnsi for interpreter */
if (!p->jited)
emit_mov_imm64(&prog, BPF_REG_2,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
/* call JITed bpf program or interpreter */
if (emit_call(&prog, p->bpf_func, prog))
return -EINVAL;
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
(u32) (long) progs[i]);
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
if (emit_call(&prog, __bpf_prog_exit, prog))
/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
* of the previous call which is then passed on the stack to
* the next BPF program.
*/
if (mod_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
(u32) (long) p);
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
if (emit_call(&prog, __bpf_prog_exit, prog))
return -EINVAL;
*pprog = prog;
return 0;
}
static void emit_nops(u8 **pprog, unsigned int len)
{
unsigned int i, noplen;
u8 *prog = *pprog;
int cnt = 0;
while (len > 0) {
noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
for (i = 0; i < noplen; i++)
EMIT1(ideal_nops[noplen][i]);
len -= noplen;
}
*pprog = prog;
}
static void emit_align(u8 **pprog, u32 align)
{
u8 *target, *prog = *pprog;
target = PTR_ALIGN(prog, align);
if (target != prog)
emit_nops(&prog, target - prog);
*pprog = prog;
}
static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
u8 *prog = *pprog;
int cnt = 0;
s64 offset;
offset = func - (ip + 2 + 4);
if (!is_simm32(offset)) {
pr_err("Target %p is out of range\n", func);
return -EINVAL;
}
EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
*pprog = prog;
return 0;
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_progs *tp, int stack_size)
{
int i;
u8 *prog = *pprog;
for (i = 0; i < tp->nr_progs; i++) {
if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
return -EINVAL;
}
*pprog = prog;
return 0;
}
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_progs *tp, int stack_size,
u8 **branches)
{
u8 *prog = *pprog;
int i, cnt = 0;
/* The first fmod_ret program will receive a garbage return value.
* Set this to 0 to avoid confusing the program.
*/
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
for (i = 0; i < tp->nr_progs; i++) {
if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
return -EINVAL;
/* mod_ret prog stored return value into [rbp - 8]. Emit:
* if (*(u64 *)(rbp - 8) != 0)
* goto do_fexit;
*/
/* cmp QWORD PTR [rbp - 0x8], 0x0 */
EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
/* Save the location of the branch and Generate 6 nops
* (4 bytes for an offset and 2 bytes for the jump) These nops
* are replaced with a conditional jump once do_fexit (i.e. the
* start of the fexit invocation) is finalized.
*/
branches[i] = prog;
emit_nops(&prog, 4 + 2);
}
*pprog = prog;
return 0;
}
/* Example:
* __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
* its 'struct btf_func_model' will be nr_args=2
@ -1458,12 +1561,15 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_prog **fentry_progs, int fentry_cnt,
struct bpf_prog **fexit_progs, int fexit_cnt,
struct bpf_tramp_progs *tprogs,
void *orig_call)
{
int cnt = 0, nr_args = m->nr_args;
int ret, i, cnt = 0, nr_args = m->nr_args;
int stack_size = nr_args * 8;
struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
u8 **branches = NULL;
u8 *prog;
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
@ -1492,28 +1598,64 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
save_regs(m, &prog, nr_args, stack_size);
if (fentry_cnt)
if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
if (fentry->nr_progs)
if (invoke_bpf(m, &prog, fentry, stack_size))
return -EINVAL;
if (fmod_ret->nr_progs) {
branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
branches)) {
ret = -EINVAL;
goto cleanup;
}
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
if (fentry_cnt)
if (fentry->nr_progs || fmod_ret->nr_progs)
restore_regs(m, &prog, nr_args, stack_size);
/* call original function */
if (emit_call(&prog, orig_call, prog))
return -EINVAL;
if (emit_call(&prog, orig_call, prog)) {
ret = -EINVAL;
goto cleanup;
}
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
}
if (fexit_cnt)
if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
return -EINVAL;
if (fmod_ret->nr_progs) {
/* From Intel 64 and IA-32 Architectures Optimization
* Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
* Coding Rule 11: All branch targets should be 16-byte
* aligned.
*/
emit_align(&prog, 16);
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
for (i = 0; i < fmod_ret->nr_progs; i++)
emit_cond_near_jump(&branches[i], prog, branches[i],
X86_JNE);
}
if (fexit->nr_progs)
if (invoke_bpf(m, &prog, fexit, stack_size)) {
ret = -EINVAL;
goto cleanup;
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_regs(m, &prog, nr_args, stack_size);
/* This needs to be done regardless. If there were fmod_ret programs,
* the return value is only updated on the stack and still needs to be
* restored to R0.
*/
if (flags & BPF_TRAMP_F_CALL_ORIG)
/* restore original return value back into RAX */
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
@ -1525,45 +1667,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
EMIT1(0xC3); /* ret */
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY))
return -EFAULT;
return prog - (u8 *)image;
}
static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
u8 *prog = *pprog;
int cnt = 0;
s64 offset;
offset = func - (ip + 2 + 4);
if (!is_simm32(offset)) {
pr_err("Target %p is out of range\n", func);
return -EINVAL;
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
goto cleanup;
}
EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
*pprog = prog;
return 0;
}
ret = prog - (u8 *)image;
static void emit_nops(u8 **pprog, unsigned int len)
{
unsigned int i, noplen;
u8 *prog = *pprog;
int cnt = 0;
while (len > 0) {
noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
for (i = 0; i < noplen; i++)
EMIT1(ideal_nops[noplen][i]);
len -= noplen;
}
*pprog = prog;
cleanup:
kfree(branches);
return ret;
}
static int emit_fallback_jump(u8 **pprog)
@ -1588,7 +1700,7 @@ static int emit_fallback_jump(u8 **pprog)
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *jg_target, *prog = *pprog;
u8 *jg_reloc, *prog = *pprog;
int pivot, err, jg_bytes = 1, cnt = 0;
s64 jg_offset;
@ -1643,9 +1755,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
* Coding Rule 11: All branch targets should be 16-byte
* aligned.
*/
jg_target = PTR_ALIGN(prog, 16);
if (jg_target != prog)
emit_nops(&prog, jg_target - prog);
emit_align(&prog, 16);
jg_offset = prog - jg_reloc;
emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);

View File

@ -247,6 +247,20 @@ out_invalid:
return ERR_PTR(-EINVAL);
}
/**
* ns_match() - Returns true if current namespace matches dev/ino provided.
* @ns_common: current ns
* @dev: dev_t from nsfs that will be matched against current nsfs
* @ino: ino_t from nsfs that will be matched against current nsfs
*
* Return: true if dev and ino matches the current nsfs.
*/
bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino)
{
return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev);
}
static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);

View File

@ -18,6 +18,7 @@
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@ -433,6 +434,16 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
#define BPF_MAX_TRAMP_PROGS 40
struct bpf_tramp_progs {
struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
int nr_progs;
};
/* Different use cases for BPF trampoline:
* 1. replace nop at the function entry (kprobe equivalent)
* flags = BPF_TRAMP_F_RESTORE_REGS
@ -455,16 +466,25 @@ struct btf_func_model {
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_prog **fentry_progs, int fentry_cnt,
struct bpf_prog **fexit_progs, int fexit_cnt,
struct bpf_tramp_progs *tprogs,
void *orig_call);
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
struct bpf_ksym {
unsigned long start;
unsigned long end;
char name[KSYM_NAME_LEN];
struct list_head lnode;
struct latch_tree_node tnode;
bool prog;
};
enum bpf_tramp_prog_type {
BPF_TRAMP_FENTRY,
BPF_TRAMP_FEXIT,
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
};
@ -493,6 +513,7 @@ struct bpf_trampoline {
/* Executable image of trampoline */
void *image;
u64 selector;
struct bpf_ksym ksym;
};
#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
@ -510,9 +531,10 @@ struct bpf_dispatcher {
int num_progs;
void *image;
u32 image_off;
struct bpf_ksym ksym;
};
static __always_inline unsigned int bpf_dispatcher_nopfunc(
static __always_inline unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
unsigned int (*bpf_func)(const void *,
@ -525,17 +547,21 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
void bpf_trampoline_put(struct bpf_trampoline *tr);
#define BPF_DISPATCHER_INIT(name) { \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.func = &name##func, \
.progs = {}, \
.num_progs = 0, \
.image = NULL, \
.image_off = 0 \
#define BPF_DISPATCHER_INIT(_name) { \
.mutex = __MUTEX_INITIALIZER(_name.mutex), \
.func = &_name##_func, \
.progs = {}, \
.num_progs = 0, \
.image = NULL, \
.image_off = 0, \
.ksym = { \
.name = #_name, \
.lnode = LIST_HEAD_INIT(_name.ksym.lnode), \
}, \
}
#define DEFINE_BPF_DISPATCHER(name) \
noinline unsigned int name##func( \
noinline unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
unsigned int (*bpf_func)(const void *, \
@ -543,26 +569,26 @@ void bpf_trampoline_put(struct bpf_trampoline *tr);
{ \
return bpf_func(ctx, insnsi); \
} \
EXPORT_SYMBOL(name##func); \
struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \
struct bpf_dispatcher bpf_dispatcher_##name = \
BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
#define DECLARE_BPF_DISPATCHER(name) \
unsigned int name##func( \
unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
unsigned int (*bpf_func)(const void *, \
const struct bpf_insn *)); \
extern struct bpf_dispatcher name;
#define BPF_DISPATCHER_FUNC(name) name##func
#define BPF_DISPATCHER_PTR(name) (&name)
extern struct bpf_dispatcher bpf_dispatcher_##name;
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
struct bpf_image {
struct latch_tree_node tnode;
unsigned char data[];
};
#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
bool is_bpf_image_address(unsigned long address);
void *bpf_image_alloc(void);
/* Called only from JIT-enabled code, so there's no need for stubs. */
void *bpf_jit_alloc_exec_page(void);
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
@ -579,7 +605,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
#define DECLARE_BPF_DISPATCHER(name)
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func
#define BPF_DISPATCHER_PTR(name) NULL
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
struct bpf_prog *from,
@ -640,8 +666,7 @@ struct bpf_prog_aux {
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
u32 size_poke_tab;
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
struct bpf_prog *prog;
@ -1056,6 +1081,21 @@ extern int sysctl_unprivileged_bpf_disabled;
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
struct bpf_link;
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link);
};
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog);
void bpf_link_inc(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);
@ -1133,6 +1173,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
@ -1290,6 +1333,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
return -ENOTSUPP;
}
static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
return -ENOTSUPP;
}
static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
@ -1386,6 +1436,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
#if defined(CONFIG_BPF_STREAM_PARSER)
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
static inline int sock_map_prog_update(struct bpf_map *map,
struct bpf_prog *prog, u32 which)
@ -1398,7 +1450,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
{
return -EINVAL;
}
#endif
#endif /* CONFIG_BPF_STREAM_PARSER */
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
@ -1459,6 +1511,7 @@ extern const struct bpf_func_proto bpf_strtol_proto;
extern const struct bpf_func_proto bpf_strtoul_proto;
extern const struct bpf_func_proto bpf_tcp_sock_proto;
extern const struct bpf_func_proto bpf_jiffies64_proto;
extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);

View File

@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
ret; })
#define BPF_PROG_RUN(prog, ctx) \
__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)
/*
* Use in preemptible and therefore migratable context to make sure that
@ -596,7 +596,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
u32 ret;
migrate_disable();
ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func);
migrate_enable();
return ret;
}
@ -722,7 +722,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
return res;
}
DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
DECLARE_BPF_DISPATCHER(xdp)
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
@ -733,8 +733,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* already takes rcu_read_lock() when fetching the program, so
* it's not necessary here anymore.
*/
return __BPF_PROG_RUN(prog, xdp,
BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
}
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
@ -1084,7 +1083,6 @@ bpf_address_lookup(unsigned long addr, unsigned long *size,
void bpf_prog_kallsyms_add(struct bpf_prog *fp);
void bpf_prog_kallsyms_del(struct bpf_prog *fp);
void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);
#else /* CONFIG_BPF_JIT */
@ -1153,11 +1151,6 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
}
static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
sym[0] = '\0';
}
#endif /* CONFIG_BPF_JIT */
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);

View File

@ -85,6 +85,8 @@ typedef struct ns_common *ns_get_path_helper_t(void *);
extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
void *private_data);
extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
extern void nsfs_init(void);

View File

@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
}
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
#if defined(CONFIG_BPF_STREAM_PARSER)
void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
#else
static inline void sk_psock_unlink(struct sock *sk,
struct sk_psock_link *link)
{
}
#endif
void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
@ -347,11 +339,23 @@ static inline void sk_psock_update_proto(struct sock *sk,
struct sk_psock *psock,
struct proto *ops)
{
psock->saved_unhash = sk->sk_prot->unhash;
psock->saved_close = sk->sk_prot->close;
psock->saved_write_space = sk->sk_write_space;
/* Initialize saved callbacks and original proto only once, since this
* function may be called multiple times for a psock, e.g. when
* psock->progs.msg_parser is updated.
*
* Since we've not installed the new proto, psock is not yet in use and
* we can initialize it without synchronization.
*/
if (!psock->sk_proto) {
struct proto *orig = READ_ONCE(sk->sk_prot);
psock->saved_unhash = orig->unhash;
psock->saved_close = orig->close;
psock->saved_write_space = sk->sk_write_space;
psock->sk_proto = orig;
}
psock->sk_proto = sk->sk_prot;
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, ops);
}
@ -360,7 +364,13 @@ static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
sk->sk_prot->unhash = psock->saved_unhash;
tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
if (inet_csk_has_ulp(sk)) {
tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
} else {
sk->sk_write_space = psock->saved_write_space;
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, psock->sk_proto);
}
}
static inline void sk_psock_set_state(struct sk_psock *psock,
@ -381,26 +391,6 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,
return test_bit(bit, &psock->state);
}
static inline struct sk_psock *sk_psock_get_checked(struct sock *sk)
{
struct sk_psock *psock;
rcu_read_lock();
psock = sk_psock(sk);
if (psock) {
if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
psock = ERR_PTR(-EBUSY);
goto out;
}
if (!refcount_inc_not_zero(&psock->refcnt))
psock = ERR_PTR(-EBUSY);
}
out:
rcu_read_unlock();
return psock;
}
static inline struct sk_psock *sk_psock_get(struct sock *sk)
{
struct sk_psock *psock;

View File

@ -335,4 +335,10 @@ static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
if (icsk->icsk_ack.pingpong < U8_MAX)
icsk->icsk_ack.pingpong++;
}
static inline bool inet_csk_has_ulp(struct sock *sk)
{
return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
}
#endif /* _INET_CONNECTION_SOCK_H */

View File

@ -2195,21 +2195,23 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
struct sk_msg;
struct sk_psock;
int tcp_bpf_init(struct sock *sk);
void tcp_bpf_reinit(struct sock *sk);
#ifdef CONFIG_BPF_STREAM_PARSER
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#else
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
#ifdef CONFIG_NET_SOCK_MSG
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#else
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif
#endif /* CONFIG_NET_SOCK_MSG */
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF

View File

@ -503,4 +503,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
}
#ifdef CONFIG_BPF_STREAM_PARSER
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
#endif /* BPF_STREAM_PARSER */
#endif /* _UDP_H */

View File

@ -75,13 +75,17 @@ static inline void bpf_test_probe_##call(void) \
check_trace_callback_type_##call(__bpf_trace_##template); \
} \
typedef void (*btf_trace_##call)(void *__data, proto); \
static struct bpf_raw_event_map __used \
__attribute__((section("__bpf_raw_tp_map"))) \
__bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
.bpf_func = (void *)(btf_trace_##call)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
.writable_size = size, \
static union { \
struct bpf_raw_event_map event; \
btf_trace_##call handler; \
} __bpf_trace_tp_map_##call __used \
__attribute__((section("__bpf_raw_tp_map"))) = { \
.event = { \
.tp = &__tracepoint_##call, \
.bpf_func = __bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
.writable_size = size, \
}, \
};
#define FIRST(x, ...) x

View File

@ -210,6 +210,7 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
__MAX_BPF_ATTACH_TYPE
};
@ -325,44 +326,46 @@ enum bpf_attach_type {
#define BPF_PSEUDO_CALL 1
/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
enum {
BPF_ANY = 0, /* create new element or update existing */
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
};
/* flags for BPF_MAP_CREATE command */
#define BPF_F_NO_PREALLOC (1U << 0)
enum {
BPF_F_NO_PREALLOC = (1U << 0),
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
* which can scale and perform better.
* Note, the LRU nodes (including free nodes) cannot be moved
* across different LRU lists.
*/
#define BPF_F_NO_COMMON_LRU (1U << 1)
BPF_F_NO_COMMON_LRU = (1U << 1),
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
#define BPF_OBJ_NAME_LEN 16U
BPF_F_NUMA_NODE = (1U << 2),
/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
BPF_F_RDONLY = (1U << 3),
BPF_F_WRONLY = (1U << 4),
/* Flag for stack_map, store build_id+offset instead of pointer */
#define BPF_F_STACK_BUILD_ID (1U << 5)
BPF_F_STACK_BUILD_ID = (1U << 5),
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
BPF_F_ZERO_SEED = (1U << 6),
/* Flags for accessing BPF object from program side. */
#define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8)
BPF_F_RDONLY_PROG = (1U << 7),
BPF_F_WRONLY_PROG = (1U << 8),
/* Clone map from listener for newly accepted socket */
#define BPF_F_CLONE (1U << 9)
BPF_F_CLONE = (1U << 9),
/* Enable memory-mapping BPF map */
#define BPF_F_MMAPABLE (1U << 10)
BPF_F_MMAPABLE = (1U << 10),
};
/* Flags for BPF_PROG_QUERY. */
@ -391,6 +394,8 @@ struct bpf_stack_build_id {
};
};
#define BPF_OBJ_NAME_LEN 16U
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@ -2909,6 +2914,42 @@ union bpf_attr {
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
*
* int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
*
* **-ENOENT** if pidns does not exists for the current task.
*
* int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
* event must have the following attributes: **PERF_SAMPLE_RAW**
* as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
* **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
*
* The *flags* are used to indicate the index in *map* for which
* the value must be put, masked with **BPF_F_INDEX_MASK**.
* Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
* to indicate that the index of the current CPU core should be
* used.
*
* The value to write, of *size*, is passed through eBPF stack and
* pointed by *data*.
*
* *ctx* is a pointer to in-kernel struct xdp_buff.
*
* This helper is similar to **bpf_perf_eventoutput**\ () but
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -3030,7 +3071,9 @@ union bpf_attr {
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
FN(read_branch_records),
FN(read_branch_records), \
FN(get_ns_current_pid_tgid), \
FN(xdp_output),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3045,72 +3088,100 @@ enum bpf_func_id {
/* All flags used by eBPF helper functions, placed here. */
/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
#define BPF_F_INVALIDATE_HASH (1ULL << 1)
enum {
BPF_F_RECOMPUTE_CSUM = (1ULL << 0),
BPF_F_INVALIDATE_HASH = (1ULL << 1),
};
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
*/
#define BPF_F_HDR_FIELD_MASK 0xfULL
enum {
BPF_F_HDR_FIELD_MASK = 0xfULL,
};
/* BPF_FUNC_l4_csum_replace flags. */
#define BPF_F_PSEUDO_HDR (1ULL << 4)
#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
#define BPF_F_MARK_ENFORCE (1ULL << 6)
enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
};
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
#define BPF_F_INGRESS (1ULL << 0)
enum {
BPF_F_INGRESS = (1ULL << 0),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
enum {
BPF_F_TUNINFO_IPV6 = (1ULL << 0),
};
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
#define BPF_F_SKIP_FIELD_MASK 0xffULL
#define BPF_F_USER_STACK (1ULL << 8)
enum {
BPF_F_SKIP_FIELD_MASK = 0xffULL,
BPF_F_USER_STACK = (1ULL << 8),
/* flags used by BPF_FUNC_get_stackid only. */
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
#define BPF_F_REUSE_STACKID (1ULL << 10)
BPF_F_FAST_STACK_CMP = (1ULL << 9),
BPF_F_REUSE_STACKID = (1ULL << 10),
/* flags used by BPF_FUNC_get_stack only. */
#define BPF_F_USER_BUILD_ID (1ULL << 11)
BPF_F_USER_BUILD_ID = (1ULL << 11),
};
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
#define BPF_F_SEQ_NUMBER (1ULL << 3)
enum {
BPF_F_ZERO_CSUM_TX = (1ULL << 1),
BPF_F_DONT_FRAGMENT = (1ULL << 2),
BPF_F_SEQ_NUMBER = (1ULL << 3),
};
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
*/
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
enum {
BPF_F_CURRENT_NETNS = (-1L),
};
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
enum {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
};
#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
enum {
BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff,
BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56,
};
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* BPF_FUNC_sysctl_get_name flags. */
#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};
/* BPF_FUNC_sk_storage_get flags */
#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
enum {
BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
};
/* BPF_FUNC_read_branch_records flags. */
#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0)
enum {
BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
};
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
@ -3176,6 +3247,7 @@ struct __sk_buff {
__u32 wire_len;
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
__u32 gso_size;
};
struct bpf_tunnel_key {
@ -3528,13 +3600,14 @@ struct bpf_sock_ops {
};
/* Definitions for bpf_sock_ops_cb_flags */
#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
* supported cb flags
*/
enum {
BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0),
BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
/* Mask of all currently supported cb flags */
BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
};
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
@ -3613,8 +3686,10 @@ enum {
BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
enum {
TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
};
struct bpf_perf_event_value {
__u64 counter;
@ -3622,12 +3697,16 @@ struct bpf_perf_event_value {
__u64 running;
};
#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
#define BPF_DEVCG_ACC_READ (1ULL << 1)
#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
enum {
BPF_DEVCG_ACC_MKNOD = (1ULL << 0),
BPF_DEVCG_ACC_READ = (1ULL << 1),
BPF_DEVCG_ACC_WRITE = (1ULL << 2),
};
#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
enum {
BPF_DEVCG_DEV_BLOCK = (1ULL << 0),
BPF_DEVCG_DEV_CHAR = (1ULL << 1),
};
struct bpf_cgroup_dev_ctx {
/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
@ -3643,8 +3722,10 @@ struct bpf_raw_tracepoint_args {
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
* OUTPUT: Do lookup from egress perspective; default is ingress
*/
#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
};
enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
@ -3716,9 +3797,11 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
enum {
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0),
BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1),
BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2),
};
struct bpf_flow_keys {
__u16 nhoff;
@ -3784,4 +3867,8 @@ struct bpf_sockopt {
__s32 retval;
};
struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
#endif /* _UAPI__LINUX_BPF_H__ */

View File

@ -320,6 +320,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
struct bpf_struct_ops_value *uvalue, *kvalue;
const struct btf_member *member;
const struct btf_type *t = st_ops->type;
struct bpf_tramp_progs *tprogs = NULL;
void *udata, *kdata;
int prog_fd, err = 0;
void *image;
@ -343,6 +344,10 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (uvalue->state || refcount_read(&uvalue->refcnt))
return -EINVAL;
tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
if (!tprogs)
return -ENOMEM;
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
@ -425,10 +430,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto reset_unlock;
}
tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
err = arch_prepare_bpf_trampoline(image,
st_map->image + PAGE_SIZE,
&st_ops->func_models[i], 0,
&prog, 1, NULL, 0, NULL);
tprogs, NULL);
if (err < 0)
goto reset_unlock;
@ -469,6 +476,7 @@ reset_unlock:
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
unlock:
kfree(tprogs);
mutex_unlock(&st_map->lock);
return err;
}

View File

@ -3710,13 +3710,26 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
nr_args--;
}
if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
arg == nr_args) {
if (!t)
/* Default prog with 5 args. 6th arg is retval. */
return true;
/* function return type */
t = btf_type_by_id(btf, t->type);
if (arg == nr_args) {
if (prog->expected_attach_type == BPF_TRACE_FEXIT) {
if (!t)
return true;
t = btf_type_by_id(btf, t->type);
} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
/* For now the BPF_MODIFY_RETURN can only be attached to
* functions that return an int.
*/
if (!t)
return false;
t = btf_type_skip_modifiers(btf, t->type, NULL);
if (!btf_type_is_int(t)) {
bpf_log(log,
"ret type %s not allowed for fmod_ret\n",
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
}
} else if (arg >= nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
tname, arg + 1);

View File

@ -97,7 +97,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
return fp;
}
@ -523,22 +523,22 @@ int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_harden __read_mostly;
long bpf_jit_limit __read_mostly;
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
unsigned long *symbol_start,
unsigned long *symbol_end)
static void
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
{
const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
unsigned long addr = (unsigned long)hdr;
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
*symbol_start = addr;
*symbol_end = addr + hdr->pages * PAGE_SIZE;
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
}
void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
static void
bpf_prog_ksym_set_name(struct bpf_prog *prog)
{
char *sym = prog->aux->ksym.name;
const char *end = sym + KSYM_NAME_LEN;
const struct btf_type *type;
const char *func_name;
@ -572,36 +572,27 @@ void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
*sym = 0;
}
static __always_inline unsigned long
bpf_get_prog_addr_start(struct latch_tree_node *n)
static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
{
unsigned long symbol_start, symbol_end;
const struct bpf_prog_aux *aux;
aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
return symbol_start;
return container_of(n, struct bpf_ksym, tnode)->start;
}
static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
struct latch_tree_node *b)
{
return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
}
static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
{
unsigned long val = (unsigned long)key;
unsigned long symbol_start, symbol_end;
const struct bpf_prog_aux *aux;
const struct bpf_ksym *ksym;
aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
ksym = container_of(n, struct bpf_ksym, tnode);
if (val < symbol_start)
if (val < ksym->start)
return -1;
if (val >= symbol_end)
if (val >= ksym->end)
return 1;
return 0;
@ -616,20 +607,29 @@ static DEFINE_SPINLOCK(bpf_lock);
static LIST_HEAD(bpf_kallsyms);
static struct latch_tree_root bpf_tree __cacheline_aligned;
static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
void bpf_ksym_add(struct bpf_ksym *ksym)
{
WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
spin_lock_bh(&bpf_lock);
WARN_ON_ONCE(!list_empty(&ksym->lnode));
list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
spin_unlock_bh(&bpf_lock);
}
static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
static void __bpf_ksym_del(struct bpf_ksym *ksym)
{
if (list_empty(&aux->ksym_lnode))
if (list_empty(&ksym->lnode))
return;
latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
list_del_rcu(&aux->ksym_lnode);
latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
list_del_rcu(&ksym->lnode);
}
void bpf_ksym_del(struct bpf_ksym *ksym)
{
spin_lock_bh(&bpf_lock);
__bpf_ksym_del(ksym);
spin_unlock_bh(&bpf_lock);
}
static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
@ -639,8 +639,8 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
return list_empty(&fp->aux->ksym_lnode) ||
fp->aux->ksym_lnode.prev == LIST_POISON2;
return list_empty(&fp->aux->ksym.lnode) ||
fp->aux->ksym.lnode.prev == LIST_POISON2;
}
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
@ -649,9 +649,11 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
!capable(CAP_SYS_ADMIN))
return;
spin_lock_bh(&bpf_lock);
bpf_prog_ksym_node_add(fp->aux);
spin_unlock_bh(&bpf_lock);
bpf_prog_ksym_set_addr(fp);
bpf_prog_ksym_set_name(fp);
fp->aux->ksym.prog = true;
bpf_ksym_add(&fp->aux->ksym);
}
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
@ -659,33 +661,30 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
if (!bpf_prog_kallsyms_candidate(fp))
return;
spin_lock_bh(&bpf_lock);
bpf_prog_ksym_node_del(fp->aux);
spin_unlock_bh(&bpf_lock);
bpf_ksym_del(&fp->aux->ksym);
}
static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
{
struct latch_tree_node *n;
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
return n ?
container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
NULL;
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}
const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
unsigned long symbol_start, symbol_end;
struct bpf_prog *prog;
struct bpf_ksym *ksym;
char *ret = NULL;
rcu_read_lock();
prog = bpf_prog_kallsyms_find(addr);
if (prog) {
bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
bpf_get_prog_name(prog, sym);
ksym = bpf_ksym_find(addr);
if (ksym) {
unsigned long symbol_start = ksym->start;
unsigned long symbol_end = ksym->end;
strncpy(sym, ksym->name, KSYM_NAME_LEN);
ret = sym;
if (size)
@ -703,19 +702,28 @@ bool is_bpf_text_address(unsigned long addr)
bool ret;
rcu_read_lock();
ret = bpf_prog_kallsyms_find(addr) != NULL;
ret = bpf_ksym_find(addr) != NULL;
rcu_read_unlock();
return ret;
}
static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
{
struct bpf_ksym *ksym = bpf_ksym_find(addr);
return ksym && ksym->prog ?
container_of(ksym, struct bpf_prog_aux, ksym)->prog :
NULL;
}
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
const struct exception_table_entry *e = NULL;
struct bpf_prog *prog;
rcu_read_lock();
prog = bpf_prog_kallsyms_find(addr);
prog = bpf_prog_ksym_find(addr);
if (!prog)
goto out;
if (!prog->aux->num_exentries)
@ -730,7 +738,7 @@ out:
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym)
{
struct bpf_prog_aux *aux;
struct bpf_ksym *ksym;
unsigned int it = 0;
int ret = -ERANGE;
@ -738,13 +746,13 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
return ret;
rcu_read_lock();
list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
if (it++ != symnum)
continue;
bpf_get_prog_name(aux->prog, sym);
strncpy(sym, ksym->name, KSYM_NAME_LEN);
*value = (unsigned long)aux->prog->bpf_func;
*value = ksym->start;
*type = BPF_SYM_ELF_TYPE;
ret = 0;
@ -2149,6 +2157,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{

View File

@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
noff = 0;
} else {
old = d->image + d->image_off;
noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
noff = d->image_off ^ (PAGE_SIZE / 2);
}
new = d->num_progs ? d->image + noff : NULL;
@ -140,9 +140,10 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
mutex_lock(&d->mutex);
if (!d->image) {
d->image = bpf_image_alloc();
d->image = bpf_jit_alloc_exec_page();
if (!d->image)
goto out;
bpf_image_ksym_add(d->image, &d->ksym);
}
prev_num_progs = d->num_progs;

View File

@ -12,6 +12,8 @@
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include "../../lib/kstrtox.h"
@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.arg4_type = ARG_PTR_TO_LONG,
};
#endif
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
struct bpf_pidns_info *, nsdata, u32, size)
{
struct task_struct *task = current;
struct pid_namespace *pidns;
int err = -EINVAL;
if (unlikely(size != sizeof(struct bpf_pidns_info)))
goto clear;
if (unlikely((u64)(dev_t)dev != dev))
goto clear;
if (unlikely(!task))
goto clear;
pidns = task_active_pid_ns(task);
if (unlikely(!pidns)) {
err = -ENOENT;
goto clear;
}
if (!ns_match(&pidns->ns, (dev_t)dev, ino))
goto clear;
nsdata->pid = task_pid_nr_ns(task, pidns);
nsdata->tgid = task_tgid_nr_ns(task, pidns);
return 0;
clear:
memset((void *)nsdata, 0, (size_t) size);
return err;
}
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
.func = bpf_get_ns_current_pid_tgid,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
.arg4_type = ARG_CONST_SIZE,
};

View File

@ -25,6 +25,7 @@ enum bpf_type {
BPF_TYPE_UNSPEC = 0,
BPF_TYPE_PROG,
BPF_TYPE_MAP,
BPF_TYPE_LINK,
};
static void *bpf_any_get(void *raw, enum bpf_type type)
@ -36,6 +37,9 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
case BPF_TYPE_MAP:
bpf_map_inc_with_uref(raw);
break;
case BPF_TYPE_LINK:
bpf_link_inc(raw);
break;
default:
WARN_ON_ONCE(1);
break;
@ -53,6 +57,9 @@ static void bpf_any_put(void *raw, enum bpf_type type)
case BPF_TYPE_MAP:
bpf_map_put_with_uref(raw);
break;
case BPF_TYPE_LINK:
bpf_link_put(raw);
break;
default:
WARN_ON_ONCE(1);
break;
@ -63,20 +70,32 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
{
void *raw;
*type = BPF_TYPE_MAP;
raw = bpf_map_get_with_uref(ufd);
if (IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
raw = bpf_prog_get(ufd);
if (!IS_ERR(raw)) {
*type = BPF_TYPE_MAP;
return raw;
}
return raw;
raw = bpf_prog_get(ufd);
if (!IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
return raw;
}
raw = bpf_link_get_from_fd(ufd);
if (!IS_ERR(raw)) {
*type = BPF_TYPE_LINK;
return raw;
}
return ERR_PTR(-EINVAL);
}
static const struct inode_operations bpf_dir_iops;
static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
static const struct inode_operations bpf_link_iops = { };
static struct inode *bpf_get_inode(struct super_block *sb,
const struct inode *dir,
@ -114,6 +133,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
*type = BPF_TYPE_PROG;
else if (inode->i_op == &bpf_map_iops)
*type = BPF_TYPE_MAP;
else if (inode->i_op == &bpf_link_iops)
*type = BPF_TYPE_LINK;
else
return -EACCES;
@ -335,6 +356,12 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
&bpffs_map_fops : &bpffs_obj_fops);
}
static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
{
return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
&bpffs_obj_fops);
}
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
@ -411,6 +438,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
case BPF_TYPE_MAP:
ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
break;
case BPF_TYPE_LINK:
ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
break;
default:
ret = -EPERM;
}
@ -487,6 +517,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
ret = bpf_prog_new_fd(raw);
else if (type == BPF_TYPE_MAP)
ret = bpf_map_new_fd(raw, f_flags);
else if (type == BPF_TYPE_LINK)
ret = bpf_link_new_fd(raw);
else
return -ENOENT;
@ -504,6 +536,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
if (inode->i_op == &bpf_map_iops)
return ERR_PTR(-EINVAL);
if (inode->i_op == &bpf_link_iops)
return ERR_PTR(-EINVAL);
if (inode->i_op != &bpf_prog_iops)
return ERR_PTR(-EACCES);

View File

@ -2173,84 +2173,274 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
{
struct bpf_prog *prog = filp->private_data;
struct bpf_link {
atomic64_t refcnt;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
};
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
bpf_prog_put(prog);
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog)
{
atomic64_set(&link->refcnt, 1);
link->ops = ops;
link->prog = prog;
}
/* Clean up bpf_link and corresponding anon_inode file and FD. After
* anon_inode is created, bpf_link can't be just kfree()'d due to deferred
* anon_inode's release() call. This helper manages marking bpf_link as
* defunct, releases anon_inode file and puts reserved FD.
*/
static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
int link_fd)
{
link->prog = NULL;
fput(link_file);
put_unused_fd(link_fd);
}
void bpf_link_inc(struct bpf_link *link)
{
atomic64_inc(&link->refcnt);
}
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
if (link->prog) {
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
/* free bpf_link and its containing memory */
link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
{
struct bpf_link *link = container_of(work, struct bpf_link, work);
bpf_link_free(link);
}
/* bpf_link_put can be called from atomic context, but ensures that resources
* are freed from process context
*/
void bpf_link_put(struct bpf_link *link)
{
if (!atomic64_dec_and_test(&link->refcnt))
return;
if (in_atomic()) {
INIT_WORK(&link->work, bpf_link_put_deferred);
schedule_work(&link->work);
} else {
bpf_link_free(link);
}
}
static int bpf_link_release(struct inode *inode, struct file *filp)
{
struct bpf_link *link = filp->private_data;
bpf_link_put(link);
return 0;
}
static const struct file_operations bpf_tracing_prog_fops = {
.release = bpf_tracing_prog_release,
#ifdef CONFIG_PROC_FS
static const struct bpf_link_ops bpf_raw_tp_lops;
static const struct bpf_link_ops bpf_tracing_link_lops;
static const struct bpf_link_ops bpf_xdp_link_lops;
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_link *link = filp->private_data;
const struct bpf_prog *prog = link->prog;
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
const char *link_type;
if (link->ops == &bpf_raw_tp_lops)
link_type = "raw_tracepoint";
else if (link->ops == &bpf_tracing_link_lops)
link_type = "tracing";
else
link_type = "unknown";
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
seq_printf(m,
"link_type:\t%s\n"
"prog_tag:\t%s\n"
"prog_id:\t%u\n",
link_type,
prog_tag,
prog->aux->id);
}
#endif
const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
#endif
.release = bpf_link_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
};
int bpf_link_new_fd(struct bpf_link *link)
{
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
}
/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
* instead of immediately installing fd in fdtable, just reserve it and
* return. Caller then need to either install it with fd_install(fd, file) or
* release with put_unused_fd(fd).
* This is useful for cases when bpf_link attachment/detachment are
* complicated and expensive operations and should be delayed until all the fd
* reservation and anon_inode creation succeeds.
*/
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
{
struct file *file;
int fd;
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
return ERR_PTR(fd);
file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
if (IS_ERR(file)) {
put_unused_fd(fd);
return file;
}
*reserved_fd = fd;
return file;
}
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
struct fd f = fdget(ufd);
struct bpf_link *link;
if (!f.file)
return ERR_PTR(-EBADF);
if (f.file->f_op != &bpf_link_fops) {
fdput(f);
return ERR_PTR(-EINVAL);
}
link = f.file->private_data;
bpf_link_inc(link);
fdput(f);
return link;
}
struct bpf_tracing_link {
struct bpf_link link;
};
static void bpf_tracing_link_release(struct bpf_link *link)
{
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
}
static void bpf_tracing_link_dealloc(struct bpf_link *link)
{
struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link);
kfree(tr_link);
}
static const struct bpf_link_ops bpf_tracing_link_lops = {
.release = bpf_tracing_link_release,
.dealloc = bpf_tracing_link_dealloc,
};
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
{
int tr_fd, err;
struct bpf_tracing_link *link;
struct file *link_file;
int link_fd, err;
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
prog->expected_attach_type != BPF_MODIFY_RETURN &&
prog->type != BPF_PROG_TYPE_EXT) {
err = -EINVAL;
goto out_put_prog;
}
err = bpf_trampoline_link_prog(prog);
if (err)
goto out_put_prog;
tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
prog, O_CLOEXEC);
if (tr_fd < 0) {
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
err = tr_fd;
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
goto out_put_prog;
}
return tr_fd;
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
link_file = bpf_link_new_file(&link->link, &link_fd);
if (IS_ERR(link_file)) {
kfree(link);
err = PTR_ERR(link_file);
goto out_put_prog;
}
err = bpf_trampoline_link_prog(prog);
if (err) {
bpf_link_cleanup(&link->link, link_file, link_fd);
goto out_put_prog;
}
fd_install(link_fd, link_file);
return link_fd;
out_put_prog:
bpf_prog_put(prog);
return err;
}
struct bpf_raw_tracepoint {
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
};
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
static void bpf_raw_tp_link_release(struct bpf_link *link)
{
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
if (raw_tp->prog) {
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
bpf_prog_put(raw_tp->prog);
}
bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
bpf_put_raw_tracepoint(raw_tp->btp);
kfree(raw_tp);
return 0;
}
static const struct file_operations bpf_raw_tp_fops = {
.release = bpf_raw_tracepoint_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
{
struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
kfree(raw_tp);
}
static const struct bpf_link_ops bpf_raw_tp_lops = {
.release = bpf_raw_tp_link_release,
.dealloc = bpf_raw_tp_link_dealloc,
};
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
struct bpf_raw_tracepoint *raw_tp;
struct bpf_raw_tp_link *link;
struct bpf_raw_event_map *btp;
struct file *link_file;
struct bpf_prog *prog;
const char *tp_name;
char buf[128];
int tp_fd, err;
int link_fd, err;
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
return -EINVAL;
@ -2297,29 +2487,30 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
goto out_put_prog;
}
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
if (!raw_tp) {
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
goto out_put_btp;
}
raw_tp->btp = btp;
raw_tp->prog = prog;
bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
link->btp = btp;
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
goto out_free_tp;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
O_CLOEXEC);
if (tp_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd;
goto out_free_tp;
link_file = bpf_link_new_file(&link->link, &link_fd);
if (IS_ERR(link_file)) {
kfree(link);
err = PTR_ERR(link_file);
goto out_put_btp;
}
return tp_fd;
out_free_tp:
kfree(raw_tp);
err = bpf_probe_register(link->btp, prog);
if (err) {
bpf_link_cleanup(&link->link, link_file, link_fd);
goto out_put_btp;
}
fd_install(link_fd, link_file);
return link_fd;
out_put_btp:
bpf_put_raw_tracepoint(btp);
out_put_prog:
@ -3266,15 +3457,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (err)
goto out;
if (file->f_op == &bpf_raw_tp_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
if (file->f_op == &bpf_link_fops) {
struct bpf_link *link = file->private_data;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
if (link->ops == &bpf_raw_tp_lops) {
struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->link.prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
goto out_not_supp;
}
event = perf_get_event(file);
@ -3294,6 +3491,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
goto put_file;
}
out_not_supp:
err = -ENOTSUPP;
put_file:
fput(file);

View File

@ -5,6 +5,7 @@
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@ -17,12 +18,11 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
static struct latch_tree_root image_tree __cacheline_aligned;
/* serializes access to trampoline_table and image_tree */
/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
static void *bpf_jit_alloc_exec_page(void)
void *bpf_jit_alloc_exec_page(void)
{
void *image;
@ -38,62 +38,28 @@ static void *bpf_jit_alloc_exec_page(void)
return image;
}
static __always_inline bool image_tree_less(struct latch_tree_node *a,
struct latch_tree_node *b)
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
return ia < ib;
ksym->start = (unsigned long) data;
ksym->end = ksym->start + PAGE_SIZE;
bpf_ksym_add(ksym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
PAGE_SIZE, false, ksym->name);
}
static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
void *image = container_of(n, struct bpf_image, tnode);
if (addr < image)
return -1;
if (addr >= image + PAGE_SIZE)
return 1;
return 0;
bpf_ksym_del(ksym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
PAGE_SIZE, true, ksym->name);
}
static const struct latch_tree_ops image_tree_ops = {
.less = image_tree_less,
.comp = image_tree_comp,
};
static void *__bpf_image_alloc(bool lock)
static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
struct bpf_image *image;
struct bpf_ksym *ksym = &tr->ksym;
image = bpf_jit_alloc_exec_page();
if (!image)
return NULL;
if (lock)
mutex_lock(&trampoline_mutex);
latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
if (lock)
mutex_unlock(&trampoline_mutex);
return image->data;
}
void *bpf_image_alloc(void)
{
return __bpf_image_alloc(true);
}
bool is_bpf_image_address(unsigned long addr)
{
bool ret;
rcu_read_lock();
ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
rcu_read_unlock();
return ret;
snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
bpf_image_ksym_add(tr->image, ksym);
}
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
@ -116,7 +82,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
goto out;
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
image = __bpf_image_alloc(false);
image = bpf_jit_alloc_exec_page();
if (!image) {
kfree(tr);
tr = NULL;
@ -131,6 +97,8 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
tr->image = image;
INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
bpf_trampoline_ksym_add(tr);
out:
mutex_unlock(&trampoline_mutex);
return tr;
@ -190,40 +158,50 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
#define BPF_MAX_TRAMP_PROGS 40
static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
const struct bpf_prog_aux *aux;
struct bpf_tramp_progs *tprogs;
struct bpf_prog **progs;
int kind;
*total = 0;
tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
if (!tprogs)
return ERR_PTR(-ENOMEM);
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
tprogs[kind].nr_progs = tr->progs_cnt[kind];
*total += tr->progs_cnt[kind];
progs = tprogs[kind].progs;
hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
*progs++ = aux->prog;
}
return tprogs;
}
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
struct bpf_prog **progs, **fentry, **fexit;
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
struct bpf_tramp_progs *tprogs;
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
struct bpf_prog_aux *aux;
int err;
int err, total;
if (fentry_cnt + fexit_cnt == 0) {
tprogs = bpf_trampoline_get_progs(tr, &total);
if (IS_ERR(tprogs))
return PTR_ERR(tprogs);
if (total == 0) {
err = unregister_fentry(tr, old_image);
tr->selector = 0;
goto out;
}
/* populate fentry progs */
fentry = progs = progs_to_run;
hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
*progs++ = aux->prog;
/* populate fexit progs */
fexit = progs;
hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
*progs++ = aux->prog;
if (fexit_cnt)
if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
/* Though the second half of trampoline page is unused a task could be
@ -232,12 +210,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
* preempted task. Hence wait for tasks to voluntarily schedule or go
* to userspace.
*/
synchronize_rcu_tasks();
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
&tr->func.model, flags,
fentry, fentry_cnt,
fexit, fexit_cnt,
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
&tr->func.model, flags, tprogs,
tr->func.addr);
if (err < 0)
goto out;
@ -252,6 +229,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out;
tr->selector++;
out:
kfree(tprogs);
return err;
}
@ -260,6 +238,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
switch (t) {
case BPF_TRACE_FENTRY:
return BPF_TRAMP_FENTRY;
case BPF_MODIFY_RETURN:
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
default:
@ -344,8 +324,6 @@ out:
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
struct bpf_image *image;
if (!tr)
return;
mutex_lock(&trampoline_mutex);
@ -356,11 +334,10 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
goto out;
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
goto out;
image = container_of(tr->image, struct bpf_image, data);
latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
bpf_image_ksym_del(&tr->ksym);
/* wait for tasks to get out of trampoline before freeing it */
synchronize_rcu_tasks();
bpf_jit_free_exec(image);
bpf_jit_free_exec(tr->image);
hlist_del(&tr->hlist);
kfree(tr);
out:
@ -375,6 +352,7 @@ out:
* call __bpf_prog_exit
*/
u64 notrace __bpf_prog_enter(void)
__acquires(RCU)
{
u64 start = 0;
@ -386,6 +364,7 @@ u64 notrace __bpf_prog_enter(void)
}
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
__releases(RCU)
{
struct bpf_prog_stats *stats;
@ -409,8 +388,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_prog **fentry_progs, int fentry_cnt,
struct bpf_prog **fexit_progs, int fexit_cnt,
struct bpf_tramp_progs *tprogs,
void *orig_call)
{
return -ENOTSUPP;

View File

@ -19,6 +19,7 @@
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include "disasm.h"
@ -3649,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_perf_event_read &&
func_id != BPF_FUNC_perf_event_output &&
func_id != BPF_FUNC_skb_output &&
func_id != BPF_FUNC_perf_event_read_value)
func_id != BPF_FUNC_perf_event_read_value &&
func_id != BPF_FUNC_xdp_output)
goto error;
break;
case BPF_MAP_TYPE_STACK_TRACE:
@ -3739,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
case BPF_FUNC_skb_output:
case BPF_FUNC_xdp_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
@ -9800,6 +9803,26 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return 0;
}
#define SECURITY_PREFIX "security_"
static int check_attach_modify_return(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
/* This is expected to be cleaned up in the future with the KRSI effort
* introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
*/
if (within_error_injection_list(addr) ||
!strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
sizeof(SECURITY_PREFIX) - 1))
return 0;
verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
prog->aux->attach_btf_id, prog->aux->attach_func_name);
return -EINVAL;
}
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
@ -9950,6 +9973,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
if (!prog_extension)
return -EINVAL;
/* fallthrough */
case BPF_MODIFY_RETURN:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) {
@ -9999,6 +10023,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
tr->func.addr = (void *)addr;
prog->aux->trampoline = tr;
if (prog->expected_attach_type == BPF_MODIFY_RETURN)
ret = check_attach_modify_return(env);
out:
mutex_unlock(&tr->mutex);
if (ret)

View File

@ -8255,23 +8255,22 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
enum perf_bpf_event_type type)
{
bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
char sym[KSYM_NAME_LEN];
int i;
if (prog->aux->func_cnt == 0) {
bpf_get_prog_name(prog, sym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)prog->bpf_func,
prog->jited_len, unregister, sym);
prog->jited_len, unregister,
prog->aux->ksym.name);
} else {
for (i = 0; i < prog->aux->func_cnt; i++) {
struct bpf_prog *subprog = prog->aux->func[i];
bpf_get_prog_name(subprog, sym);
perf_event_ksymbol(
PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)subprog->bpf_func,
subprog->jited_len, unregister, sym);
subprog->jited_len, unregister,
prog->aux->ksym.name);
}
}
}

View File

@ -149,8 +149,6 @@ int kernel_text_address(unsigned long addr)
goto out;
if (is_bpf_text_address(addr))
goto out;
if (is_bpf_image_address(addr))
goto out;
ret = 0;
out:
if (no_rcu)

View File

@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
return &bpf_get_ns_current_pid_tgid_proto;
default:
return NULL;
}
@ -1143,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
};
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
@ -1218,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_NET
case BPF_FUNC_skb_output:
return &bpf_skb_output_proto;
case BPF_FUNC_xdp_output:
return &bpf_xdp_output_proto;
#endif
default:
return raw_tp_prog_func_proto(func_id, prog);
@ -1252,6 +1257,13 @@ static bool tracing_prog_is_valid_access(int off, int size,
return btf_ctx_access(off, size, type, prog, info);
}
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
return -ENOTSUPP;
}
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
.get_func_proto = raw_tp_prog_func_proto,
.is_valid_access = raw_tp_prog_is_valid_access,
@ -1266,6 +1278,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = {
};
const struct bpf_prog_ops tracing_prog_ops = {
.test_run = bpf_prog_test_run_tracing,
};
static bool raw_tp_writable_prog_is_valid_access(int off, int size,

View File

@ -10,6 +10,7 @@
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/error-injection.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@ -143,6 +144,14 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
return a + (long)b + c + d + (long)e + f;
}
int noinline bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
@ -160,18 +169,48 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
kfree(data);
return ERR_PTR(-EFAULT);
}
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) {
kfree(data);
return ERR_PTR(-EFAULT);
}
return data;
}
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
u16 side_effect = 0, ret = 0;
int b = 2, err = -EFAULT;
u32 retval = 0;
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
goto out;
break;
case BPF_MODIFY_RETURN:
ret = bpf_modify_return_test(1, &b);
if (b != 2)
side_effect = 1;
break;
default:
goto out;
}
retval = ((u32)side_effect << 16) | ret;
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
goto out;
err = 0;
out:
trace_bpf_test_finish(&err);
return err;
}
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
@ -277,6 +316,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* gso_segs is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
offsetof(struct __sk_buff, gso_size)))
return -EINVAL;
/* gso_size is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
sizeof(struct __sk_buff)))
return -EINVAL;
@ -297,6 +342,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (__skb->gso_segs > GSO_MAX_SEGS)
return -EINVAL;
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
skb_shinfo(skb)->gso_size = __skb->gso_size;
return 0;
}

View File

@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
if (unlikely(!xdp ||
xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, xdp->data,
@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
static int bpf_xdp_output_btf_ids[5];
const struct bpf_func_proto bpf_xdp_output_proto = {
.func = bpf_xdp_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
.btf_id = bpf_xdp_output_btf_ids,
};
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
{
return skb->sk ? sock_gen_cookie(skb->sk) : 0;
@ -7139,6 +7153,27 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
/* si->dst_reg = skb_shinfo(SKB); */
#ifdef NET_SKBUFF_DATA_USES_OFFSET
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, end));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, head));
*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
#else
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, end));
#endif
return insn;
}
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@ -7461,26 +7496,21 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, gso_segs):
/* si->dst_reg = skb_shinfo(SKB); */
#ifdef NET_SKBUFF_DATA_USES_OFFSET
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, end));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, head));
*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
#else
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, end));
#endif
insn = bpf_convert_shinfo_access(si, insn);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
si->dst_reg, si->dst_reg,
bpf_target_off(struct skb_shared_info,
gso_segs, 2,
target_size));
break;
case offsetof(struct __sk_buff, gso_size):
insn = bpf_convert_shinfo_access(si, insn);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
si->dst_reg, si->dst_reg,
bpf_target_off(struct skb_shared_info,
gso_size, 2,
target_size));
break;
case offsetof(struct __sk_buff, wire_len):
BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
@ -8829,10 +8859,9 @@ const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
DEFINE_BPF_DISPATCHER(xdp)
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
prev_prog, prog);
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}

View File

@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/sock_diag.h>
#include <net/udp.h>
struct bpf_stab {
struct bpf_map map;
@ -141,12 +142,58 @@ static void sock_map_unref(struct sock *sk, void *link_raw)
}
}
static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
struct proto *prot;
sock_owned_by_me(sk);
switch (sk->sk_type) {
case SOCK_STREAM:
prot = tcp_bpf_get_proto(sk, psock);
break;
case SOCK_DGRAM:
prot = udp_bpf_get_proto(sk, psock);
break;
default:
return -EINVAL;
}
if (IS_ERR(prot))
return PTR_ERR(prot);
sk_psock_update_proto(sk, psock, prot);
return 0;
}
static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
{
struct sk_psock *psock;
rcu_read_lock();
psock = sk_psock(sk);
if (psock) {
if (sk->sk_prot->close != sock_map_close) {
psock = ERR_PTR(-EBUSY);
goto out;
}
if (!refcount_inc_not_zero(&psock->refcnt))
psock = ERR_PTR(-EBUSY);
}
out:
rcu_read_unlock();
return psock;
}
static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
struct sock *sk)
{
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
bool skb_progs, sk_psock_is_new = false;
struct sk_psock *psock;
bool skb_progs;
int ret;
skb_verdict = READ_ONCE(progs->skb_verdict);
@ -172,7 +219,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
}
}
psock = sk_psock_get_checked(sk);
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
goto out_progs;
@ -191,18 +238,14 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
ret = -ENOMEM;
goto out_progs;
}
sk_psock_is_new = true;
}
if (msg_parser)
psock_set_prog(&psock->progs.msg_parser, msg_parser);
if (sk_psock_is_new) {
ret = tcp_bpf_init(sk);
if (ret < 0)
goto out_drop;
} else {
tcp_bpf_reinit(sk);
}
ret = sock_map_init_proto(sk, psock);
if (ret < 0)
goto out_drop;
write_lock_bh(&sk->sk_callback_lock);
if (skb_progs && !psock->parser.enabled) {
@ -235,20 +278,17 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
struct sk_psock *psock;
int ret;
psock = sk_psock_get_checked(sk);
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock))
return PTR_ERR(psock);
if (psock) {
tcp_bpf_reinit(sk);
return 0;
if (!psock) {
psock = sk_psock_init(sk, map->numa_node);
if (!psock)
return -ENOMEM;
}
psock = sk_psock_init(sk, map->numa_node);
if (!psock)
return -ENOMEM;
ret = tcp_bpf_init(sk);
ret = sock_map_init_proto(sk, psock);
if (ret < 0)
sk_psock_put(sk, psock);
return ret;
@ -384,7 +424,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_psock_link *link;
struct sk_psock *psock;
struct sock *osk;
@ -395,7 +434,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
if (inet_csk_has_ulp(sk))
return -EINVAL;
link = sk_psock_init_link();
@ -448,15 +487,31 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
}
static bool sock_map_sk_is_suitable(const struct sock *sk)
static bool sk_is_tcp(const struct sock *sk)
{
return sk->sk_type == SOCK_STREAM &&
sk->sk_protocol == IPPROTO_TCP;
}
static bool sk_is_udp(const struct sock *sk)
{
return sk->sk_type == SOCK_DGRAM &&
sk->sk_protocol == IPPROTO_UDP;
}
static bool sock_map_sk_is_suitable(const struct sock *sk)
{
return sk_is_tcp(sk) || sk_is_udp(sk);
}
static bool sock_map_sk_state_allowed(const struct sock *sk)
{
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
if (sk_is_tcp(sk))
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
else if (sk_is_udp(sk))
return sk_hashed(sk);
return false;
}
static int sock_map_update_elem(struct bpf_map *map, void *key,
@ -738,7 +793,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct inet_connection_sock *icsk = inet_csk(sk);
u32 key_size = map->key_size, hash;
struct bpf_htab_elem *elem, *elem_new;
struct bpf_htab_bucket *bucket;
@ -749,7 +803,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
if (unlikely(icsk->icsk_ulp_data))
if (inet_csk_has_ulp(sk))
return -EINVAL;
link = sk_psock_init_link();
@ -1129,7 +1183,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
return 0;
}
void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
case BPF_MAP_TYPE_SOCKMAP:
@ -1142,3 +1196,54 @@ void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
break;
}
}
static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_link *link;
while ((link = sk_psock_link_pop(psock))) {
sock_map_unlink(sk, link);
sk_psock_free_link(link);
}
}
void sock_map_unhash(struct sock *sk)
{
void (*saved_unhash)(struct sock *sk);
struct sk_psock *psock;
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
if (sk->sk_prot->unhash)
sk->sk_prot->unhash(sk);
return;
}
saved_unhash = psock->saved_unhash;
sock_map_remove_links(sk, psock);
rcu_read_unlock();
saved_unhash(sk);
}
void sock_map_close(struct sock *sk, long timeout)
{
void (*saved_close)(struct sock *sk, long timeout);
struct sk_psock *psock;
lock_sock(sk);
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
return sk->sk_prot->close(sk, timeout);
}
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
rcu_read_unlock();
release_sock(sk);
saved_close(sk, timeout);
}

View File

@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \

View File

@ -528,57 +528,7 @@ out_err:
return copied ? copied : err;
}
static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_link *link;
while ((link = sk_psock_link_pop(psock))) {
sk_psock_unlink(sk, link);
sk_psock_free_link(link);
}
}
static void tcp_bpf_unhash(struct sock *sk)
{
void (*saved_unhash)(struct sock *sk);
struct sk_psock *psock;
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
if (sk->sk_prot->unhash)
sk->sk_prot->unhash(sk);
return;
}
saved_unhash = psock->saved_unhash;
tcp_bpf_remove(sk, psock);
rcu_read_unlock();
saved_unhash(sk);
}
static void tcp_bpf_close(struct sock *sk, long timeout)
{
void (*saved_close)(struct sock *sk, long timeout);
struct sk_psock *psock;
lock_sock(sk);
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
return sk->sk_prot->close(sk, timeout);
}
saved_close = psock->saved_close;
tcp_bpf_remove(sk, psock);
rcu_read_unlock();
release_sock(sk);
saved_close(sk, timeout);
}
#ifdef CONFIG_BPF_STREAM_PARSER
enum {
TCP_BPF_IPV4,
TCP_BPF_IPV6,
@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
struct proto *base)
{
prot[TCP_BPF_BASE] = *base;
prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash;
prot[TCP_BPF_BASE].close = tcp_bpf_close;
prot[TCP_BPF_BASE].unhash = sock_map_unhash;
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
@ -629,28 +579,6 @@ static int __init tcp_bpf_v4_build_proto(void)
}
core_initcall(tcp_bpf_v4_build_proto);
static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
{
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
}
static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
{
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
* or added requiring sk_prot hook updates. We keep original saved
* hooks in this case.
*
* Pairs with lockless read in sk_clone_lock().
*/
WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
}
static int tcp_bpf_assert_proto_ops(struct proto *ops)
{
/* In order to avoid retpoline, we make assumptions when we call
@ -662,36 +590,21 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}
void tcp_bpf_reinit(struct sock *sk)
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock *psock;
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
sock_owned_by_me(sk);
if (!psock->sk_proto) {
struct proto *ops = READ_ONCE(sk->sk_prot);
rcu_read_lock();
psock = sk_psock(sk);
tcp_bpf_reinit_sk_prot(sk, psock);
rcu_read_unlock();
}
if (tcp_bpf_assert_proto_ops(ops))
return ERR_PTR(-EINVAL);
int tcp_bpf_init(struct sock *sk)
{
struct proto *ops = READ_ONCE(sk->sk_prot);
struct sk_psock *psock;
sock_owned_by_me(sk);
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock || psock->sk_proto ||
tcp_bpf_assert_proto_ops(ops))) {
rcu_read_unlock();
return -EINVAL;
tcp_bpf_check_v6_needs_rebuild(sk, ops);
}
tcp_bpf_check_v6_needs_rebuild(sk, ops);
tcp_bpf_update_sk_prot(sk, psock);
rcu_read_unlock();
return 0;
return &tcp_bpf_prots[family][config];
}
/* If a child got cloned from a listening socket that had tcp_bpf
@ -707,3 +620,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_STREAM_PARSER */

View File

@ -105,13 +105,6 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto,
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (!icsk->icsk_ulp_ops) {
sk->sk_write_space = write_space;
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, proto);
return;
}
if (icsk->icsk_ulp_ops->update)
icsk->icsk_ulp_ops->update(sk, proto, write_space);
}

53
net/ipv4/udp_bpf.c Normal file
View File

@ -0,0 +1,53 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/udp.h>
enum {
UDP_BPF_IPV4,
UDP_BPF_IPV6,
UDP_BPF_NUM_PROTS,
};
static struct proto *udpv6_prot_saved __read_mostly;
static DEFINE_SPINLOCK(udpv6_prot_lock);
static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
*prot = *base;
prot->unhash = sock_map_unhash;
prot->close = sock_map_close;
}
static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
{
if (sk->sk_family == AF_INET6 &&
unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
spin_lock_bh(&udpv6_prot_lock);
if (likely(ops != udpv6_prot_saved)) {
udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops);
smp_store_release(&udpv6_prot_saved, ops);
}
spin_unlock_bh(&udpv6_prot_lock);
}
}
static int __init udp_bpf_v4_build_proto(void)
{
udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot);
return 0;
}
core_initcall(udp_bpf_v4_build_proto);
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
{
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
if (!psock->sk_proto)
udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot));
return &udp_bpf_prots[family];
}

View File

@ -400,6 +400,7 @@ class PrinterHelpers(Printer):
'struct bpf_fib_lookup',
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
'struct bpf_sock',
'struct bpf_sock_addr',
'struct bpf_sock_ops',
@ -435,6 +436,7 @@ class PrinterHelpers(Printer):
'struct bpf_fib_lookup',
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
'struct bpf_sock',
'struct bpf_sock_addr',
'struct bpf_sock_ops',

View File

@ -63,12 +63,18 @@ vmlinux_link()
local lds="${objtree}/${KBUILD_LDS}"
local output=${1}
local objects
local strip_debug
info LD ${output}
# skip output file argument
shift
# The kallsyms linking does not need debug symbols included.
if [ "$output" != "${output#.tmp_vmlinux.kallsyms}" ] ; then
strip_debug=-Wl,--strip-debug
fi
if [ "${SRCARCH}" != "um" ]; then
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
@ -79,6 +85,7 @@ vmlinux_link()
${@}"
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
${strip_debug#-Wl,} \
-o ${output} \
-T ${lds} ${objects}
else
@ -91,6 +98,7 @@ vmlinux_link()
${@}"
${CC} ${CFLAGS_vmlinux} \
${strip_debug} \
-o ${output} \
-Wl,-T,${lds} \
${objects} \
@ -106,6 +114,8 @@ gen_btf()
{
local pahole_ver
local bin_arch
local bin_format
local bin_file
if ! [ -x "$(command -v ${PAHOLE})" ]; then
echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
@ -118,8 +128,9 @@ gen_btf()
return 1
fi
info "BTF" ${2}
vmlinux_link ${1}
info "BTF" ${2}
LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
# dump .BTF section into raw binary file to link with final vmlinux
@ -127,11 +138,12 @@ gen_btf()
cut -d, -f1 | cut -d' ' -f2)
bin_format=$(LANG=C ${OBJDUMP} -f ${1} | grep 'file format' | \
awk '{print $4}')
bin_file=.btf.vmlinux.bin
${OBJCOPY} --change-section-address .BTF=0 \
--set-section-flags .BTF=alloc -O binary \
--only-section=.BTF ${1} .btf.vmlinux.bin
--only-section=.BTF ${1} $bin_file
${OBJCOPY} -I binary -O ${bin_format} -B ${bin_arch} \
--rename-section .data=.BTF .btf.vmlinux.bin ${2}
--rename-section .data=.BTF $bin_file ${2}
}
# Create ${2} .o file with all symbols from the ${1} object file
@ -166,8 +178,8 @@ kallsyms()
kallsyms_step()
{
kallsymso_prev=${kallsymso}
kallsymso=.tmp_kallsyms${1}.o
kallsyms_vmlinux=.tmp_vmlinux${1}
kallsyms_vmlinux=.tmp_vmlinux.kallsyms${1}
kallsymso=${kallsyms_vmlinux}.o
vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o}
kallsyms ${kallsyms_vmlinux} ${kallsymso}
@ -190,7 +202,6 @@ cleanup()
{
rm -f .btf.*
rm -f .tmp_System.map
rm -f .tmp_kallsyms*
rm -f .tmp_vmlinux*
rm -f System.map
rm -f vmlinux
@ -257,9 +268,8 @@ tr '\0' '\n' < modules.builtin.modinfo | sed -n 's/^[[:alnum:]:_]*\.file=//p' |
btf_vmlinux_bin_o=""
if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
if gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then
btf_vmlinux_bin_o=.btf.vmlinux.bin.o
else
btf_vmlinux_bin_o=.btf.vmlinux.bin.o
if ! gen_btf .tmp_vmlinux.btf $btf_vmlinux_bin_o ; then
echo >&2 "Failed to generate BTF for vmlinux"
echo >&2 "Try to disable CONFIG_DEBUG_INFO_BTF"
exit 1

View File

@ -1,7 +1,9 @@
*.d
/_bpftool
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
profiler.skel.h

View File

@ -30,6 +30,7 @@ PROG COMMANDS
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
| **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs*
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@ -48,6 +49,9 @@ PROG COMMANDS
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
| *METRIC* := {
| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
| }
DESCRIPTION
@ -189,6 +193,12 @@ DESCRIPTION
not all of them can take the **ctx_in**/**ctx_out**
arguments. bpftool does not perform checks on program types.
**bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
Profile *METRICs* for bpf program *PROG* for *DURATION*
seconds or until user hits Ctrl-C. *DURATION* is optional.
If *DURATION* is not specified, the profiling will run up to
UINT_MAX seconds.
**bpftool prog help**
Print short help message.
@ -311,6 +321,15 @@ EXAMPLES
**# rm /sys/fs/bpf/xdp1**
|
| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
::
51397 run_cnt
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
SEE ALSO
========
**bpf**\ (2),

View File

@ -59,10 +59,12 @@ LIBS = $(LIBBPF) -lelf -lz
INSTALL ?= install
RM ?= rm -f
CLANG ?= clang
FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib
FEATURE_DISPLAY = libbfd disassembler-four-args zlib
FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \
clang-bpf-global-var
FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@ -110,14 +112,39 @@ SRCS += $(BFD_SRCS)
endif
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
ifeq ($(feature-clang-bpf-global-var),1)
__OBJS = $(OBJS)
else
__OBJS = $(_OBJS)
endif
$(OUTPUT)_prog.o: prog.c
$(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
$(QUIET_CLANG)$(CLANG) \
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
-g -O2 -target bpf -c $< -o $@
profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
$(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
$(OUTPUT)prog.o: prog.c profiler.skel.h
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
$(OUTPUT)feature.o: | zdep
$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
@ -125,6 +152,7 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
$(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
$(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool

View File

@ -337,6 +337,7 @@ _bpftool()
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@ -388,7 +389,7 @@ _bpftool()
_bpftool_get_prog_ids
;;
name)
_bpftool_get_map_names
_bpftool_get_prog_names
;;
pinned)
_filedir
@ -498,9 +499,51 @@ _bpftool()
tracelog)
return 0
;;
profile)
case $cword in
3)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
return 0
;;
4)
case $prev in
id)
_bpftool_get_prog_ids
;;
name)
_bpftool_get_prog_names
;;
pinned)
_filedir
;;
esac
return 0
;;
5)
COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) )
return 0
;;
6)
case $prev in
duration)
return 0
;;
*)
COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
return 0
;;
esac
return 0
;;
*)
COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
return 0
;;
esac
;;
run)
if [[ ${#words[@]} -lt 5 ]]; then
_filedir
if [[ ${#words[@]} -eq 4 ]]; then
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
return 0
fi
case $prev in
@ -508,6 +551,10 @@ _bpftool()
_bpftool_get_prog_ids
return 0
;;
name)
_bpftool_get_prog_names
return 0
;;
data_in|data_out|ctx_in|ctx_out)
_filedir
return 0
@ -525,7 +572,7 @@ _bpftool()
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'dump help pin attach detach \
load loadall show list tracelog run' -- "$cur" ) )
load loadall show list tracelog run profile' -- "$cur" ) )
;;
esac
;;
@ -713,11 +760,17 @@ _bpftool()
esac
;;
pin)
if [[ $prev == "$command" ]]; then
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
else
_filedir
fi
case $prev in
$command)
COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
;;
id)
_bpftool_get_map_ids
;;
name)
_bpftool_get_map_names
;;
esac
return 0
;;
event_pipe)
@ -844,7 +897,7 @@ _bpftool()
case $command in
skeleton)
_filedir
;;
;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
@ -944,6 +997,9 @@ _bpftool()
id)
_bpftool_get_prog_ids
;;
name)
_bpftool_get_prog_names
;;
pinned)
_filedir
;;

View File

@ -389,6 +389,9 @@ static int dump_btf_c(const struct btf *btf,
if (IS_ERR(d))
return PTR_ERR(d);
printf("#ifndef __VMLINUX_H__\n");
printf("#define __VMLINUX_H__\n");
printf("\n");
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
printf("#endif\n\n");
@ -412,6 +415,8 @@ static int dump_btf_c(const struct btf *btf,
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute pop\n");
printf("#endif\n");
printf("\n");
printf("#endif /* __VMLINUX_H__ */\n");
done:
btf_dump__free(d);

View File

@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name)
return err;
}
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
{
unsigned int id;
char *endptr;
int err;
int fd;
if (argc < 3) {
p_err("too few arguments, id ID and FILE path is required");
return -1;
} else if (argc > 3) {
p_err("too many arguments");
return -1;
}
if (!is_prefix(*argv, "id")) {
p_err("expected 'id' got %s", *argv);
return -1;
}
NEXT_ARG();
id = strtoul(*argv, &endptr, 0);
if (*endptr) {
p_err("can't parse %s as ID", *argv);
return -1;
}
NEXT_ARG();
fd = get_fd_by_id(id);
if (fd < 0) {
p_err("can't open object by id (%u): %s", id, strerror(errno));
return -1;
}
fd = get_fd(&argc, &argv);
if (fd < 0)
return fd;
err = do_pin_fd(fd, *argv);
@ -597,3 +572,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
return 0;
}
int __printf(2, 0)
print_all_levels(__maybe_unused enum libbpf_print_level level,
const char *format, va_list args)
{
return vfprintf(stderr, format, args);
}

View File

@ -79,13 +79,6 @@ static int do_version(int argc, char **argv)
return 0;
}
static int __printf(2, 0)
print_all_levels(__maybe_unused enum libbpf_print_level level,
const char *format, va_list args)
{
return vfprintf(stderr, format, args);
}
int cmd_select(const struct cmd *cmds, int argc, char **argv,
int (*help)(int argc, char **argv))
{

View File

@ -14,6 +14,8 @@
#include <linux/hashtable.h>
#include <tools/libc_compat.h>
#include <bpf/libbpf.h>
#include "json_writer.h"
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
@ -146,7 +148,7 @@ char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(char *path, bool quiet);
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
int do_prog(int argc, char **arg);
@ -229,4 +231,7 @@ struct tcmsg;
int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
const char *devname, int ifindex);
int print_all_levels(__maybe_unused enum libbpf_print_level level,
const char *format, va_list args);
#endif

View File

@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv)
{
int err;
err = do_pin_any(argc, argv, bpf_map_get_fd_by_id);
err = do_pin_any(argc, argv, map_parse_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;

View File

@ -4,6 +4,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@ -11,10 +12,13 @@
#include <time.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <linux/err.h>
#include <linux/perf_event.h>
#include <linux/sizes.h>
#include <bpf/bpf.h>
@ -809,7 +813,7 @@ static int do_pin(int argc, char **argv)
{
int err;
err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
err = do_pin_any(argc, argv, prog_parse_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;
@ -1243,6 +1247,25 @@ free_data_in:
return err;
}
static int
get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
enum bpf_attach_type *expected_attach_type)
{
libbpf_print_fn_t print_backup;
int ret;
ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
if (!ret)
return ret;
/* libbpf_prog_type_by_name() failed, let's re-run with debug level */
print_backup = libbpf_set_print(print_all_levels);
ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
libbpf_set_print(print_backup);
return ret;
}
static int load_with_options(int argc, char **argv, bool first_prog_only)
{
enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC;
@ -1292,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
strcat(type, *argv);
strcat(type, "/");
err = libbpf_prog_type_by_name(type, &common_prog_type,
&expected_attach_type);
err = get_prog_type_by_name(type, &common_prog_type,
&expected_attach_type);
free(type);
if (err < 0)
goto err_free_reuse_maps;
@ -1392,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
const char *sec_name = bpf_program__title(pos, false);
err = libbpf_prog_type_by_name(sec_name, &prog_type,
&expected_attach_type);
err = get_prog_type_by_name(sec_name, &prog_type,
&expected_attach_type);
if (err < 0)
goto err_close_obj;
}
@ -1537,6 +1560,422 @@ static int do_loadall(int argc, char **argv)
return load_with_options(argc, argv, false);
}
#ifdef BPFTOOL_WITHOUT_SKELETONS
static int do_profile(int argc, char **argv)
{
p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0");
return 0;
}
#else /* BPFTOOL_WITHOUT_SKELETONS */
#include "profiler.skel.h"
struct profile_metric {
const char *name;
struct bpf_perf_event_value val;
struct perf_event_attr attr;
bool selected;
/* calculate ratios like instructions per cycle */
const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */
const char *ratio_desc;
const float ratio_mul;
} metrics[] = {
{
.name = "cycles",
.attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.exclude_user = 1,
},
},
{
.name = "instructions",
.attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_INSTRUCTIONS,
.exclude_user = 1,
},
.ratio_metric = 1,
.ratio_desc = "insns per cycle",
.ratio_mul = 1.0,
},
{
.name = "l1d_loads",
.attr = {
.type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1D |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
.exclude_user = 1,
},
},
{
.name = "llc_misses",
.attr = {
.type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_LL |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
.exclude_user = 1
},
.ratio_metric = 2,
.ratio_desc = "LLC misses per million insns",
.ratio_mul = 1e6,
},
};
static __u64 profile_total_count;
#define MAX_NUM_PROFILE_METRICS 4
static int profile_parse_metrics(int argc, char **argv)
{
unsigned int metric_cnt;
int selected_cnt = 0;
unsigned int i;
metric_cnt = sizeof(metrics) / sizeof(struct profile_metric);
while (argc > 0) {
for (i = 0; i < metric_cnt; i++) {
if (is_prefix(argv[0], metrics[i].name)) {
if (!metrics[i].selected)
selected_cnt++;
metrics[i].selected = true;
break;
}
}
if (i == metric_cnt) {
p_err("unknown metric %s", argv[0]);
return -1;
}
NEXT_ARG();
}
if (selected_cnt > MAX_NUM_PROFILE_METRICS) {
p_err("too many (%d) metrics, please specify no more than %d metrics at at time",
selected_cnt, MAX_NUM_PROFILE_METRICS);
return -1;
}
return selected_cnt;
}
static void profile_read_values(struct profiler_bpf *obj)
{
__u32 m, cpu, num_cpu = obj->rodata->num_cpu;
int reading_map_fd, count_map_fd;
__u64 counts[num_cpu];
__u32 key = 0;
int err;
reading_map_fd = bpf_map__fd(obj->maps.accum_readings);
count_map_fd = bpf_map__fd(obj->maps.counts);
if (reading_map_fd < 0 || count_map_fd < 0) {
p_err("failed to get fd for map");
return;
}
err = bpf_map_lookup_elem(count_map_fd, &key, counts);
if (err) {
p_err("failed to read count_map: %s", strerror(errno));
return;
}
profile_total_count = 0;
for (cpu = 0; cpu < num_cpu; cpu++)
profile_total_count += counts[cpu];
for (m = 0; m < ARRAY_SIZE(metrics); m++) {
struct bpf_perf_event_value values[num_cpu];
if (!metrics[m].selected)
continue;
err = bpf_map_lookup_elem(reading_map_fd, &key, values);
if (err) {
p_err("failed to read reading_map: %s",
strerror(errno));
return;
}
for (cpu = 0; cpu < num_cpu; cpu++) {
metrics[m].val.counter += values[cpu].counter;
metrics[m].val.enabled += values[cpu].enabled;
metrics[m].val.running += values[cpu].running;
}
key++;
}
}
static void profile_print_readings_json(void)
{
__u32 m;
jsonw_start_array(json_wtr);
for (m = 0; m < ARRAY_SIZE(metrics); m++) {
if (!metrics[m].selected)
continue;
jsonw_start_object(json_wtr);
jsonw_string_field(json_wtr, "metric", metrics[m].name);
jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count);
jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter);
jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled);
jsonw_lluint_field(json_wtr, "running", metrics[m].val.running);
jsonw_end_object(json_wtr);
}
jsonw_end_array(json_wtr);
}
static void profile_print_readings_plain(void)
{
__u32 m;
printf("\n%18llu %-20s\n", profile_total_count, "run_cnt");
for (m = 0; m < ARRAY_SIZE(metrics); m++) {
struct bpf_perf_event_value *val = &metrics[m].val;
int r;
if (!metrics[m].selected)
continue;
printf("%18llu %-20s", val->counter, metrics[m].name);
r = metrics[m].ratio_metric - 1;
if (r >= 0 && metrics[r].selected &&
metrics[r].val.counter > 0) {
printf("# %8.2f %-30s",
val->counter * metrics[m].ratio_mul /
metrics[r].val.counter,
metrics[m].ratio_desc);
} else {
printf("%-41s", "");
}
if (val->enabled > val->running)
printf("(%4.2f%%)",
val->running * 100.0 / val->enabled);
printf("\n");
}
}
static void profile_print_readings(void)
{
if (json_output)
profile_print_readings_json();
else
profile_print_readings_plain();
}
static char *profile_target_name(int tgt_fd)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
char *name = NULL;
struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
if (IS_ERR_OR_NULL(info_linear)) {
p_err("failed to get info_linear for prog FD %d", tgt_fd);
return NULL;
}
if (info_linear->info.btf_id == 0 ||
btf__get_from_id(info_linear->info.btf_id, &btf)) {
p_err("prog FD %d doesn't have valid btf", tgt_fd);
goto out;
}
func_info = (struct bpf_func_info *)(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
p_err("btf %d doesn't have type %d",
info_linear->info.btf_id, func_info[0].type_id);
goto out;
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
free(info_linear);
return name;
}
static struct profiler_bpf *profile_obj;
static int profile_tgt_fd = -1;
static char *profile_tgt_name;
static int *profile_perf_events;
static int profile_perf_event_cnt;
static void profile_close_perf_events(struct profiler_bpf *obj)
{
int i;
for (i = profile_perf_event_cnt - 1; i >= 0; i--)
close(profile_perf_events[i]);
free(profile_perf_events);
profile_perf_event_cnt = 0;
}
static int profile_open_perf_events(struct profiler_bpf *obj)
{
unsigned int cpu, m;
int map_fd, pmu_fd;
profile_perf_events = calloc(
sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
if (!profile_perf_events) {
p_err("failed to allocate memory for perf_event array: %s",
strerror(errno));
return -1;
}
map_fd = bpf_map__fd(obj->maps.events);
if (map_fd < 0) {
p_err("failed to get fd for events map");
return -1;
}
for (m = 0; m < ARRAY_SIZE(metrics); m++) {
if (!metrics[m].selected)
continue;
for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
-1/*pid*/, cpu, -1/*group_fd*/, 0);
if (pmu_fd < 0 ||
bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
&pmu_fd, BPF_ANY) ||
ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
p_err("failed to create event %s on cpu %d",
metrics[m].name, cpu);
return -1;
}
profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
}
}
return 0;
}
static void profile_print_and_cleanup(void)
{
profile_close_perf_events(profile_obj);
profile_read_values(profile_obj);
profile_print_readings();
profiler_bpf__destroy(profile_obj);
close(profile_tgt_fd);
free(profile_tgt_name);
}
static void int_exit(int signo)
{
profile_print_and_cleanup();
exit(0);
}
static int do_profile(int argc, char **argv)
{
int num_metric, num_cpu, err = -1;
struct bpf_program *prog;
unsigned long duration;
char *endptr;
/* we at least need two args for the prog and one metric */
if (!REQ_ARGS(3))
return -EINVAL;
/* parse target fd */
profile_tgt_fd = prog_parse_fd(&argc, &argv);
if (profile_tgt_fd < 0) {
p_err("failed to parse fd");
return -1;
}
/* parse profiling optional duration */
if (argc > 2 && is_prefix(argv[0], "duration")) {
NEXT_ARG();
duration = strtoul(*argv, &endptr, 0);
if (*endptr)
usage();
NEXT_ARG();
} else {
duration = UINT_MAX;
}
num_metric = profile_parse_metrics(argc, argv);
if (num_metric <= 0)
goto out;
num_cpu = libbpf_num_possible_cpus();
if (num_cpu <= 0) {
p_err("failed to identify number of CPUs");
goto out;
}
profile_obj = profiler_bpf__open();
if (!profile_obj) {
p_err("failed to open and/or load BPF object");
goto out;
}
profile_obj->rodata->num_cpu = num_cpu;
profile_obj->rodata->num_metric = num_metric;
/* adjust map sizes */
bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu);
bpf_map__resize(profile_obj->maps.fentry_readings, num_metric);
bpf_map__resize(profile_obj->maps.accum_readings, num_metric);
bpf_map__resize(profile_obj->maps.counts, 1);
/* change target name */
profile_tgt_name = profile_target_name(profile_tgt_fd);
if (!profile_tgt_name)
goto out;
bpf_object__for_each_program(prog, profile_obj->obj) {
err = bpf_program__set_attach_target(prog, profile_tgt_fd,
profile_tgt_name);
if (err) {
p_err("failed to set attach target\n");
goto out;
}
}
set_max_rlimit();
err = profiler_bpf__load(profile_obj);
if (err) {
p_err("failed to load profile_obj");
goto out;
}
err = profile_open_perf_events(profile_obj);
if (err)
goto out;
err = profiler_bpf__attach(profile_obj);
if (err) {
p_err("failed to attach profile_obj");
goto out;
}
signal(SIGINT, int_exit);
sleep(duration);
profile_print_and_cleanup();
return 0;
out:
profile_close_perf_events(profile_obj);
if (profile_obj)
profiler_bpf__destroy(profile_obj);
close(profile_tgt_fd);
free(profile_tgt_name);
return err;
}
#endif /* BPFTOOL_WITHOUT_SKELETONS */
static int do_help(int argc, char **argv)
{
if (json_output) {
@ -1560,6 +1999,7 @@ static int do_help(int argc, char **argv)
" [data_out FILE [data_size_out L]] \\\n"
" [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
" [repeat N]\n"
" %s %s profile PROG [duration DURATION] METRICs\n"
" %s %s tracelog\n"
" %s %s help\n"
"\n"
@ -1577,12 +2017,13 @@ static int do_help(int argc, char **argv)
" struct_ops | fentry | fexit | freplace }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2]);
bin_name, argv[-2], bin_name, argv[-2]);
return 0;
}
@ -1599,6 +2040,7 @@ static const struct cmd cmds[] = {
{ "detach", do_detach },
{ "tracelog", do_tracelog },
{ "run", do_run },
{ "profile", do_profile },
{ 0 }
};

View File

@ -0,0 +1,119 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
#include "profiler.h"
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* map of perf event fds, num_cpu * num_metric entries */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(int));
} events SEC(".maps");
/* readings at fentry */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct bpf_perf_event_value));
} fentry_readings SEC(".maps");
/* accumulated readings */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct bpf_perf_event_value));
} accum_readings SEC(".maps");
/* sample counts, one per cpu */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u64));
} counts SEC(".maps");
const volatile __u32 num_cpu = 1;
const volatile __u32 num_metric = 1;
#define MAX_NUM_MATRICS 4
SEC("fentry/XXX")
int BPF_PROG(fentry_XXX)
{
struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
u32 key = bpf_get_smp_processor_id();
u32 i;
/* look up before reading, to reduce error */
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
u32 flag = i;
ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
if (!ptrs[i])
return 0;
}
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
struct bpf_perf_event_value reading;
int err;
err = bpf_perf_event_read_value(&events, key, &reading,
sizeof(reading));
if (err)
return 0;
*(ptrs[i]) = reading;
key += num_cpu;
}
return 0;
}
static inline void
fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
{
struct bpf_perf_event_value *before, diff, *accum;
before = bpf_map_lookup_elem(&fentry_readings, &id);
/* only account samples with a valid fentry_reading */
if (before && before->counter) {
struct bpf_perf_event_value *accum;
diff.counter = after->counter - before->counter;
diff.enabled = after->enabled - before->enabled;
diff.running = after->running - before->running;
accum = bpf_map_lookup_elem(&accum_readings, &id);
if (accum) {
accum->counter += diff.counter;
accum->enabled += diff.enabled;
accum->running += diff.running;
}
}
}
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
u32 cpu = bpf_get_smp_processor_id();
u32 i, one = 1, zero = 0;
int err;
u64 *count;
/* read all events before updating the maps, to reduce error */
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
readings + i, sizeof(*readings));
if (err)
return 0;
}
count = bpf_map_lookup_elem(&counts, &zero);
if (count) {
*count += 1;
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
fexit_update_maps(i, &readings[i]);
}
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -0,0 +1,46 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __PROFILER_H
#define __PROFILER_H
/* useful typedefs from vmlinux.h */
typedef signed char __s8;
typedef unsigned char __u8;
typedef short int __s16;
typedef short unsigned int __u16;
typedef int __s32;
typedef unsigned int __u32;
typedef long long int __s64;
typedef long long unsigned int __u64;
typedef __s8 s8;
typedef __u8 u8;
typedef __s16 s16;
typedef __u16 u16;
typedef __s32 s32;
typedef __u32 u32;
typedef __s64 s64;
typedef __u64 u64;
enum {
false = 0,
true = 1,
};
#ifdef __CHECKER__
#define __bitwise__ __attribute__((bitwise))
#else
#define __bitwise__
#endif
typedef __u16 __bitwise__ __le16;
typedef __u16 __bitwise__ __be16;
typedef __u32 __bitwise__ __le32;
typedef __u32 __bitwise__ __be32;
typedef __u64 __bitwise__ __le64;
typedef __u64 __bitwise__ __be64;
typedef __u16 __bitwise__ __sum16;
typedef __u32 __bitwise__ __wsum;
#endif /* __PROFILER_H */

View File

@ -5,9 +5,7 @@
#include "runqslower.h"
#define TASK_RUNNING 0
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
#define BPF_F_CURRENT_CPU 0xffffffffULL
const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;

View File

@ -67,7 +67,8 @@ FILES= \
test-llvm.bin \
test-llvm-version.bin \
test-libaio.bin \
test-libzstd.bin
test-libzstd.bin \
test-clang-bpf-global-var.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@ -75,6 +76,7 @@ CC ?= $(CROSS_COMPILE)gcc
CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
CLANG ?= clang
all: $(FILES)
@ -321,6 +323,11 @@ $(OUTPUT)test-libaio.bin:
$(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
$(OUTPUT)test-clang-bpf-global-var.bin:
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
grep BTF_KIND_VAR
###############################
clean:

View File

@ -0,0 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
volatile int global_value_for_test = 1;

View File

@ -73,7 +73,7 @@ struct bpf_insn {
/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
__u8 data[]; /* Arbitrary size */
};
struct bpf_cgroup_storage_key {
@ -210,6 +210,7 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
__MAX_BPF_ATTACH_TYPE
};
@ -325,44 +326,46 @@ enum bpf_attach_type {
#define BPF_PSEUDO_CALL 1
/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
enum {
BPF_ANY = 0, /* create new element or update existing */
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
};
/* flags for BPF_MAP_CREATE command */
#define BPF_F_NO_PREALLOC (1U << 0)
enum {
BPF_F_NO_PREALLOC = (1U << 0),
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
* which can scale and perform better.
* Note, the LRU nodes (including free nodes) cannot be moved
* across different LRU lists.
*/
#define BPF_F_NO_COMMON_LRU (1U << 1)
BPF_F_NO_COMMON_LRU = (1U << 1),
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
#define BPF_OBJ_NAME_LEN 16U
BPF_F_NUMA_NODE = (1U << 2),
/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
BPF_F_RDONLY = (1U << 3),
BPF_F_WRONLY = (1U << 4),
/* Flag for stack_map, store build_id+offset instead of pointer */
#define BPF_F_STACK_BUILD_ID (1U << 5)
BPF_F_STACK_BUILD_ID = (1U << 5),
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
BPF_F_ZERO_SEED = (1U << 6),
/* Flags for accessing BPF object from program side. */
#define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8)
BPF_F_RDONLY_PROG = (1U << 7),
BPF_F_WRONLY_PROG = (1U << 8),
/* Clone map from listener for newly accepted socket */
#define BPF_F_CLONE (1U << 9)
BPF_F_CLONE = (1U << 9),
/* Enable memory-mapping BPF map */
#define BPF_F_MMAPABLE (1U << 10)
BPF_F_MMAPABLE = (1U << 10),
};
/* Flags for BPF_PROG_QUERY. */
@ -391,6 +394,8 @@ struct bpf_stack_build_id {
};
};
#define BPF_OBJ_NAME_LEN 16U
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@ -2909,6 +2914,42 @@ union bpf_attr {
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
*
* int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
*
* **-ENOENT** if pidns does not exists for the current task.
*
* int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
* event must have the following attributes: **PERF_SAMPLE_RAW**
* as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
* **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
*
* The *flags* are used to indicate the index in *map* for which
* the value must be put, masked with **BPF_F_INDEX_MASK**.
* Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
* to indicate that the index of the current CPU core should be
* used.
*
* The value to write, of *size*, is passed through eBPF stack and
* pointed by *data*.
*
* *ctx* is a pointer to in-kernel struct xdp_buff.
*
* This helper is similar to **bpf_perf_eventoutput**\ () but
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -3030,7 +3071,9 @@ union bpf_attr {
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
FN(read_branch_records),
FN(read_branch_records), \
FN(get_ns_current_pid_tgid), \
FN(xdp_output),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3045,72 +3088,100 @@ enum bpf_func_id {
/* All flags used by eBPF helper functions, placed here. */
/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
#define BPF_F_INVALIDATE_HASH (1ULL << 1)
enum {
BPF_F_RECOMPUTE_CSUM = (1ULL << 0),
BPF_F_INVALIDATE_HASH = (1ULL << 1),
};
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
*/
#define BPF_F_HDR_FIELD_MASK 0xfULL
enum {
BPF_F_HDR_FIELD_MASK = 0xfULL,
};
/* BPF_FUNC_l4_csum_replace flags. */
#define BPF_F_PSEUDO_HDR (1ULL << 4)
#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
#define BPF_F_MARK_ENFORCE (1ULL << 6)
enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
};
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
#define BPF_F_INGRESS (1ULL << 0)
enum {
BPF_F_INGRESS = (1ULL << 0),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
enum {
BPF_F_TUNINFO_IPV6 = (1ULL << 0),
};
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
#define BPF_F_SKIP_FIELD_MASK 0xffULL
#define BPF_F_USER_STACK (1ULL << 8)
enum {
BPF_F_SKIP_FIELD_MASK = 0xffULL,
BPF_F_USER_STACK = (1ULL << 8),
/* flags used by BPF_FUNC_get_stackid only. */
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
#define BPF_F_REUSE_STACKID (1ULL << 10)
BPF_F_FAST_STACK_CMP = (1ULL << 9),
BPF_F_REUSE_STACKID = (1ULL << 10),
/* flags used by BPF_FUNC_get_stack only. */
#define BPF_F_USER_BUILD_ID (1ULL << 11)
BPF_F_USER_BUILD_ID = (1ULL << 11),
};
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
#define BPF_F_SEQ_NUMBER (1ULL << 3)
enum {
BPF_F_ZERO_CSUM_TX = (1ULL << 1),
BPF_F_DONT_FRAGMENT = (1ULL << 2),
BPF_F_SEQ_NUMBER = (1ULL << 3),
};
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
*/
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
enum {
BPF_F_CURRENT_NETNS = (-1L),
};
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
enum {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
};
#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
enum {
BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff,
BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56,
};
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* BPF_FUNC_sysctl_get_name flags. */
#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};
/* BPF_FUNC_sk_storage_get flags */
#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
enum {
BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
};
/* BPF_FUNC_read_branch_records flags. */
#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0)
enum {
BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
};
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
@ -3176,6 +3247,7 @@ struct __sk_buff {
__u32 wire_len;
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
__u32 gso_size;
};
struct bpf_tunnel_key {
@ -3528,13 +3600,14 @@ struct bpf_sock_ops {
};
/* Definitions for bpf_sock_ops_cb_flags */
#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
* supported cb flags
*/
enum {
BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0),
BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
/* Mask of all currently supported cb flags */
BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
};
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
@ -3613,8 +3686,10 @@ enum {
BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
enum {
TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
};
struct bpf_perf_event_value {
__u64 counter;
@ -3622,12 +3697,16 @@ struct bpf_perf_event_value {
__u64 running;
};
#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
#define BPF_DEVCG_ACC_READ (1ULL << 1)
#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
enum {
BPF_DEVCG_ACC_MKNOD = (1ULL << 0),
BPF_DEVCG_ACC_READ = (1ULL << 1),
BPF_DEVCG_ACC_WRITE = (1ULL << 2),
};
#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
enum {
BPF_DEVCG_DEV_BLOCK = (1ULL << 0),
BPF_DEVCG_DEV_CHAR = (1ULL << 1),
};
struct bpf_cgroup_dev_ctx {
/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
@ -3643,8 +3722,10 @@ struct bpf_raw_tracepoint_args {
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
* OUTPUT: Do lookup from egress perspective; default is ingress
*/
#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
};
enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
@ -3716,9 +3797,11 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
enum {
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0),
BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1),
BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2),
};
struct bpf_flow_keys {
__u16 nhoff;
@ -3784,4 +3867,8 @@ struct bpf_sockopt {
__s32 retval;
};
struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
#endif /* _UAPI__LINUX_BPF_H__ */

View File

@ -49,7 +49,8 @@
#if defined(bpf_target_x86)
#ifdef __KERNEL__
#if defined(__KERNEL__) || defined(__VMLINUX_H__)
#define PT_REGS_PARM1(x) ((x)->di)
#define PT_REGS_PARM2(x) ((x)->si)
#define PT_REGS_PARM3(x) ((x)->dx)
@ -60,7 +61,20 @@
#define PT_REGS_RC(x) ((x)->ax)
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->ip)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
#else
#ifdef __i386__
/* i386 kernel is built with -mregparm=3 */
#define PT_REGS_PARM1(x) ((x)->eax)
@ -73,7 +87,20 @@
#define PT_REGS_RC(x) ((x)->eax)
#define PT_REGS_SP(x) ((x)->esp)
#define PT_REGS_IP(x) ((x)->eip)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
#define PT_REGS_PARM4_CORE(x) 0
#define PT_REGS_PARM5_CORE(x) 0
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
#else
#define PT_REGS_PARM1(x) ((x)->rdi)
#define PT_REGS_PARM2(x) ((x)->rsi)
#define PT_REGS_PARM3(x) ((x)->rdx)
@ -84,6 +111,18 @@
#define PT_REGS_RC(x) ((x)->rax)
#define PT_REGS_SP(x) ((x)->rsp)
#define PT_REGS_IP(x) ((x)->rip)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
#endif
#endif
@ -104,6 +143,17 @@ struct pt_regs;
#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr)
#elif defined(bpf_target_arm)
#define PT_REGS_PARM1(x) ((x)->uregs[0])
@ -117,6 +167,17 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->uregs[13])
#define PT_REGS_IP(x) ((x)->uregs[12])
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
#elif defined(bpf_target_arm64)
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
@ -134,6 +195,17 @@ struct pt_regs;
#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
#elif defined(bpf_target_mips)
#define PT_REGS_PARM1(x) ((x)->regs[4])
@ -147,6 +219,17 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
#elif defined(bpf_target_powerpc)
#define PT_REGS_PARM1(x) ((x)->gpr[3])
@ -158,6 +241,15 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->nip)
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
#elif defined(bpf_target_sparc)
#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
@ -169,11 +261,22 @@ struct pt_regs;
#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
/* Should this also be a bpf_target check for the sparc case? */
#if defined(__arch64__)
#define PT_REGS_IP(x) ((x)->tpc)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
#else
#define PT_REGS_IP(x) ((x)->pc)
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
#endif
#endif
@ -192,4 +295,122 @@ struct pt_regs;
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
#define ___bpf_concat(a, b) a ## b
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
#define ___bpf_narg(...) \
___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define ___bpf_empty(...) \
___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
#define ___bpf_ctx_cast0() ctx
#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
#define ___bpf_ctx_cast(args...) \
___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
/*
* BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
* similar kinds of BPF programs, that accept input arguments as a single
* pointer to untyped u64 array, where each u64 can actually be a typed
* pointer or integer of different size. Instead of requring user to write
* manual casts and work with array elements by index, BPF_PROG macro
* allows user to declare a list of named and typed input arguments in the
* same syntax as for normal C function. All the casting is hidden and
* performed transparently, while user code can just assume working with
* function arguments of specified type and name.
*
* Original raw context argument is preserved as well as 'ctx' argument.
* This is useful when using BPF helpers that expect original context
* as one of the parameters (e.g., for bpf_perf_event_output()).
*/
#define BPF_PROG(name, args...) \
name(unsigned long long *ctx); \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(unsigned long long *ctx, ##args); \
typeof(name(0)) name(unsigned long long *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
return ____##name(___bpf_ctx_cast(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(unsigned long long *ctx, ##args)
struct pt_regs;
#define ___bpf_kprobe_args0() ctx
#define ___bpf_kprobe_args1(x) \
___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
#define ___bpf_kprobe_args2(x, args...) \
___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
#define ___bpf_kprobe_args3(x, args...) \
___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
#define ___bpf_kprobe_args4(x, args...) \
___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
#define ___bpf_kprobe_args5(x, args...) \
___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
#define ___bpf_kprobe_args(args...) \
___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
* tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
* low-level way of getting kprobe input arguments from struct pt_regs, and
* provides a familiar typed and named function arguments syntax and
* semantics of accessing kprobe input paremeters.
*
* Original struct pt_regs* context is preserved as 'ctx' argument. This might
* be necessary when using BPF helpers like bpf_perf_event_output().
*/
#define BPF_KPROBE(name, args...) \
name(struct pt_regs *ctx); \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
return ____##name(___bpf_kprobe_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
#define ___bpf_kretprobe_args0() ctx
#define ___bpf_kretprobe_args1(x) \
___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx)
#define ___bpf_kretprobe_args(args...) \
___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
* return value (in addition to `struct pt_regs *ctx`), but no input
* arguments, because they will be clobbered by the time probed function
* returns.
*/
#define BPF_KRETPROBE(name, args...) \
name(struct pt_regs *ctx); \
static __attribute__((always_inline)) typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
return ____##name(___bpf_kretprobe_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
#endif

View File

@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
/* enumerators share namespace with typedef idents */
dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
if (dup_cnt > 1) {
btf_dump_printf(d, "\n%s%s___%zu = %d,",
btf_dump_printf(d, "\n%s%s___%zu = %u,",
pfx(lvl + 1), name, dup_cnt,
(__s32)v->val);
(__u32)v->val);
} else {
btf_dump_printf(d, "\n%s%s = %d,",
btf_dump_printf(d, "\n%s%s = %u,",
pfx(lvl + 1), name,
(__s32)v->val);
(__u32)v->val);
}
}
btf_dump_printf(d, "\n%s}", pfx(lvl));
@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
return -EINVAL;
fname = OPTS_GET(opts, field_name, NULL);
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
btf_dump_emit_type_decl(d, id, fname, lvl);
return 0;

View File

@ -2284,9 +2284,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
}
}
static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.st_ops_shndx >= 0 || obj->nr_extern > 0;
return obj->efile.btf_maps_shndx >= 0 ||
obj->efile.st_ops_shndx >= 0 ||
obj->nr_extern > 0;
}
static bool kernel_needs_btf(const struct bpf_object *obj)
{
return obj->efile.st_ops_shndx >= 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@ -2322,7 +2329,7 @@ static int bpf_object__init_btf(struct bpf_object *obj,
}
}
out:
if (err && bpf_object__is_btf_mandatory(obj)) {
if (err && libbpf_needs_btf(obj)) {
pr_warn("BTF is required, but is missing or corrupted.\n");
return err;
}
@ -2346,7 +2353,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
btf_ext__free(obj->btf_ext);
obj->btf_ext = NULL;
if (bpf_object__is_btf_mandatory(obj)) {
if (libbpf_needs_btf(obj)) {
pr_warn("BTF is required, but is missing or corrupted.\n");
return -ENOENT;
}
@ -2410,7 +2417,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
obj->btf_ext = NULL;
}
if (bpf_object__is_btf_mandatory(obj))
if (kernel_needs_btf(obj))
return err;
}
return 0;
@ -3866,6 +3873,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
if (str_is_empty(targ_name))
continue;
t = skip_mods_and_typedefs(targ_btf, i, NULL);
if (!btf_is_composite(t) && !btf_is_array(t))
continue;
targ_essent_len = bpf_core_essential_name_len(targ_name);
if (targ_essent_len != local_essent_len)
continue;
@ -6288,6 +6299,10 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FENTRY,
.is_attach_btf = true,
.attach_fn = attach_trace),
SEC_DEF("fmod_ret/", TRACING,
.expected_attach_type = BPF_MODIFY_RETURN,
.is_attach_btf = true,
.attach_fn = attach_trace),
SEC_DEF("fexit/", TRACING,
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
@ -6931,6 +6946,8 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_link {
int (*detach)(struct bpf_link *link);
int (*destroy)(struct bpf_link *link);
char *pin_path; /* NULL, if not pinned */
int fd; /* hook FD, -1 if not applicable */
bool disconnected;
};
@ -6960,26 +6977,109 @@ int bpf_link__destroy(struct bpf_link *link)
err = link->detach(link);
if (link->destroy)
link->destroy(link);
if (link->pin_path)
free(link->pin_path);
free(link);
return err;
}
struct bpf_link_fd {
struct bpf_link link; /* has to be at the top of struct */
int fd; /* hook FD */
};
int bpf_link__fd(const struct bpf_link *link)
{
return link->fd;
}
const char *bpf_link__pin_path(const struct bpf_link *link)
{
return link->pin_path;
}
static int bpf_link__detach_fd(struct bpf_link *link)
{
return close(link->fd);
}
struct bpf_link *bpf_link__open(const char *path)
{
struct bpf_link *link;
int fd;
fd = bpf_obj_get(path);
if (fd < 0) {
fd = -errno;
pr_warn("failed to open link at %s: %d\n", path, fd);
return ERR_PTR(fd);
}
link = calloc(1, sizeof(*link));
if (!link) {
close(fd);
return ERR_PTR(-ENOMEM);
}
link->detach = &bpf_link__detach_fd;
link->fd = fd;
link->pin_path = strdup(path);
if (!link->pin_path) {
bpf_link__destroy(link);
return ERR_PTR(-ENOMEM);
}
return link;
}
int bpf_link__pin(struct bpf_link *link, const char *path)
{
int err;
if (link->pin_path)
return -EBUSY;
err = make_parent_dir(path);
if (err)
return err;
err = check_path(path);
if (err)
return err;
link->pin_path = strdup(path);
if (!link->pin_path)
return -ENOMEM;
if (bpf_obj_pin(link->fd, link->pin_path)) {
err = -errno;
zfree(&link->pin_path);
return err;
}
pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
return 0;
}
int bpf_link__unpin(struct bpf_link *link)
{
int err;
if (!link->pin_path)
return -EINVAL;
err = unlink(link->pin_path);
if (err != 0)
return -errno;
pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
zfree(&link->pin_path);
return 0;
}
static int bpf_link__detach_perf_event(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
int err;
err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
if (err)
err = -errno;
close(l->fd);
close(link->fd);
return err;
}
@ -6987,7 +7087,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int pfd)
{
char errmsg[STRERR_BUFSIZE];
struct bpf_link_fd *link;
struct bpf_link *link;
int prog_fd, err;
if (pfd < 0) {
@ -7005,7 +7105,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
link->link.detach = &bpf_link__detach_perf_event;
link->detach = &bpf_link__detach_perf_event;
link->fd = pfd;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
@ -7024,7 +7124,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return ERR_PTR(err);
}
return (struct bpf_link *)link;
return link;
}
/*
@ -7312,18 +7412,11 @@ out:
return link;
}
static int bpf_link__detach_fd(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
return close(l->fd);
}
struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name)
{
char errmsg[STRERR_BUFSIZE];
struct bpf_link_fd *link;
struct bpf_link *link;
int prog_fd, pfd;
prog_fd = bpf_program__fd(prog);
@ -7336,7 +7429,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
link->link.detach = &bpf_link__detach_fd;
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
if (pfd < 0) {
@ -7348,7 +7441,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return ERR_PTR(pfd);
}
link->fd = pfd;
return (struct bpf_link *)link;
return link;
}
static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
@ -7362,7 +7455,7 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
{
char errmsg[STRERR_BUFSIZE];
struct bpf_link_fd *link;
struct bpf_link *link;
int prog_fd, pfd;
prog_fd = bpf_program__fd(prog);
@ -7375,7 +7468,7 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
link->link.detach = &bpf_link__detach_fd;
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
if (pfd < 0) {
@ -7409,10 +7502,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
__u32 zero = 0;
if (bpf_map_delete_elem(l->fd, &zero))
if (bpf_map_delete_elem(link->fd, &zero))
return -errno;
return 0;
@ -7421,7 +7513,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link)
struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
{
struct bpf_struct_ops *st_ops;
struct bpf_link_fd *link;
struct bpf_link *link;
__u32 i, zero = 0;
int err;
@ -7453,10 +7545,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
return ERR_PTR(err);
}
link->link.detach = bpf_link__detach_struct_ops;
link->detach = bpf_link__detach_struct_ops;
link->fd = map->fd;
return (struct bpf_link *)link;
return link;
}
enum bpf_perf_event_ret

View File

@ -219,6 +219,11 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_link;
LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);

View File

@ -238,5 +238,10 @@ LIBBPF_0.0.7 {
LIBBPF_0.0.8 {
global:
bpf_link__fd;
bpf_link__open;
bpf_link__pin;
bpf_link__pin_path;
bpf_link__unpin;
bpf_program__set_attach_target;
} LIBBPF_0.0.7;

View File

@ -106,6 +106,7 @@ ifneq ($(silent),1)
ifneq ($(V),1)
QUIET_CC = @echo ' CC '$@;
QUIET_CC_FPIC = @echo ' CC FPIC '$@;
QUIET_CLANG = @echo ' CLANG '$@;
QUIET_AR = @echo ' AR '$@;
QUIET_LINK = @echo ' LINK '$@;
QUIET_MKDIR = @echo ' MKDIR '$@;

View File

@ -31,6 +31,7 @@ test_tcp_check_syncookie_user
test_sysctl
test_hashmap
test_btf_dump
test_current_pid_tgid_new_ns
xdping
test_cpp
*.skel.h

View File

@ -20,8 +20,9 @@ CLANG ?= clang
LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \
-I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \
-I$(APIDIR) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
@ -32,7 +33,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_progs-no_alu32
test_progs-no_alu32 \
test_current_pid_tgid_new_ns
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
@ -129,10 +131,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
$(CC) -c $(CFLAGS) -o $@ $<
VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))
VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
$(OUTPUT)/runqslower: $(BPFOBJ)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
@ -172,6 +177,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
mkdir -p $@
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
$(call msg,GEN,,$@)
$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
@ -190,8 +199,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \
-I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include)
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
-I$(abspath $(OUTPUT)/../usr/include)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
-Wno-compare-distinct-pointer-types
@ -280,6 +289,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \

View File

@ -6,7 +6,7 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \

View File

@ -1,120 +0,0 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __BPF_TRACE_HELPERS_H
#define __BPF_TRACE_HELPERS_H
#include <bpf/bpf_helpers.h>
#define ___bpf_concat(a, b) a ## b
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
#define ___bpf_narg(...) \
___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define ___bpf_empty(...) \
___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
#define ___bpf_ctx_cast0() ctx
#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
#define ___bpf_ctx_cast(args...) \
___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
/*
* BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
* similar kinds of BPF programs, that accept input arguments as a single
* pointer to untyped u64 array, where each u64 can actually be a typed
* pointer or integer of different size. Instead of requring user to write
* manual casts and work with array elements by index, BPF_PROG macro
* allows user to declare a list of named and typed input arguments in the
* same syntax as for normal C function. All the casting is hidden and
* performed transparently, while user code can just assume working with
* function arguments of specified type and name.
*
* Original raw context argument is preserved as well as 'ctx' argument.
* This is useful when using BPF helpers that expect original context
* as one of the parameters (e.g., for bpf_perf_event_output()).
*/
#define BPF_PROG(name, args...) \
name(unsigned long long *ctx); \
static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args); \
typeof(name(0)) name(unsigned long long *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
return ____##name(___bpf_ctx_cast(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args)
struct pt_regs;
#define ___bpf_kprobe_args0() ctx
#define ___bpf_kprobe_args1(x) \
___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
#define ___bpf_kprobe_args2(x, args...) \
___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
#define ___bpf_kprobe_args3(x, args...) \
___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
#define ___bpf_kprobe_args4(x, args...) \
___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
#define ___bpf_kprobe_args5(x, args...) \
___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
#define ___bpf_kprobe_args(args...) \
___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
/*
* BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
* tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
* low-level way of getting kprobe input arguments from struct pt_regs, and
* provides a familiar typed and named function arguments syntax and
* semantics of accessing kprobe input paremeters.
*
* Original struct pt_regs* context is preserved as 'ctx' argument. This might
* be necessary when using BPF helpers like bpf_perf_event_output().
*/
#define BPF_KPROBE(name, args...) \
name(struct pt_regs *ctx); \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
return ____##name(___bpf_kprobe_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
#define ___bpf_kretprobe_args0() ctx
#define ___bpf_kretprobe_argsN(x, args...) \
___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx)
#define ___bpf_kretprobe_args(args...) \
___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args)
/*
* BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all
* input kprobe arguments, one last extra argument has to be specified, which
* captures kprobe return value.
*/
#define BPF_KRETPROBE(name, args...) \
name(struct pt_regs *ctx); \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
return ____##name(___bpf_kretprobe_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
#endif

View File

@ -6,7 +6,7 @@
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(void)
{

View File

@ -6,7 +6,7 @@
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int map_fd = -1;

View File

@ -8,7 +8,7 @@
#define BAR "/foo/bar/"
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int verdict)
{

View File

@ -1,22 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
#include "fexit_test.skel.h"
void test_fentry_fexit(void)
{
struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
struct fexit_test *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
__u32 duration = 0, retval;
int err, pkt_fd, i;
int err, prog_fd, i;
pkt_skel = test_pkt_access__open_and_load();
if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto close_prog;
@ -31,8 +26,8 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
prog_fd = bpf_program__fd(fexit_skel->progs.test1);
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
@ -49,7 +44,6 @@ void test_fentry_fexit(void)
}
close_prog:
test_pkt_access__destroy(pkt_skel);
fentry_test__destroy(fentry_skel);
fexit_test__destroy(fexit_skel);
}

View File

@ -1,20 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
void test_fentry_test(void)
{
struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
int err, pkt_fd, i;
int err, prog_fd, i;
__u32 duration = 0, retval;
__u64 *result;
pkt_skel = test_pkt_access__open_and_load();
if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto cleanup;
@ -23,10 +18,10 @@ void test_fentry_test(void)
if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto cleanup;
pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
prog_fd = bpf_program__fd(fentry_skel->progs.test1);
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
@ -39,5 +34,4 @@ void test_fentry_test(void)
cleanup:
fentry_test__destroy(fentry_skel);
test_pkt_access__destroy(pkt_skel);
}

View File

@ -1,64 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "fexit_test.skel.h"
void test_fexit_test(void)
{
struct bpf_prog_load_attr attr = {
.file = "./fexit_test.o",
};
char prog_name[] = "fexit/bpf_fentry_testX";
struct bpf_object *obj = NULL, *pkt_obj;
int err, pkt_fd, kfree_skb_fd, i;
struct bpf_link *link[6] = {};
struct bpf_program *prog[6];
struct fexit_test *fexit_skel = NULL;
int err, prog_fd, i;
__u32 duration = 0, retval;
struct bpf_map *data_map;
const int zero = 0;
u64 result[6];
__u64 *result;
err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
&pkt_obj, &pkt_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
return;
err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
goto close_prog;
fexit_skel = fexit_test__open_and_load();
if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
goto cleanup;
for (i = 0; i < 6; i++) {
prog_name[sizeof(prog_name) - 2] = '1' + i;
prog[i] = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
goto close_prog;
link[i] = bpf_program__attach_trace(prog[i]);
if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
goto close_prog;
}
data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
if (CHECK(!data_map, "find_data_map", "data map not found\n"))
goto close_prog;
err = fexit_test__attach(fexit_skel);
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto cleanup;
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
prog_fd = bpf_program__fd(fexit_skel->progs.test1);
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
result = (__u64 *)fexit_skel->bss;
for (i = 0; i < 6; i++) {
if (CHECK(result[i] != 1, "result",
"fexit_test%d failed err %lld\n", i + 1, result[i]))
goto cleanup;
}
for (i = 0; i < 6; i++)
if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
i + 1, result[i]))
goto close_prog;
close_prog:
for (i = 0; i < 6; i++)
if (!IS_ERR_OR_NULL(link[i]))
bpf_link__destroy(link[i]);
bpf_object__close(obj);
bpf_object__close(pkt_obj);
cleanup:
fexit_test__destroy(fexit_skel);
}

View File

@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
#include <sys/stat.h>
#include "test_link_pinning.skel.h"
static int duration = 0;
void test_link_pinning_subtest(struct bpf_program *prog,
struct test_link_pinning__bss *bss)
{
const char *link_pin_path = "/sys/fs/bpf/pinned_link_test";
struct stat statbuf = {};
struct bpf_link *link;
int err, i;
link = bpf_program__attach(prog);
if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
goto cleanup;
bss->in = 1;
usleep(1);
CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out);
/* pin link */
err = bpf_link__pin(link, link_pin_path);
if (CHECK(err, "link_pin", "err: %d\n", err))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1",
"exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
/* check that link was pinned */
err = stat(link_pin_path, &statbuf);
if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno))
goto cleanup;
bss->in = 2;
usleep(1);
CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out);
/* destroy link, pinned link should keep program attached */
bpf_link__destroy(link);
link = NULL;
bss->in = 3;
usleep(1);
CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out);
/* re-open link from BPFFS */
link = bpf_link__open(link_pin_path);
if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
"exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
/* unpin link from BPFFS, program still attached */
err = bpf_link__unpin(link);
if (CHECK(err, "link_unpin", "err: %d\n", err))
goto cleanup;
/* still active, as we have FD open now */
bss->in = 4;
usleep(1);
CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out);
bpf_link__destroy(link);
link = NULL;
/* Validate it's finally detached.
* Actual detachment might get delayed a bit, so there is no reliable
* way to validate it immediately here, let's count up for long enough
* and see if eventually output stops being updated
*/
for (i = 5; i < 10000; i++) {
bss->in = i;
usleep(1);
if (bss->out == i - 1)
break;
}
CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
cleanup:
if (!IS_ERR(link))
bpf_link__destroy(link);
}
void test_link_pinning(void)
{
struct test_link_pinning* skel;
skel = test_link_pinning__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
if (test__start_subtest("pin_raw_tp"))
test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss);
if (test__start_subtest("pin_tp_btf"))
test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss);
test_link_pinning__destroy(skel);
}

View File

@ -0,0 +1,65 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2020 Google LLC.
*/
#include <test_progs.h>
#include "modify_return.skel.h"
#define LOWER(x) ((x) & 0xffff)
#define UPPER(x) ((x) >> 16)
static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
{
struct modify_return *skel = NULL;
int err, prog_fd;
__u32 duration = 0, retval;
__u16 side_effect;
__s16 ret;
skel = modify_return__open_and_load();
if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
goto cleanup;
err = modify_return__attach(skel);
if (CHECK(err, "modify_return", "attach failed: %d\n", err))
goto cleanup;
skel->bss->input_retval = input_retval;
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
&retval, &duration);
CHECK(err, "test_run", "err %d errno %d\n", err, errno);
side_effect = UPPER(retval);
ret = LOWER(retval);
CHECK(ret != want_ret, "test_run",
"unexpected ret: %d, expected: %d\n", ret, want_ret);
CHECK(side_effect != want_side_effect, "modify_return",
"unexpected side_effect: %d\n", side_effect);
CHECK(skel->bss->fentry_result != 1, "modify_return",
"fentry failed\n");
CHECK(skel->bss->fexit_result != 1, "modify_return",
"fexit failed\n");
CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
"fmod_ret failed\n");
cleanup:
modify_return__destroy(skel);
}
void test_modify_return(void)
{
run_test(0 /* input_retval */,
1 /* want_side_effect */,
4 /* want_ret */);
run_test(-EINVAL /* input_retval */,
0 /* want_side_effect */,
-EINVAL /* want_ret */);
}

View File

@ -0,0 +1,88 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
#include <test_progs.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/syscall.h>
struct bss {
__u64 dev;
__u64 ino;
__u64 pid_tgid;
__u64 user_pid_tgid;
};
void test_ns_current_pid_tgid(void)
{
const char *probe_name = "raw_tracepoint/sys_enter";
const char *file = "test_ns_current_pid_tgid.o";
int err, key = 0, duration = 0;
struct bpf_link *link = NULL;
struct bpf_program *prog;
struct bpf_map *bss_map;
struct bpf_object *obj;
struct bss bss;
struct stat st;
__u64 id;
obj = bpf_object__open_file(file, NULL);
if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
return;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
goto cleanup;
bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
if (CHECK(!bss_map, "find_bss_map", "failed\n"))
goto cleanup;
prog = bpf_object__find_program_by_title(obj, probe_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
probe_name))
goto cleanup;
memset(&bss, 0, sizeof(bss));
pid_t tid = syscall(SYS_gettid);
pid_t pid = getpid();
id = (__u64) tid << 32 | pid;
bss.user_pid_tgid = id;
if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) {
perror("Failed to stat /proc/self/ns/pid");
goto cleanup;
}
bss.dev = st.st_dev;
bss.ino = st.st_ino;
err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err))
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
PTR_ERR(link))) {
link = NULL;
goto cleanup;
}
/* trigger some syscalls */
usleep(1);
err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
if (CHECK(err, "set_bss", "failed to get bss : %d\n", err))
goto cleanup;
if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
"User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
goto cleanup;
cleanup:
if (!link) {
bpf_link__destroy(link);
link = NULL;
}
bpf_object__close(obj);
}

View File

@ -805,12 +805,6 @@ static void test_config(int sotype, sa_family_t family, bool inany)
char s[MAX_TEST_NAME];
const struct test *t;
/* SOCKMAP/SOCKHASH don't support UDP yet */
if (sotype == SOCK_DGRAM &&
(inner_map_type == BPF_MAP_TYPE_SOCKMAP ||
inner_map_type == BPF_MAP_TYPE_SOCKHASH))
return;
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
if (t->need_sotype && t->need_sotype != sotype)
continue; /* test not compatible with socket type */

View File

@ -14,6 +14,7 @@ void test_skb_ctx(void)
.wire_len = 100,
.gso_segs = 8,
.mark = 9,
.gso_size = 10,
};
struct bpf_prog_test_run_attr tattr = {
.data_in = &pkt_v4,

View File

@ -16,6 +16,7 @@
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <unistd.h>
#include <bpf/bpf.h>
@ -25,6 +26,7 @@
#include "test_progs.h"
#include "test_sockmap_listen.skel.h"
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
@ -44,9 +46,10 @@
/* Wrappers that fail the test on error and report it. */
#define xaccept(fd, addr, len) \
#define xaccept_nonblock(fd, addr, len) \
({ \
int __ret = accept((fd), (addr), (len)); \
int __ret = \
accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("accept"); \
__ret; \
@ -108,6 +111,23 @@
__ret; \
})
#define xsend(fd, buf, len, flags) \
({ \
ssize_t __ret = send((fd), (buf), (len), (flags)); \
if (__ret == -1) \
FAIL_ERRNO("send"); \
__ret; \
})
#define xrecv_nonblock(fd, buf, len, flags) \
({ \
ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("recv"); \
__ret; \
})
#define xsocket(family, sotype, flags) \
({ \
int __ret = socket(family, sotype, flags); \
@ -175,6 +195,40 @@
__ret; \
})
static int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
fd_set rfds;
int r;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
r = select(fd + 1, &rfds, NULL, NULL, &timeout);
if (r == 0)
errno = ETIME;
return r == 1 ? 0 : -1;
}
static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return accept(fd, addr, len);
}
static int recv_timeout(int fd, void *buf, size_t len, int flags,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return recv(fd, buf, len, flags);
}
static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
{
struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
@ -230,7 +284,7 @@ static int enable_reuseport(int s, int progfd)
return 0;
}
static int listen_loopback_reuseport(int family, int sotype, int progfd)
static int socket_loopback_reuseport(int family, int sotype, int progfd)
{
struct sockaddr_storage addr;
socklen_t len;
@ -249,6 +303,9 @@ static int listen_loopback_reuseport(int family, int sotype, int progfd)
if (err)
goto close;
if (sotype & SOCK_DGRAM)
return s;
err = xlisten(s, SOMAXCONN);
if (err)
goto close;
@ -259,9 +316,9 @@ close:
return -1;
}
static int listen_loopback(int family, int sotype)
static int socket_loopback(int family, int sotype)
{
return listen_loopback_reuseport(family, sotype, -1);
return socket_loopback_reuseport(family, sotype, -1);
}
static void test_insert_invalid(int family, int sotype, int mapfd)
@ -327,13 +384,13 @@ close:
xclose(s);
}
static void test_insert_listening(int family, int sotype, int mapfd)
static void test_insert(int family, int sotype, int mapfd)
{
u64 value;
u32 key;
int s;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -349,7 +406,7 @@ static void test_delete_after_insert(int family, int sotype, int mapfd)
u32 key;
int s;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -366,7 +423,7 @@ static void test_delete_after_close(int family, int sotype, int mapfd)
u64 value;
u32 key;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -390,7 +447,7 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd)
u32 key;
int s;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -417,7 +474,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
u64 value;
u32 key;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -439,7 +496,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
u32 key, value32;
int err, s;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -464,17 +521,17 @@ close:
xclose(s);
}
static void test_update_listening(int family, int sotype, int mapfd)
static void test_update_existing(int family, int sotype, int mapfd)
{
int s1, s2;
u64 value;
u32 key;
s1 = listen_loopback(family, sotype);
s1 = socket_loopback(family, sotype);
if (s1 < 0)
return;
s2 = listen_loopback(family, sotype);
s2 = socket_loopback(family, sotype);
if (s2 < 0)
goto close_s1;
@ -500,7 +557,7 @@ static void test_destroy_orphan_child(int family, int sotype, int mapfd)
u64 value;
u32 key;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -534,7 +591,7 @@ static void test_clone_after_delete(int family, int sotype, int mapfd)
u64 value;
u32 key;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype);
if (s < 0)
return;
@ -570,7 +627,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd)
socklen_t len;
u64 value;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype | SOCK_NONBLOCK);
if (s == -1)
return;
@ -598,7 +655,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd)
if (err)
goto close_cli;
p = xaccept(s, NULL, NULL);
p = xaccept_nonblock(s, NULL, NULL);
if (p == -1)
goto close_cli;
@ -624,7 +681,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd)
socklen_t len;
u64 value;
s = listen_loopback(family, sotype);
s = socket_loopback(family, sotype | SOCK_NONBLOCK);
if (s == -1)
return;
@ -647,7 +704,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd)
if (err)
goto close_cli;
p = xaccept(s, NULL, NULL);
p = xaccept_nonblock(s, NULL, NULL);
if (p == -1)
goto close_cli;
@ -711,7 +768,7 @@ static void *connect_accept_thread(void *arg)
break;
}
p = xaccept(s, NULL, NULL);
p = xaccept_nonblock(s, NULL, NULL);
if (p < 0) {
xclose(c);
break;
@ -735,7 +792,7 @@ static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
int err, s;
u64 value;
s = listen_loopback(family, sotype | SOCK_NONBLOCK);
s = socket_loopback(family, sotype | SOCK_NONBLOCK);
if (s < 0)
return;
@ -877,7 +934,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
zero_verdict_count(verd_mapfd);
s = listen_loopback(family, sotype | SOCK_NONBLOCK);
s = socket_loopback(family, sotype | SOCK_NONBLOCK);
if (s < 0)
return;
@ -893,7 +950,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli0;
p0 = xaccept(s, NULL, NULL);
p0 = xaccept_nonblock(s, NULL, NULL);
if (p0 < 0)
goto close_cli0;
@ -904,7 +961,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli1;
p1 = xaccept(s, NULL, NULL);
p1 = xaccept_nonblock(s, NULL, NULL);
if (p1 < 0)
goto close_cli1;
@ -1009,7 +1066,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
zero_verdict_count(verd_mapfd);
s = listen_loopback(family, sotype | SOCK_NONBLOCK);
s = socket_loopback(family, sotype | SOCK_NONBLOCK);
if (s < 0)
return;
@ -1025,7 +1082,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli;
p = xaccept(s, NULL, NULL);
p = xaccept_nonblock(s, NULL, NULL);
if (p < 0)
goto close_cli;
@ -1113,14 +1170,15 @@ static void test_reuseport_select_listening(int family, int sotype,
{
struct sockaddr_storage addr;
unsigned int pass;
int s, c, p, err;
int s, c, err;
socklen_t len;
u64 value;
u32 key;
zero_verdict_count(verd_map);
s = listen_loopback_reuseport(family, sotype, reuseport_prog);
s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
reuseport_prog);
if (s < 0)
return;
@ -1142,19 +1200,33 @@ static void test_reuseport_select_listening(int family, int sotype,
if (err)
goto close_cli;
p = xaccept(s, NULL, NULL);
if (p < 0)
goto close_cli;
if (sotype == SOCK_STREAM) {
int p;
p = xaccept_nonblock(s, NULL, NULL);
if (p < 0)
goto close_cli;
xclose(p);
} else {
char b = 'a';
ssize_t n;
n = xsend(c, &b, sizeof(b), 0);
if (n == -1)
goto close_cli;
n = xrecv_nonblock(s, &b, sizeof(b), 0);
if (n == -1)
goto close_cli;
}
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_map, &key, &pass);
if (err)
goto close_peer;
goto close_cli;
if (pass != 1)
FAIL("want pass count 1, have %d", pass);
close_peer:
xclose(p);
close_cli:
xclose(c);
close_srv:
@ -1174,7 +1246,7 @@ static void test_reuseport_select_connected(int family, int sotype,
zero_verdict_count(verd_map);
s = listen_loopback_reuseport(family, sotype, reuseport_prog);
s = socket_loopback_reuseport(family, sotype, reuseport_prog);
if (s < 0)
return;
@ -1198,9 +1270,24 @@ static void test_reuseport_select_connected(int family, int sotype,
if (err)
goto close_cli0;
p0 = xaccept(s, NULL, NULL);
if (err)
goto close_cli0;
if (sotype == SOCK_STREAM) {
p0 = xaccept_nonblock(s, NULL, NULL);
if (p0 < 0)
goto close_cli0;
} else {
p0 = xsocket(family, sotype, 0);
if (p0 < 0)
goto close_cli0;
len = sizeof(addr);
err = xgetsockname(c0, sockaddr(&addr), &len);
if (err)
goto close_cli0;
err = xconnect(p0, sockaddr(&addr), len);
if (err)
goto close_cli0;
}
/* Update sock_map[0] to redirect to a connected socket */
key = 0;
@ -1213,8 +1300,24 @@ static void test_reuseport_select_connected(int family, int sotype,
if (c1 < 0)
goto close_peer0;
len = sizeof(addr);
err = xgetsockname(s, sockaddr(&addr), &len);
if (err)
goto close_srv;
errno = 0;
err = connect(c1, sockaddr(&addr), len);
if (sotype == SOCK_DGRAM) {
char b = 'a';
ssize_t n;
n = xsend(c1, &b, sizeof(b), 0);
if (n == -1)
goto close_cli1;
n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
err = n == -1;
}
if (!err || errno != ECONNREFUSED)
FAIL_ERRNO("connect: expected ECONNREFUSED");
@ -1249,11 +1352,11 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
zero_verdict_count(verd_map);
/* Create two listeners, each in its own reuseport group */
s1 = listen_loopback_reuseport(family, sotype, reuseport_prog);
s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
if (s1 < 0)
return;
s2 = listen_loopback_reuseport(family, sotype, reuseport_prog);
s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
if (s2 < 0)
goto close_srv1;
@ -1278,7 +1381,18 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
goto close_srv2;
err = connect(c, sockaddr(&addr), len);
if (err && errno != ECONNREFUSED) {
if (sotype == SOCK_DGRAM) {
char b = 'a';
ssize_t n;
n = xsend(c, &b, sizeof(b), 0);
if (n == -1)
goto close_cli;
n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
err = n == -1;
}
if (!err || errno != ECONNREFUSED) {
FAIL_ERRNO("connect: expected ECONNREFUSED");
goto close_cli;
}
@ -1299,9 +1413,9 @@ close_srv1:
xclose(s1);
}
#define TEST(fn) \
#define TEST(fn, ...) \
{ \
fn, #fn \
fn, #fn, __VA_ARGS__ \
}
static void test_ops_cleanup(const struct bpf_map *map)
@ -1350,18 +1464,31 @@ static const char *map_type_str(const struct bpf_map *map)
}
}
static const char *sotype_str(int sotype)
{
switch (sotype) {
case SOCK_DGRAM:
return "UDP";
case SOCK_STREAM:
return "TCP";
default:
return "unknown";
}
}
static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
int family, int sotype)
{
const struct op_test {
void (*fn)(int family, int sotype, int mapfd);
const char *name;
int sotype;
} tests[] = {
/* insert */
TEST(test_insert_invalid),
TEST(test_insert_opened),
TEST(test_insert_bound),
TEST(test_insert_listening),
TEST(test_insert_bound, SOCK_STREAM),
TEST(test_insert),
/* delete */
TEST(test_delete_after_insert),
TEST(test_delete_after_close),
@ -1370,28 +1497,32 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
TEST(test_lookup_after_delete),
TEST(test_lookup_32_bit_value),
/* update */
TEST(test_update_listening),
TEST(test_update_existing),
/* races with insert/delete */
TEST(test_destroy_orphan_child),
TEST(test_syn_recv_insert_delete),
TEST(test_race_insert_listen),
TEST(test_destroy_orphan_child, SOCK_STREAM),
TEST(test_syn_recv_insert_delete, SOCK_STREAM),
TEST(test_race_insert_listen, SOCK_STREAM),
/* child clone */
TEST(test_clone_after_delete),
TEST(test_accept_after_delete),
TEST(test_accept_before_delete),
TEST(test_clone_after_delete, SOCK_STREAM),
TEST(test_accept_after_delete, SOCK_STREAM),
TEST(test_accept_before_delete, SOCK_STREAM),
};
const char *family_name, *map_name;
const char *family_name, *map_name, *sotype_name;
const struct op_test *t;
char s[MAX_TEST_NAME];
int map_fd;
family_name = family_str(family);
map_name = map_type_str(map);
sotype_name = sotype_str(sotype);
map_fd = bpf_map__fd(map);
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
t->name);
snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
sotype_name, t->name);
if (t->sotype != 0 && t->sotype != sotype)
continue;
if (!test__start_subtest(s))
continue;
@ -1424,6 +1555,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
t->name);
if (!test__start_subtest(s))
continue;
@ -1438,26 +1570,31 @@ static void test_reuseport(struct test_sockmap_listen *skel,
void (*fn)(int family, int sotype, int socket_map,
int verdict_map, int reuseport_prog);
const char *name;
int sotype;
} tests[] = {
TEST(test_reuseport_select_listening),
TEST(test_reuseport_select_connected),
TEST(test_reuseport_mixed_groups),
};
int socket_map, verdict_map, reuseport_prog;
const char *family_name, *map_name;
const char *family_name, *map_name, *sotype_name;
const struct reuseport_test *t;
char s[MAX_TEST_NAME];
family_name = family_str(family);
map_name = map_type_str(map);
sotype_name = sotype_str(sotype);
socket_map = bpf_map__fd(map);
verdict_map = bpf_map__fd(skel->maps.verdict_map);
reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
t->name);
snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
sotype_name, t->name);
if (t->sotype != 0 && t->sotype != sotype)
continue;
if (!test__start_subtest(s))
continue;
@ -1470,8 +1607,10 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
test_ops(skel, map, family, SOCK_STREAM);
test_ops(skel, map, family, SOCK_DGRAM);
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
}
void test_sockmap_listen(void)

View File

@ -188,7 +188,7 @@ static int start_server(void)
};
int fd;
fd = socket(AF_INET, SOCK_STREAM, 0);
fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
@ -205,6 +205,7 @@ static int start_server(void)
static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
static volatile bool server_done = false;
static void *server_thread(void *arg)
{
@ -222,23 +223,24 @@ static void *server_thread(void *arg)
if (CHECK_FAIL(err < 0)) {
perror("Failed to listed on socket");
return NULL;
return ERR_PTR(err);
}
client_fd = accept(fd, (struct sockaddr *)&addr, &len);
while (!server_done) {
client_fd = accept(fd, (struct sockaddr *)&addr, &len);
if (client_fd == -1 && errno == EAGAIN) {
usleep(50);
continue;
}
break;
}
if (CHECK_FAIL(client_fd < 0)) {
perror("Failed to accept client");
return NULL;
return ERR_PTR(err);
}
/* Wait for the next connection (that never arrives)
* to keep this thread alive to prevent calling
* close() on client_fd.
*/
if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) {
perror("Unexpected success in second accept");
return NULL;
}
while (!server_done)
usleep(50);
close(client_fd);
@ -249,6 +251,7 @@ void test_tcp_rtt(void)
{
int server_fd, cgroup_fd;
pthread_t tid;
void *server_res;
cgroup_fd = test__join_cgroup("/tcp_rtt");
if (CHECK_FAIL(cgroup_fd < 0))
@ -267,6 +270,11 @@ void test_tcp_rtt(void)
pthread_mutex_unlock(&server_started_mtx);
CHECK_FAIL(run_test(cgroup_fd, server_fd));
server_done = true;
pthread_join(tid, &server_res);
CHECK_FAIL(IS_ERR(server_res));
close_server_fd:
close(server_fd);
close_cgroup_fd:

View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
#include <time.h>
#include "test_vmlinux.skel.h"
#define MY_TV_NSEC 1337
static void nsleep()
{
struct timespec ts = { .tv_nsec = MY_TV_NSEC };
(void)nanosleep(&ts, NULL);
}
void test_vmlinux(void)
{
int duration = 0, err;
struct test_vmlinux* skel;
struct test_vmlinux__bss *bss;
skel = test_vmlinux__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
bss = skel->bss;
err = test_vmlinux__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
goto cleanup;
/* trigger everything */
nsleep();
CHECK(!bss->tp_called, "tp", "not called\n");
CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
CHECK(!bss->kprobe_called, "kprobe", "not called\n");
CHECK(!bss->fentry_called, "fentry", "not called\n");
cleanup:
test_vmlinux__destroy(skel);
}

View File

@ -4,17 +4,51 @@
#include "test_xdp.skel.h"
#include "test_xdp_bpf2bpf.skel.h"
struct meta {
int ifindex;
int pkt_len;
};
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
int duration = 0;
struct meta *meta = (struct meta *)data;
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
"check_size", "size %u < %zu\n",
size, sizeof(pkt_v4) + sizeof(*meta)))
return;
if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
"meta->ifindex = %d\n", meta->ifindex))
return;
if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
"meta->pkt_len = %zd\n", sizeof(pkt_v4)))
return;
if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
"check_packet_content", "content not the same\n"))
return;
*(bool *)ctx = true;
}
void test_xdp_bpf2bpf(void)
{
__u32 duration = 0, retval, size;
char buf[128];
int err, pkt_fd, map_fd;
bool passed = false;
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
struct iptnl_info value4 = {.family = AF_INET};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
struct bpf_program *prog;
struct perf_buffer *pb = NULL;
struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void)
if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
goto out;
/* Set up perf buffer */
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
1, &pb_opts);
if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
goto out;
/* Run test program */
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void)
err, errno, retval, size))
goto out;
/* Make sure bpf_xdp_output() was triggered and it sent the expected
* data to the perf ring buffer.
*/
err = perf_buffer__poll(pb, 100);
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
goto out;
CHECK_FAIL(!passed);
/* Verify test results */
if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
"result", "fentry failed err %llu\n",
@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void)
"fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
out:
if (pb)
perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}

View File

@ -9,7 +9,7 @@
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";

View File

@ -13,7 +13,7 @@ enum e1 {
enum e2 {
C = 100,
D = -100,
D = 4294967295,
E = 0,
};

View File

@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";

View File

@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
struct sk_buff {
unsigned int len;

View File

@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
struct sk_buff {
unsigned int len;

View File

@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";

View File

@ -4,7 +4,7 @@
#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {

View File

@ -0,0 +1,49 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2020 Google LLC.
*/
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
static int sequence = 0;
__s32 input_retval = 0;
__u64 fentry_result = 0;
SEC("fentry/bpf_modify_return_test")
int BPF_PROG(fentry_test, int a, __u64 b)
{
sequence++;
fentry_result = (sequence == 1);
return 0;
}
__u64 fmod_ret_result = 0;
SEC("fmod_ret/bpf_modify_return_test")
int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
{
sequence++;
/* This is the first fmod_ret program, the ret passed should be 0 */
fmod_ret_result = (sequence == 2 && ret == 0);
return input_retval;
}
__u64 fexit_result = 0;
SEC("fexit/bpf_modify_return_test")
int BPF_PROG(fexit_test, int a, __u64 b, int ret)
{
sequence++;
/* If the input_reval is non-zero a successful modification should have
* occurred.
*/
if (input_retval)
fexit_result = (sequence == 3 && ret == input_retval);
else
fexit_result = (sequence == 3 && ret == 4);
return 0;
}

View File

@ -4,6 +4,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
int kprobe_res = 0;
int kretprobe_res = 0;
@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx)
}
SEC("kretprobe/sys_nanosleep")
int handle_kretprobe(struct pt_regs *ctx)
int BPF_KRETPROBE(handle_kretprobe)
{
kretprobe_res = 2;
return 0;

View File

@ -0,0 +1,25 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
int in = 0;
int out = 0;
SEC("raw_tp/sys_enter")
int raw_tp_prog(const void *ctx)
{
out = in;
return 0;
}
SEC("tp_btf/sys_enter")
int tp_btf_prog(const void *ctx)
{
out = in;
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
static volatile struct {
__u64 dev;
__u64 ino;
__u64 pid_tgid;
__u64 user_pid_tgid;
} res;
SEC("raw_tracepoint/sys_enter")
int trace(void *ctx)
{
__u64 ns_pid_tgid, expected_pid;
struct bpf_pidns_info nsdata;
__u32 key = 0;
if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
sizeof(struct bpf_pidns_info)))
return 0;
ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
expected_pid = res.user_pid_tgid;
if (expected_pid != ns_pid_tgid)
return 0;
res.pid_tgid = ns_pid_tgid;
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@ -6,7 +6,6 @@
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_trace_helpers.h"
struct task_struct;
@ -17,11 +16,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec)
}
SEC("kretprobe/__set_task_comm")
int BPF_KRETPROBE(prog2,
struct task_struct *tsk, const char *buf, bool exec,
int ret)
int BPF_KRETPROBE(prog2, int ret)
{
return !PT_REGS_PARM1(ctx) && ret;
return ret;
}
SEC("raw_tp/task_rename")

View File

@ -5,7 +5,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
int valid = 0;
int required_size_out = 0;

View File

@ -4,7 +4,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
#include <bpf/bpf_tracing.h>
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);

View File

@ -7,7 +7,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_trace_helpers.h"
static struct sockaddr_in old;

View File

@ -23,6 +23,8 @@ int process(struct __sk_buff *skb)
return 1;
if (skb->gso_segs != 8)
return 1;
if (skb->gso_size != 10)
return 1;
return 0;
}

Some files were not shown because too many files have changed in this diff Show More