Merge branch 'bpf-jit-cleanups'

Daniel Borkmann says:

====================
This series follows up mostly with with some minor cleanups on top
of 'Move ld_abs/ld_ind to native BPF' as well as implements better
32/64 bit immediate load into register and saves tail call init on
cBPF for the arm64 JIT. Last but not least we add a couple of test
cases. For details please see individual patches. Thanks!

v1 -> v2:
  - Minor fix in i64_i16_blocks() to remove 24 shift.
  - Added last two patches.
  - Added Acks from prior round.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2018-05-14 19:11:46 -07:00
commit fb40c9ddd6
7 changed files with 229 additions and 99 deletions

View file

@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
#define SCRATCH_SIZE 80
/* total stack size used in JITed code */
#define _STACK_SIZE \
(ctx->prog->aux->stack_depth + \
+ SCRATCH_SIZE + \
+ 4 /* extra for skb_copy_bits buffer */)
#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
#define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE)
#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (STACK_SIZE-off-4)
/* Offset of skb_copy_bits buffer */
#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
#define STACK_VAR(off) (STACK_SIZE - off)
#if __LINUX_ARM_ARCH__ < 7

View file

@ -21,7 +21,6 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
@ -80,37 +79,6 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
ctx->idx++;
}
static inline void emit_a64_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
u64 tmp = val;
int shift = 0;
emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
tmp >>= 16;
shift += 16;
while (tmp) {
if (tmp & 0xffff)
emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
tmp >>= 16;
shift += 16;
}
}
static inline void emit_addr_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
u64 tmp = val;
int shift = 0;
emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
for (;shift < 48;) {
tmp >>= 16;
shift += 16;
emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
}
}
static inline void emit_a64_mov_i(const int is64, const int reg,
const s32 val, struct jit_ctx *ctx)
{
@ -122,7 +90,8 @@ static inline void emit_a64_mov_i(const int is64, const int reg,
emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
} else {
emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
emit(A64_MOVK(is64, reg, lo, 0), ctx);
if (lo != 0xffff)
emit(A64_MOVK(is64, reg, lo, 0), ctx);
}
} else {
emit(A64_MOVZ(is64, reg, lo, 0), ctx);
@ -131,6 +100,59 @@ static inline void emit_a64_mov_i(const int is64, const int reg,
}
}
static int i64_i16_blocks(const u64 val, bool inverse)
{
return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
(((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
(((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
(((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}
static inline void emit_a64_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
u64 nrm_tmp = val, rev_tmp = ~val;
bool inverse;
int shift;
if (!(nrm_tmp >> 32))
return emit_a64_mov_i(0, reg, (u32)val, ctx);
inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
(fls64(nrm_tmp) - 1)), 16), 0);
if (inverse)
emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
else
emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
shift -= 16;
while (shift >= 0) {
if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
shift -= 16;
}
}
/*
* This is an unoptimized 64 immediate emission used for BPF to BPF call
* addresses. It will always do a full 64 bit decomposition as otherwise
* more complexity in the last extra pass is required since we previously
* reserved 4 instructions for the address.
*/
static inline void emit_addr_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
u64 tmp = val;
int shift = 0;
emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
for (;shift < 48;) {
tmp >>= 16;
shift += 16;
emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
}
}
static inline int bpf2a64_offset(int bpf_to, int bpf_from,
const struct jit_ctx *ctx)
{
@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET 7
static int build_prologue(struct jit_ctx *ctx)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
const u8 r6 = bpf2a64[BPF_REG_6];
@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
* | ... | BPF prog stack
* | |
* +-----+ <= (BPF_FP - prog->aux->stack_depth)
* |RSVD | JIT scratchpad
* |RSVD | padding
* current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size)
* | |
* | ... | Function call stack
@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
if (!ebpf_from_cbpf) {
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
}
}
/* 4 byte extra for skb_copy_bits buffer */
ctx->stack_size = prog->aux->stack_depth + 4;
ctx->stack_size = STACK_ALIGN(ctx->stack_size);
ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@ -786,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
bool was_classic = bpf_prog_was_classic(prog);
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
@ -840,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
if (build_prologue(&ctx)) {
if (build_prologue(&ctx, was_classic)) {
prog = orig_prog;
goto out_off;
}
@ -863,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
ctx.idx = 0;
build_prologue(&ctx);
build_prologue(&ctx, was_classic);
if (build_body(&ctx)) {
bpf_jit_binary_free(header);

View file

@ -95,7 +95,6 @@ enum reg_val_type {
* struct jit_ctx - JIT context
* @skf: The sk_filter
* @stack_size: eBPF stack size
* @tmp_offset: eBPF $sp offset to 8-byte temporary memory
* @idx: Instruction index
* @flags: JIT flags
* @offsets: Instruction offsets
@ -105,7 +104,6 @@ enum reg_val_type {
struct jit_ctx {
const struct bpf_prog *skf;
int stack_size;
int tmp_offset;
u32 idx;
u32 flags;
u32 *offsets;
@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
stack_adjust += locals_size;
ctx->tmp_offset = locals_size;
ctx->stack_size = stack_adjust;
@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
emit_instr(ctx, lui, reg, upper >> 16);
emit_instr(ctx, addiu, reg, reg, lower);
}
}
static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return 0;
}
static void * __must_check
ool_skb_header_pointer(const struct sk_buff *skb, int offset,
int len, void *buffer)
{
return skb_header_pointer(skb, offset, len, buffer);
}
static int size_to_len(const struct bpf_insn *insn)
{
switch (BPF_SIZE(insn->code)) {
case BPF_B:
return 1;
case BPF_H:
return 2;
case BPF_W:
return 4;
case BPF_DW:
return 8;
}
return 0;
}
static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
{
if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {

View file

@ -894,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const int i = insn - ctx->prog->insnsi;
const s16 off = insn->off;
const s32 imm = insn->imm;
u32 *func;
if (insn->src_reg == BPF_REG_FP)
ctx->saw_frame_pointer = true;

View file

@ -301,9 +301,9 @@ do { \
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 17
# define RETPOLINE_RAX_BPF_JIT() \
# ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 17
# define RETPOLINE_RAX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
@ -314,8 +314,8 @@ do { \
EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
EMIT1(0xC3); /* retq */ \
} while (0)
#else
# define RETPOLINE_EDX_BPF_JIT() \
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* call do_rop */ \
/* spec_trap: */ \
@ -326,17 +326,16 @@ do { \
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
EMIT1(0xC3); /* ret */ \
} while (0)
#endif
# endif
#else /* !CONFIG_RETPOLINE */
#ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 2
# define RETPOLINE_RAX_BPF_JIT() \
EMIT2(0xFF, 0xE0); /* jmp *%rax */
#else
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
#endif
# ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 2
# define RETPOLINE_RAX_BPF_JIT() \
EMIT2(0xFF, 0xE0); /* jmp *%rax */
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
# endif
#endif
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */

View file

@ -0,0 +1,80 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_RAND__
#define __BPF_RAND__
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
static inline uint64_t bpf_rand_mask(uint64_t mask)
{
return (((uint64_t)(uint32_t)rand()) |
((uint64_t)(uint32_t)rand() << 32)) & mask;
}
#define bpf_rand_ux(x, m) \
static inline uint64_t bpf_rand_u##x(int shift) \
{ \
return bpf_rand_mask((m)) << shift; \
}
bpf_rand_ux( 8, 0xffULL)
bpf_rand_ux(16, 0xffffULL)
bpf_rand_ux(24, 0xffffffULL)
bpf_rand_ux(32, 0xffffffffULL)
bpf_rand_ux(40, 0xffffffffffULL)
bpf_rand_ux(48, 0xffffffffffffULL)
bpf_rand_ux(56, 0xffffffffffffffULL)
bpf_rand_ux(64, 0xffffffffffffffffULL)
static inline void bpf_semi_rand_init(void)
{
srand(time(NULL));
}
static inline uint64_t bpf_semi_rand_get(void)
{
switch (rand() % 39) {
case 0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
case 1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
case 2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
case 3: return 0x8000000000000000ULL | bpf_rand_u32(0);
case 4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
case 5: return 0x0000000100000000ULL | bpf_rand_u24(0);
case 6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
case 7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
case 8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
case 9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
case 27: return 0x0000800000000000ULL;
case 28: return 0x8000000000000000ULL;
case 29: return 0x0000000000000000ULL;
case 30: return 0xffffffffffffffffULL;
case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
default: return bpf_rand_u64(0);
}
}
#endif /* __BPF_RAND__ */

View file

@ -41,6 +41,7 @@
# endif
#endif
#include "bpf_rlimit.h"
#include "bpf_rand.h"
#include "../../../include/linux/filter.h"
#ifndef ARRAY_SIZE
@ -152,6 +153,30 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
insn[i] = BPF_EXIT_INSN();
}
static void bpf_fill_rand_ld_dw(struct bpf_test *self)
{
struct bpf_insn *insn = self->insns;
uint64_t res = 0;
int i = 0;
insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
while (i < self->retval) {
uint64_t val = bpf_semi_rand_get();
struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
res ^= val;
insn[i++] = tmp[0];
insn[i++] = tmp[1];
insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
}
insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
insn[i] = BPF_EXIT_INSN();
res ^= (res >> 32);
self->retval = (uint32_t)res;
}
static struct bpf_test tests[] = {
{
"add+sub+mul",
@ -11974,6 +11999,42 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
.retval = 10,
},
{
"ld_dw: xor semi-random 64 bit imms, test 1",
.insns = { },
.data = { },
.fill_helper = bpf_fill_rand_ld_dw,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 4090,
},
{
"ld_dw: xor semi-random 64 bit imms, test 2",
.insns = { },
.data = { },
.fill_helper = bpf_fill_rand_ld_dw,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 2047,
},
{
"ld_dw: xor semi-random 64 bit imms, test 3",
.insns = { },
.data = { },
.fill_helper = bpf_fill_rand_ld_dw,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 511,
},
{
"ld_dw: xor semi-random 64 bit imms, test 4",
.insns = { },
.data = { },
.fill_helper = bpf_fill_rand_ld_dw,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 5,
},
};
static int probe_filter_length(const struct bpf_insn *fp)
@ -12346,5 +12407,6 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
bpf_semi_rand_init();
return do_test(unpriv, from, to);
}