diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 89198017e8e6..b87285924023 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -134,10 +134,9 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val,
 }
 
 /*
- * This is an unoptimized 64 immediate emission used for BPF to BPF call
- * addresses. It will always do a full 64 bit decomposition as otherwise
- * more complexity in the last extra pass is required since we previously
- * reserved 4 instructions for the address.
+ * Kernel addresses in the vmalloc space use at most 48 bits, and the
+ * remaining bits are guaranteed to be 0x1. So we can compose the address
+ * with a fixed length movn/movk/movk sequence.
  */
 static inline void emit_addr_mov_i64(const int reg, const u64 val,
 				     struct jit_ctx *ctx)
@@ -145,8 +144,8 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
 	u64 tmp = val;
 	int shift = 0;
 
-	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
-	for (;shift < 48;) {
+	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
+	while (shift < 32) {
 		tmp >>= 16;
 		shift += 16;
 		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
@@ -634,11 +633,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 				    &func_addr, &func_addr_fixed);
 		if (ret < 0)
 			return ret;
-		if (func_addr_fixed)
-			/* We can use optimized emission here. */
-			emit_a64_mov_i64(tmp, func_addr, ctx);
-		else
-			emit_addr_mov_i64(tmp, func_addr, ctx);
+		emit_addr_mov_i64(tmp, func_addr, ctx);
 		emit(A64_BLR(tmp), ctx);
 		emit(A64_MOV(1, r0, A64_R(0)), ctx);
 		break;
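
For reference, a minimal userspace sketch (not kernel code) of why one MOVN plus two MOVKs are enough: for a vmalloc address whose bits 63..48 are all ones, MOVN fills the upper bits with ones while placing the low 16 bits, and two MOVKs patch in bits 31..16 and 47..32. The movn()/movk() helpers and the sample address below are illustrative assumptions, not part of the patch.

/*
 * Userspace model of the MOVN/MOVK/MOVK sequence emitted by the patched
 * emit_addr_mov_i64(). Compile with any C99 compiler.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* MOVN Xd, #imm16, LSL #shift: Xd = ~((u64)imm16 << shift) */
static uint64_t movn(uint16_t imm16, unsigned int shift)
{
	return ~((uint64_t)imm16 << shift);
}

/* MOVK Xd, #imm16, LSL #shift: replace the 16 bits at 'shift', keep the rest */
static uint64_t movk(uint64_t reg, uint16_t imm16, unsigned int shift)
{
	reg &= ~((uint64_t)0xffff << shift);
	return reg | ((uint64_t)imm16 << shift);
}

int main(void)
{
	/* Hypothetical vmalloc-style address: bits 63..48 all ones */
	uint64_t val = 0xffff800010001234ULL;
	uint64_t tmp = val;
	uint64_t reg;
	unsigned int shift = 0;

	/* Same structure as the new emit_addr_mov_i64(): one MOVN, two MOVKs */
	reg = movn(~tmp & 0xffff, shift);
	while (shift < 32) {
		tmp >>= 16;
		shift += 16;
		reg = movk(reg, tmp & 0xffff, shift);
	}

	assert(reg == val);
	printf("composed 0x%" PRIx64 "\n", reg);
	return 0;
}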