diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9b1df43dadbb..e0571a24d582 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -486,13 +486,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) static __always_inline __pure bool _static_cpu_has_safe(u16 bit) { #ifdef CC_HAVE_ASM_GOTO -/* - * We need to spell the jumps to the compiler because, depending on the offset, - * the replacement jump can be bigger than the original jump, and this we cannot - * have. Thus, we force the jump to the widest, 4-byte, signed relative - * offset even though the last would often fit in less bytes. - */ - asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + asm_volatile_goto("1: jmp %l[t_dynamic]\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" @@ -506,7 +500,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) " .byte 3b - 2b\n" /* pad len */ ".previous\n" ".section .altinstr_replacement,\"ax\"\n" - "4: .byte 0xe9\n .long %l[t_no] - 2b\n" + "4: jmp %l[t_no]\n" "5:\n" ".previous\n" ".section .altinstructions,\"a\"\n" diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c99b0f13a90e..715af37bf008 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -58,6 +58,21 @@ do { \ printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ } while (0) +#define DUMP_BYTES(buf, len, fmt, args...) \ +do { \ + if (unlikely(debug_alternative)) { \ + int j; \ + \ + if (!(len)) \ + break; \ + \ + printk(KERN_DEBUG fmt, ##args); \ + for (j = 0; j < (len) - 1; j++) \ + printk(KERN_CONT "%02hhx ", buf[j]); \ + printk(KERN_CONT "%02hhx\n", buf[j]); \ + } \ +} while (0) + /* * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes * that correspond to that nop. Getting from one nop to the next, we @@ -243,6 +258,71 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void *text_poke_early(void *addr, const void *opcode, size_t len); +/* + * Are we looking at a near JMP with a 1 or 4-byte displacement. + */ +static inline bool is_jmp(const u8 opcode) +{ + return opcode == 0xeb || opcode == 0xe9; +} + +static void __init_or_module +recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) +{ + u8 *next_rip, *tgt_rip; + s32 n_dspl, o_dspl; + int repl_len; + + if (a->replacementlen != 5) + return; + + o_dspl = *(s32 *)(insnbuf + 1); + + /* next_rip of the replacement JMP */ + next_rip = repl_insn + a->replacementlen; + /* target rip of the replacement JMP */ + tgt_rip = next_rip + o_dspl; + n_dspl = tgt_rip - orig_insn; + + DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); + + if (tgt_rip - orig_insn >= 0) { + if (n_dspl - 2 <= 127) + goto two_byte_jmp; + else + goto five_byte_jmp; + /* negative offset */ + } else { + if (((n_dspl - 2) & 0xff) == (n_dspl - 2)) + goto two_byte_jmp; + else + goto five_byte_jmp; + } + +two_byte_jmp: + n_dspl -= 2; + + insnbuf[0] = 0xeb; + insnbuf[1] = (s8)n_dspl; + add_nops(insnbuf + 2, 3); + + repl_len = 2; + goto done; + +five_byte_jmp: + n_dspl -= 5; + + insnbuf[0] = 0xe9; + *(s32 *)&insnbuf[1] = n_dspl; + + repl_len = 5; + +done: + + DPRINTK("final displ: 0x%08x, JMP 0x%lx", + n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); +} + /* * Replace instructions with better alternatives for this CPU type. This runs * before SMP is initialized to avoid SMP problems with self modifying code. @@ -268,6 +348,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start, * order. */ for (a = start; a < end; a++) { + int insnbuf_sz = 0; + instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->instrlen > sizeof(insnbuf)); @@ -281,24 +363,35 @@ void __init_or_module apply_alternatives(struct alt_instr *start, instr, a->instrlen, replacement, a->replacementlen); + DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); + DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); + memcpy(insnbuf, replacement, a->replacementlen); + insnbuf_sz = a->replacementlen; /* 0xe8 is a relative jump; fix the offset. */ if (*insnbuf == 0xe8 && a->replacementlen == 5) { *(s32 *)(insnbuf + 1) += replacement - instr; - DPRINTK("Fix CALL offset: 0x%x", *(s32 *)(insnbuf + 1)); + DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", + *(s32 *)(insnbuf + 1), + (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5); } - if (a->instrlen > a->replacementlen) + if (a->replacementlen && is_jmp(replacement[0])) + recompute_jump(a, instr, replacement, insnbuf); + + if (a->instrlen > a->replacementlen) { add_nops(insnbuf + a->replacementlen, a->instrlen - a->replacementlen); + insnbuf_sz += a->instrlen - a->replacementlen; + } + DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); - text_poke_early(instr, insnbuf, a->instrlen); + text_poke_early(instr, insnbuf, insnbuf_sz); } } #ifdef CONFIG_SMP - static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) { @@ -449,7 +542,7 @@ int alternatives_text_reserved(void *start, void *end) return 0; } -#endif +#endif /* CONFIG_SMP */ #ifdef CONFIG_PARAVIRT void __init_or_module apply_paravirt(struct paravirt_patch_site *start, diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a9aedd6aa7f7..dad718ce805c 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -25,14 +25,13 @@ */ .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 0: - .byte 0xe9 /* 32bit jump */ - .long \orig-1f /* by default jump to orig */ + jmp \orig 1: .section .altinstr_replacement,"ax" -2: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt1-1b /* offset */ /* or alternatively to alt1 */ -3: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt2-1b /* offset */ /* or alternatively to alt2 */ +2: + jmp \alt1 +3: + jmp \alt2 .previous .section .altinstructions,"a"