From 8c1f75587a18ca032da8f6376d1ed882d7095289 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 11 Jul 2017 10:33:44 -0500 Subject: [PATCH] x86/entry/64: Add unwind hint annotations Add unwind hint annotations to entry_64.S. This will enable the ORC unwinder to unwind through any location in the entry code including syscalls, interrupts, and exceptions. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Slaby Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/b9f6d478aadf68ba57c739dcfac34ec0dc021c4c.1499786555.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 1 - arch/x86/entry/calling.h | 5 +++ arch/x86/entry/entry_64.S | 71 +++++++++++++++++++++++++++++++++------ 3 files changed, 66 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 9976fcecd17e..af28a8a24366 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -2,7 +2,6 @@ # Makefile for the x86 low level entry code # -OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 05ed3d393da7..640aafebdc00 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,4 +1,5 @@ #include +#include /* @@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with movq %rdx, 12*8+\offset(%rsp) movq %rsi, 13*8+\offset(%rsp) movq %rdi, 14*8+\offset(%rsp) + UNWIND_HINT_REGS offset=\offset extra=0 .endm .macro SAVE_C_REGS offset=0 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 @@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with movq %r12, 3*8+\offset(%rsp) movq %rbp, 4*8+\offset(%rsp) movq %rbx, 5*8+\offset(%rsp) + UNWIND_HINT_REGS offset=\offset .endm .macro RESTORE_EXTRA_REGS offset=0 @@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with movq 3*8+\offset(%rsp), %r12 movq 4*8+\offset(%rsp), %rbp movq 5*8+\offset(%rsp), %rbx + UNWIND_HINT_REGS offset=\offset extra=0 .endm .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 @@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with .endif movq 13*8(%rsp), %rsi movq 14*8(%rsp), %rdi + UNWIND_HINT_IRET_REGS offset=16*8 .endm .macro RESTORE_C_REGS RESTORE_C_REGS_HELPER 1,1,1,1,1 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b56f7f23f21c..aa58155187c5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,6 +36,7 @@ #include #include #include +#include #include .code64 @@ -43,9 +44,10 @@ #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret64) + UNWIND_HINT_EMPTY swapgs sysretq -ENDPROC(native_usergs_sysret64) +END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ .macro TRACE_IRQS_IRETQ @@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64) */ ENTRY(entry_SYSCALL_64) + UNWIND_HINT_EMPTY /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + UNWIND_HINT_REGS extra=0 /* * If we need to do entry work or if we guess we'll need to do @@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath: movq EFLAGS(%rsp), %r11 RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY USERGS_SYSRET64 1: @@ -316,6 +321,7 @@ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY USERGS_SYSRET64 opportunistic_sysret_failed: @@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64) DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF popq %rax + UNWIND_HINT_REGS extra=0 jmp entry_SYSCALL64_slow_path 1: @@ -351,6 +358,7 @@ END(stub_ptregs_64) .macro ptregs_stub func ENTRY(ptregs_\func) + UNWIND_HINT_FUNC leaq \func(%rip), %rax jmp stub_ptregs_64 END(ptregs_\func) @@ -367,6 +375,7 @@ END(ptregs_\func) * %rsi: next task */ ENTRY(__switch_to_asm) + UNWIND_HINT_FUNC /* * Save callee-saved registers * This must match the order in inactive_task_frame @@ -406,6 +415,7 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) + UNWIND_HINT_EMPTY movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -413,6 +423,7 @@ ENTRY(ret_from_fork) jnz 1f /* kernel threads are uncommon */ 2: + UNWIND_HINT_REGS movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ @@ -440,10 +451,11 @@ END(ret_from_fork) ENTRY(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + UNWIND_HINT_IRET_REGS pushq $(~vector+0x80) /* Note: always in signed byte range */ - vector=vector+1 jmp common_interrupt .align 8 + vector=vector+1 .endr END(irq_entries_start) @@ -465,9 +477,14 @@ END(irq_entries_start) * * The invariant is that, if irq_count != -1, then the IRQ stack is in use. */ -.macro ENTER_IRQ_STACK old_rsp +.macro ENTER_IRQ_STACK regs=1 old_rsp DEBUG_ENTRY_ASSERT_IRQS_OFF movq %rsp, \old_rsp + + .if \regs + UNWIND_HINT_REGS base=\old_rsp + .endif + incl PER_CPU_VAR(irq_count) jnz .Lirq_stack_push_old_rsp_\@ @@ -504,16 +521,24 @@ END(irq_entries_start) .Lirq_stack_push_old_rsp_\@: pushq \old_rsp + + .if \regs + UNWIND_HINT_REGS indirect=1 + .endif .endm /* * Undoes ENTER_IRQ_STACK. */ -.macro LEAVE_IRQ_STACK +.macro LEAVE_IRQ_STACK regs=1 DEBUG_ENTRY_ASSERT_IRQS_OFF /* We need to be off the IRQ stack before decrementing irq_count. */ popq %rsp + .if \regs + UNWIND_HINT_REGS + .endif + /* * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming * the irq stack but we're not on it. @@ -624,6 +649,7 @@ restore_c_regs_and_iret: INTERRUPT_RETURN ENTRY(native_iret) + UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in * 64-bit mode SS:RSP on the exception stack is always valid. @@ -696,6 +722,7 @@ native_irq_return_ldt: orq PER_CPU_VAR(espfix_stack), %rax SWAPGS movq %rax, %rsp + UNWIND_HINT_IRET_REGS offset=8 /* * At this point, we cannot write to the stack any more, but we can @@ -717,6 +744,7 @@ END(common_interrupt) */ .macro apicinterrupt3 num sym do_sym ENTRY(\sym) + UNWIND_HINT_IRET_REGS ASM_CLAC pushq $~(\num) .Lcommon_\sym: @@ -802,6 +830,8 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) + UNWIND_HINT_IRET_REGS offset=8 + /* Sanity check */ .if \shift_ist != -1 && \paranoid == 0 .error "using shift_ist requires paranoid=1" @@ -825,6 +855,7 @@ ENTRY(\sym) .else call error_entry .endif + UNWIND_HINT_REGS /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ .if \paranoid @@ -922,6 +953,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 * edi: new selector */ ENTRY(native_load_gs_index) + FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS @@ -930,8 +962,9 @@ ENTRY(native_load_gs_index) 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS popfq + FRAME_END ret -END(native_load_gs_index) +ENDPROC(native_load_gs_index) EXPORT_SYMBOL(native_load_gs_index) _ASM_EXTABLE(.Lgs_change, bad_gs) @@ -954,12 +987,12 @@ bad_gs: ENTRY(do_softirq_own_stack) pushq %rbp mov %rsp, %rbp - ENTER_IRQ_STACK old_rsp=%r11 + ENTER_IRQ_STACK regs=0 old_rsp=%r11 call __do_softirq - LEAVE_IRQ_STACK + LEAVE_IRQ_STACK regs=0 leaveq ret -END(do_softirq_own_stack) +ENDPROC(do_softirq_own_stack) #ifdef CONFIG_XEN idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 @@ -983,7 +1016,9 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * see the correct pointer to the pt_regs */ + UNWIND_HINT_FUNC movq %rdi, %rsp /* we don't return, adjust the stack frame */ + UNWIND_HINT_REGS ENTER_IRQ_STACK old_rsp=%r10 call xen_evtchn_do_upcall @@ -1009,6 +1044,7 @@ END(xen_do_hypervisor_callback) * with its current contents: any discrepancy means we in category 1. */ ENTRY(xen_failsafe_callback) + UNWIND_HINT_EMPTY movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f @@ -1028,11 +1064,13 @@ ENTRY(xen_failsafe_callback) pushq $0 /* RIP */ pushq %r11 pushq %rcx + UNWIND_HINT_IRET_REGS offset=8 jmp general_protection 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ movq (%rsp), %rcx movq 8(%rsp), %r11 addq $0x30, %rsp + UNWIND_HINT_IRET_REGS pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS @@ -1078,6 +1116,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) + UNWIND_HINT_FUNC cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1105,6 +1144,7 @@ END(paranoid_entry) * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) + UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF_DEBUG testl %ebx, %ebx /* swapgs needed? */ @@ -1126,6 +1166,7 @@ END(paranoid_exit) * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) + UNWIND_HINT_FUNC cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1210,6 +1251,7 @@ END(error_entry) * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode */ ENTRY(error_exit) + UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF testl %ebx, %ebx @@ -1219,6 +1261,7 @@ END(error_exit) /* Runs on exception stack */ ENTRY(nmi) + UNWIND_HINT_IRET_REGS /* * Fix up the exception frame if we're on Xen. * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most @@ -1290,11 +1333,13 @@ ENTRY(nmi) cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ pushq 3*8(%rdx) /* pt_regs->flags */ pushq 2*8(%rdx) /* pt_regs->cs */ pushq 1*8(%rdx) /* pt_regs->rip */ + UNWIND_HINT_IRET_REGS pushq $-1 /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ @@ -1311,6 +1356,7 @@ ENTRY(nmi) pushq %r13 /* pt_regs->r13 */ pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS ENCODE_FRAME_POINTER /* @@ -1465,6 +1511,7 @@ first_nmi: .rept 5 pushq 11*8(%rsp) .endr + UNWIND_HINT_IRET_REGS /* Everything up to here is safe from nested NMIs */ @@ -1480,6 +1527,7 @@ first_nmi: pushq $__KERNEL_CS /* CS */ pushq $1f /* RIP */ INTERRUPT_RETURN /* continues at repeat_nmi below */ + UNWIND_HINT_IRET_REGS 1: #endif @@ -1529,6 +1577,7 @@ end_repeat_nmi: * exceptions might do. */ call paranoid_entry + UNWIND_HINT_REGS /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi @@ -1566,17 +1615,19 @@ nmi_restore: END(nmi) ENTRY(ignore_sysret) + UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret END(ignore_sysret) ENTRY(rewind_stack_do_exit) + UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movq PER_CPU_VAR(cpu_current_top_of_stack), %rax - leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp + leaq -PTREGS_SIZE(%rax), %rsp + UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE call do_exit -1: jmp 1b END(rewind_stack_do_exit)