x86/asm/entry/64: Always allocate a complete "struct pt_regs" on the kernel stack

The 64-bit entry code was using six stack slots less by not
saving/restoring registers which are callee-preserved according
to the C ABI, and was not allocating space for them.

Only when syscalls needed a complete "struct pt_regs" was
the complete area allocated and filled in.

As an additional twist, on interrupt entry a "slightly less
truncated pt_regs" trick is used, to make nested interrupt
stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle
bugs. For example, 'stub_fork' had to pop the return address,
extend the struct, save registers, and push return address back.
Ugly. 'ia32_ptregs_common' pops return address and "returns" via
jmp insn, throwing a wrench into CPU return stack cache.

This patch changes the code to always allocate a complete
"struct pt_regs" on the kernel stack. The saving of registers
is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:

 - ALLOC_PT_GPREGS_ON_STACK allocates the structure.

 - SAVE_C_REGS saves to it those registers which are clobbered
   by C code.

 - SAVE_EXTRA_REGS saves to it all other registers.

 - Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros
   reverse it.

'ia32_ptregs_common', 'stub_fork' and friends lost their ugly dance
with the return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

'error_entry' and 'save_paranoid' now use SAVE_C_REGS +
SAVE_EXTRA_REGS instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.

Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/b89763d354aa23e670b9bdf3a40ae320320a7c2e.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Denys Vlasenko 2015-02-26 14:40:27 -08:00 committed by Ingo Molnar
parent 6e1327bd2b
commit 76f5df43ca
5 changed files with 215 additions and 265 deletions

View file

@ -62,12 +62,12 @@
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
movl \offset+16(%rsp),%r9d
movl \offset+R9(%rsp),%r9d
.endif
movl \offset+40(%rsp),%ecx
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
movl \offset+RCX(%rsp),%ecx
movl \offset+RDX(%rsp),%edx
movl \offset+RSI(%rsp),%esi
movl \offset+RDI(%rsp),%edi
movl %eax,%eax /* zero extension */
.endm
@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
CFI_REL_OFFSET rip,0
pushq_cfi %rax
cld
SAVE_ARGS 0,1,0
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS_EXCEPT_R891011
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
ASM_STAC
@ -182,7 +183,8 @@ sysexit_from_sys_call:
andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
CFI_REGISTER rip,rdx
RESTORE_ARGS 0,24,0,0,0,0
RESTORE_RSI_RDI
REMOVE_PT_GPREGS_FROM_STACK 3*8
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
@ -256,13 +258,13 @@ sysenter_tracesys:
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz sysenter_auditsys
#endif
SAVE_REST
SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,0,0
ALLOC_PT_GPREGS_ON_STACK 8
SAVE_C_REGS_EXCEPT_RCX_R891011
movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
@ -341,7 +344,7 @@ cstar_dispatch:
jnz sysretl_audit
sysretl_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
RESTORE_ARGS 0,-ARG_SKIP,0,0,0
RESTORE_RSI_RDI_RDX
movl RIP-ARGOFFSET(%rsp),%ecx
CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
@ -372,13 +375,13 @@ cstar_tracesys:
jz cstar_auditsys
#endif
xchgl %r9d,%ebp
SAVE_REST
SAVE_EXTRA_REGS
CLEAR_RREGS 0, r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
RESTORE_EXTRA_REGS
xchgl %ebp,%r9d
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
SAVE_ARGS 0,1,0
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS_EXCEPT_R891011
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_tracesys
@ -446,16 +450,16 @@ ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
ia32_ret_from_sys_call:
CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call
jmp int_ret_from_sys_call
ia32_tracesys:
SAVE_REST
ia32_tracesys:
SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
ALIGN
ia32_ptregs_common:
popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
@ -507,9 +510,9 @@ ia32_ptregs_common:
/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
SAVE_REST
SAVE_EXTRA_REGS 8
call *%rax
RESTORE_REST
jmp ia32_sysret /* misbalances the return cache */
RESTORE_EXTRA_REGS 8
ret
CFI_ENDPROC
END(ia32_ptregs_common)

View file

@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
#define R15 0
#define R14 8
#define R13 16
#define R12 24
#define RBP 32
#define RBX 40
/* The layout forms the "struct pt_regs" on the stack: */
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
#define R15 0*8
#define R14 1*8
#define R13 2*8
#define R12 3*8
#define RBP 4*8
#define RBX 5*8
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 6*8
#define R10 7*8
#define R9 8*8
#define R8 9*8
#define RAX 10*8
#define RCX 11*8
#define RDX 12*8
#define RSI 13*8
#define RDI 14*8
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
#define ORIG_RAX 15*8
/* Return frame for iretq */
#define RIP 16*8
#define CS 17*8
#define EFLAGS 18*8
#define RSP 19*8
#define SS 20*8
/* arguments: interrupts/non tracing syscalls only save up to here: */
#define R11 48
#define R10 56
#define R9 64
#define R8 72
#define RAX 80
#define RCX 88
#define RDX 96
#define RSI 104
#define RDI 112
#define ORIG_RAX 120 /* + error_code */
/* end of arguments */
/* cpu exception frame or undefined in case of fast syscall: */
#define RIP 128
#define CS 136
#define EFLAGS 144
#define RSP 152
#define SS 160
#define ARGOFFSET R11
.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
subq $9*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 9*8+\addskip
movq_cfi rdi, 8*8
movq_cfi rsi, 7*8
movq_cfi rdx, 6*8
.if \save_rcx
movq_cfi rcx, 5*8
.endif
.if \rax_enosys
movq $-ENOSYS, 4*8(%rsp)
.else
movq_cfi rax, 4*8
.endif
.if \save_r891011
movq_cfi r8, 3*8
movq_cfi r9, 2*8
movq_cfi r10, 1*8
movq_cfi r11, 0*8
.endif
#define ARGOFFSET 0
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
subq $15*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 15*8+\addskip
.endm
#define ARG_SKIP (9*8)
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
.if \r8plus
movq_cfi r11, 6*8+\offset
movq_cfi r10, 7*8+\offset
movq_cfi r9, 8*8+\offset
movq_cfi r8, 9*8+\offset
.endif
.if \rax
movq_cfi rax, 10*8+\offset
.endif
.if \rcx
movq_cfi rcx, 11*8+\offset
.endif
movq_cfi rdx, 12*8+\offset
movq_cfi rsi, 13*8+\offset
movq_cfi rdi, 14*8+\offset
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
SAVE_C_REGS_HELPER \offset, 0, 0, 1
.endm
.macro SAVE_C_REGS_EXCEPT_R891011
SAVE_C_REGS_HELPER 0, 1, 1, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
SAVE_C_REGS_HELPER 0, 1, 0, 0
.endm
.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
rstor_r8910=1, rstor_rdx=1
.macro SAVE_EXTRA_REGS offset=0
movq_cfi r15, 0*8+\offset
movq_cfi r14, 1*8+\offset
movq_cfi r13, 2*8+\offset
movq_cfi r12, 3*8+\offset
movq_cfi rbp, 4*8+\offset
movq_cfi rbx, 5*8+\offset
.endm
.macro SAVE_EXTRA_REGS_RBP offset=0
movq_cfi rbp, 4*8+\offset
.endm
.macro RESTORE_EXTRA_REGS offset=0
movq_cfi_restore 0*8+\offset, r15
movq_cfi_restore 1*8+\offset, r14
movq_cfi_restore 2*8+\offset, r13
movq_cfi_restore 3*8+\offset, r12
movq_cfi_restore 4*8+\offset, rbp
movq_cfi_restore 5*8+\offset, rbx
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq_cfi_restore 0*8, r11
movq_cfi_restore 6*8, r11
.endif
.if \rstor_r8910
movq_cfi_restore 1*8, r10
movq_cfi_restore 2*8, r9
movq_cfi_restore 3*8, r8
movq_cfi_restore 7*8, r10
movq_cfi_restore 8*8, r9
movq_cfi_restore 9*8, r8
.endif
.if \rstor_rax
movq_cfi_restore 4*8, rax
movq_cfi_restore 10*8, rax
.endif
.if \rstor_rcx
movq_cfi_restore 5*8, rcx
movq_cfi_restore 11*8, rcx
.endif
.if \rstor_rdx
movq_cfi_restore 6*8, rdx
.endif
movq_cfi_restore 7*8, rsi
movq_cfi_restore 8*8, rdi
.if ARG_SKIP+\addskip > 0
addq $ARG_SKIP+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
movq_cfi_restore 12*8, rdx
.endif
movq_cfi_restore 13*8, rsi
movq_cfi_restore 14*8, rdi
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RAX
RESTORE_C_REGS_HELPER 0,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX
RESTORE_C_REGS_HELPER 1,0,1,1,1
.endm
.macro RESTORE_RSI_RDI
RESTORE_C_REGS_HELPER 0,0,0,0,0
.endm
.macro RESTORE_RSI_RDI_RDX
RESTORE_C_REGS_HELPER 0,0,0,0,1
.endm
.macro LOAD_ARGS offset, skiprax=0
movq \offset(%rsp), %r11
movq \offset+8(%rsp), %r10
movq \offset+16(%rsp), %r9
movq \offset+24(%rsp), %r8
movq \offset+40(%rsp), %rcx
movq \offset+48(%rsp), %rdx
movq \offset+56(%rsp), %rsi
movq \offset+64(%rsp), %rdi
.if \skiprax
.else
movq \offset+72(%rsp), %rax
.endif
.endm
#define REST_SKIP (6*8)
.macro SAVE_REST
subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
movq_cfi rbx, 5*8
movq_cfi rbp, 4*8
movq_cfi r12, 3*8
movq_cfi r13, 2*8
movq_cfi r14, 1*8
movq_cfi r15, 0*8
.endm
.macro RESTORE_REST
movq_cfi_restore 0*8, r15
movq_cfi_restore 1*8, r14
movq_cfi_restore 2*8, r13
movq_cfi_restore 3*8, r12
movq_cfi_restore 4*8, rbp
movq_cfi_restore 5*8, rbx
addq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
.endm
.macro SAVE_ALL
SAVE_ARGS
SAVE_REST
.endm
.macro RESTORE_ALL addskip=0
RESTORE_REST
RESTORE_ARGS 1, \addskip
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
addq $15*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
.endm
.macro icebp

View file

@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
SAVE_REST; \
SAVE_EXTRA_REGS; \
LOCKDEP_SYS_EXIT; \
RESTORE_REST; \
RESTORE_EXTRA_REGS; \
cli; \
TRACE_IRQS_OFF;

View file

@ -49,7 +49,6 @@
#define EFLAGS 144
#define RSP 152
#define SS 160
#define ARGOFFSET R11
#endif /* __ASSEMBLY__ */
/* top of stack page */

View file

@ -26,12 +26,6 @@
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
* backtraces. They don't change any code.
* - SAVE_ALL/RESTORE_ALL - Save/restore all registers
* - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
* There are unfortunately lots of special cases where some registers
* not touched. The macro is a big mess that should be cleaned up.
* - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
* Gives a full stack frame.
* - ENTRY/END Define functions in the symbol table.
* - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
* frame that is otherwise undefined after a SYSCALL
@ -190,9 +184,9 @@ ENDPROC(native_usergs_sysret64)
.endm
/*
* frame that enables calling into C.
* frame that enables passing a complete pt_regs to a C function.
*/
.macro PARTIAL_FRAME start=1 offset=0
.macro DEFAULT_FRAME start=1 offset=0
XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
@ -203,13 +197,6 @@ ENDPROC(native_usergs_sysret64)
CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
.endm
/*
* frame that enables passing a complete pt_regs to a C function.
*/
.macro DEFAULT_FRAME start=1 offset=0
PARTIAL_FRAME \start, R11+\offset-R15
CFI_REL_OFFSET rbx, RBX+\offset
CFI_REL_OFFSET rbp, RBP+\offset
CFI_REL_OFFSET r12, R12+\offset
@ -221,21 +208,8 @@ ENDPROC(native_usergs_sysret64)
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
movq %rdi, RDI+8(%rsp)
movq %rsi, RSI+8(%rsp)
movq_cfi rdx, RDX+8
movq_cfi rcx, RCX+8
movq_cfi rax, RAX+8
movq %r8, R8+8(%rsp)
movq %r9, R9+8(%rsp)
movq %r10, R10+8(%rsp)
movq %r11, R11+8(%rsp)
movq_cfi rbx, RBX+8
movq %rbp, RBP+8(%rsp)
movq %r12, R12+8(%rsp)
movq %r13, R13+8(%rsp)
movq %r14, R14+8(%rsp)
movq %r15, R15+8(%rsp)
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
@ -264,7 +238,7 @@ ENTRY(ret_from_fork)
GET_THREAD_INFO(%rcx)
RESTORE_REST
RESTORE_EXTRA_REGS
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
jz 1f
@ -276,12 +250,10 @@ ENTRY(ret_from_fork)
jmp ret_from_sys_call # go to the SYSRET fastpath
1:
subq $REST_SKIP, %rsp # leave space for volatiles
CFI_ADJUST_CFA_OFFSET REST_SKIP
movq %rbp, %rdi
call *%rbx
movl $0, RAX(%rsp)
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(ret_from_fork)
@ -339,9 +311,11 @@ GLOBAL(system_call_after_swapgs)
* and short:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8, 0, rax_enosys=1
ALLOC_PT_GPREGS_ON_STACK 8
SAVE_C_REGS_EXCEPT_RAX_RCX
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
movq_cfi rax,(ORIG_RAX-ARGOFFSET)
movq %rcx,RIP-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz tracesys
@ -372,9 +346,9 @@ ret_from_sys_call:
* sysretq will re-enable interrupts:
*/
TRACE_IRQS_ON
RESTORE_C_REGS_EXCEPT_RCX
movq RIP-ARGOFFSET(%rsp),%rcx
CFI_REGISTER rip,rcx
RESTORE_ARGS 1,-ARG_SKIP,0
/*CFI_REGISTER rflags,r11*/
movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
@ -387,16 +361,17 @@ int_ret_from_sys_call_fixup:
/* Do syscall tracing */
tracesys:
leaq -REST_SKIP(%rsp), %rdi
movq %rsp, %rdi
movq $AUDIT_ARCH_X86_64, %rsi
call syscall_trace_enter_phase1
test %rax, %rax
jnz tracesys_phase2 /* if needed, run the slow path */
LOAD_ARGS 0 /* else restore clobbered regs */
RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
movq ORIG_RAX-ARGOFFSET(%rsp), %rax
jmp system_call_fastpath /* and return to the fast path */
tracesys_phase2:
SAVE_REST
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %rdi
movq %rsp, %rdi
movq $AUDIT_ARCH_X86_64, %rsi
@ -408,8 +383,8 @@ tracesys_phase2:
* We don't reload %rax because syscall_trace_entry_phase2() returned
* the value it wants us to use in the table lookup.
*/
LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
RESTORE_C_REGS_EXCEPT_RAX
RESTORE_EXTRA_REGS
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
#else
@ -460,7 +435,7 @@ int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
SAVE_REST
SAVE_EXTRA_REGS
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
@ -479,7 +454,7 @@ int_signal:
call do_notify_resume
1: movl $_TIF_WORK_MASK,%edi
int_restore_rest:
RESTORE_REST
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
@ -489,15 +464,12 @@ END(system_call)
.macro FORK_LIKE func
ENTRY(stub_\func)
CFI_STARTPROC
popq %r11 /* save return address */
PARTIAL_FRAME 0
SAVE_REST
pushq %r11 /* put it back on stack */
DEFAULT_FRAME 0, 8 /* offset 8: return address */
SAVE_EXTRA_REGS 8
FIXUP_TOP_OF_STACK %r11, 8
DEFAULT_FRAME 0 8 /* offset 8: return address */
call sys_\func
RESTORE_TOP_OF_STACK %r11, 8
ret $REST_SKIP /* pop extended registers */
ret
CFI_ENDPROC
END(stub_\func)
.endm
@ -505,7 +477,7 @@ END(stub_\func)
.macro FIXED_FRAME label,func
ENTRY(\label)
CFI_STARTPROC
PARTIAL_FRAME 0 8 /* offset 8: return address */
DEFAULT_FRAME 0, 8 /* offset 8: return address */
FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
call \func
RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
@ -522,12 +494,12 @@ END(\label)
ENTRY(stub_execve)
CFI_STARTPROC
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %r11
call sys_execve
movq %rax,RAX(%rsp)
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
@ -535,13 +507,13 @@ END(stub_execve)
ENTRY(stub_execveat)
CFI_STARTPROC
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %r11
call sys_execveat
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execveat)
@ -553,12 +525,12 @@ END(stub_execveat)
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_rt_sigreturn)
@ -567,12 +539,12 @@ END(stub_rt_sigreturn)
ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %r11
call sys32_x32_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_x32_rt_sigreturn)
@ -580,13 +552,13 @@ END(stub_x32_rt_sigreturn)
ENTRY(stub_x32_execve)
CFI_STARTPROC
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %r11
call compat_sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_x32_execve)
@ -594,13 +566,13 @@ END(stub_x32_execve)
ENTRY(stub_x32_execveat)
CFI_STARTPROC
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
FIXUP_TOP_OF_STACK %r11
call compat_sys_execveat
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_x32_execveat)
@ -656,42 +628,28 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
/* reserve pt_regs for scratch regs and rbp */
subq $ORIG_RAX-RBP, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
cld
/* start from rbp in pt_regs and jump over */
movq_cfi rdi, (RDI-RBP)
movq_cfi rsi, (RSI-RBP)
movq_cfi rdx, (RDX-RBP)
movq_cfi rcx, (RCX-RBP)
movq_cfi rax, (RAX-RBP)
movq_cfi r8, (R8-RBP)
movq_cfi r9, (R9-RBP)
movq_cfi r10, (R10-RBP)
movq_cfi r11, (R11-RBP)
ALLOC_PT_GPREGS_ON_STACK -RBP
SAVE_C_REGS -RBP
/* this goes to 0(%rsp) for unwinder, not for saving the value: */
SAVE_EXTRA_REGS_RBP -RBP
/* Save rbp so that we can unwind from get_irq_regs() */
movq_cfi rbp, 0
leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */
/* Save previous stack value */
movq %rsp, %rsi
leaq -RBP(%rsp),%rdi /* arg1 for handler */
testl $3, CS-RBP(%rsi)
testl $3, CS-RBP(%rsp)
je 1f
SWAPGS
1:
/*
* irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
1: incl PER_CPU_VAR(irq_count)
movq %rsp, %rsi
incl PER_CPU_VAR(irq_count)
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi
/* Store previous stack value */
pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \
@ -800,7 +758,8 @@ retint_swapgs: /* return to user-space */
*/
irq_return_via_sysret:
CFI_REMEMBER_STATE
RESTORE_ARGS 1,8,1
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
movq (RSP-RIP)(%rsp),%rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
@ -816,7 +775,8 @@ retint_restore_args: /* return to kernel space */
*/
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 1,8,1
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
irq_return:
INTERRUPT_RETURN
@ -887,12 +847,12 @@ retint_signal:
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
SAVE_EXTRA_REGS
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
@ -1019,8 +979,7 @@ ENTRY(\sym)
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
.endif
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
ALLOC_PT_GPREGS_ON_STACK
.if \paranoid
.if \paranoid == 1
@ -1269,7 +1228,9 @@ ENTRY(xen_failsafe_callback)
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
@ -1321,11 +1282,15 @@ ENTRY(paranoid_exit)
jnz paranoid_restore
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
RESTORE_ALL 8
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
INTERRUPT_RETURN
paranoid_restore:
TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
INTERRUPT_RETURN
CFI_ENDPROC
END(paranoid_exit)
@ -1339,21 +1304,8 @@ ENTRY(error_entry)
CFI_ADJUST_CFA_OFFSET 15*8
/* oldrax contains error code */
cld
movq %rdi, RDI+8(%rsp)
movq %rsi, RSI+8(%rsp)
movq %rdx, RDX+8(%rsp)
movq %rcx, RCX+8(%rsp)
movq %rax, RAX+8(%rsp)
movq %r8, R8+8(%rsp)
movq %r9, R9+8(%rsp)
movq %r10, R10+8(%rsp)
movq %r11, R11+8(%rsp)
movq_cfi rbx, RBX+8
movq %rbp, RBP+8(%rsp)
movq %r12, R12+8(%rsp)
movq %r13, R13+8(%rsp)
movq %r14, R14+8(%rsp)
movq %r15, R15+8(%rsp)
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
@ -1402,7 +1354,7 @@ END(error_entry)
ENTRY(error_exit)
DEFAULT_FRAME
movl %ebx,%eax
RESTORE_REST
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
@ -1621,8 +1573,8 @@ end_repeat_nmi:
* so that we repeat another NMI.
*/
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
ALLOC_PT_GPREGS_ON_STACK
/*
* Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
* as we should not be calling schedule in NMI context.
@ -1661,8 +1613,10 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_EXTRA_REGS
RESTORE_C_REGS
/* Pop the extra iret frame at once */
RESTORE_ALL 6*8
REMOVE_PT_GPREGS_FROM_STACK 6*8
/* Clear the NMI executing stack variable */
movq $0, 5*8(%rsp)