Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm changes from Ingo Molnar:
 "There were lots of changes in this development cycle:

   - over 100 separate cleanups, restructuring changes, speedups and
     fixes in the x86 system call, irq, trap and other entry code, part
     of a heroic effort to deobfuscate a decade old spaghetti asm code
     and its C code dependencies (Denys Vlasenko, Andy Lutomirski)

   - alternatives code fixes and enhancements (Borislav Petkov)

   - simplifications and cleanups to the compat code (Brian Gerst)

   - signal handling fixes and new x86 testcases (Andy Lutomirski)

   - various other fixes and cleanups

  By their nature many of these changes are risky - we tried to test
  them well on many different x86 systems (there are no known
  regressions), and they are split up finely to help bisection - but
  there's still a fair bit of residual risk left so caveat emptor"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
  perf/x86/64: Report regs_user->ax too in get_regs_user()
  perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
  perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
  perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
  x86/asm/entry/32: Tidy up JNZ instructions after TESTs
  x86/asm/entry/64: Reduce padding in execve stubs
  x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
  x86/asm/entry/64: Simplify jumps in ret_from_fork
  x86/asm/entry/64: Remove a redundant jump
  x86/asm/entry/64: Optimize [v]fork/clone stubs
  x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
  x86/asm/entry/64: Move stub_x32_execve() closer to stub_execveat()
  x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
  x86/asm/entry/64: Add forgotten CFI annotation
  x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  x86/asm/entry/64: Move opportunistic sysret code to syscall code path
  x86, selftests: Add sigreturn selftest
  x86/alternatives: Guard NOPs optimization
  x86/asm/entry: Clear EXTRA_REGS for all executable formats
  x86/signal: Remove pax argument from restore_sigcontext
  ...
Linus Torvalds 2015-04-13 13:16:36 -07:00
commit 60f898eeaa
121 changed files with 3084 additions and 2104 deletions


@ -406,6 +406,12 @@ Protocol: 2.00+
- If 0, the protected-mode code is loaded at 0x10000.
- If 1, the protected-mode code is loaded at 0x100000.
Bit 1 (kernel internal): KASLR_FLAG
- Used internally by the compressed kernel to communicate
KASLR status to kernel proper.
If 1, KASLR enabled.
If 0, KASLR disabled.
Bit 5 (write): QUIET_FLAG
- If 0, print early messages.
- If 1, suppress early messages.
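
The bits documented above correspond to the loadflags constants in arch/x86/include/uapi/asm/bootparam.h (LOADED_HIGH, KASLR_FLAG, QUIET_FLAG, KEEP_SEGMENTS), which the hunks below start using by name. A minimal C illustration of testing them; the values are copied from the documentation and the snippet is not part of this series:

#define LOADED_HIGH   (1 << 0)  /* bit 0: protected-mode code loaded at 0x100000 */
#define KASLR_FLAG    (1 << 1)  /* bit 1 (kernel internal): KASLR was applied */
#define QUIET_FLAG    (1 << 5)  /* bit 5: suppress early messages */
#define KEEP_SEGMENTS (1 << 6)  /* bit 6: do not reload segments */

/* Illustrative check of the flag the decompressor sets for kernel proper: */
static int kaslr_was_applied(unsigned char loadflags)
{
	return (loadflags & KASLR_FLAG) != 0;
}
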


@ -295,7 +295,8 @@ static unsigned long find_random_addr(unsigned long minimum,
return slots_fetch_random();
}
unsigned char *choose_kernel_location(unsigned char *input,
unsigned char *choose_kernel_location(struct boot_params *boot_params,
unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
@ -315,6 +316,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
}
#endif
boot_params->hdr.loadflags |= KASLR_FLAG;
/* Record the various known unsafe memory ranges. */
mem_avoid_init((unsigned long)input, input_size,
(unsigned long)output, output_size);


@ -29,6 +29,7 @@
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
__HEAD
ENTRY(startup_32)
@ -102,7 +103,7 @@ preferred_addr:
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 1f
cli


@ -31,6 +31,7 @@
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
__HEAD
.code32
@ -46,7 +47,7 @@ ENTRY(startup_32)
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 1f
cli
@ -164,7 +165,7 @@ ENTRY(startup_32)
/* After gdt is loaded */
xorl %eax, %eax
lldt %ax
movl $0x20, %eax
movl $__BOOT_TSS, %eax
ltr %ax
/*


@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
real_mode = rmode;
/* Clear it for solely in-kernel use */
real_mode->hdr.loadflags &= ~KASLR_FLAG;
sanitize_boot_params(real_mode);
if (real_mode->screen_info.orig_video_mode == 7) {
@ -401,7 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
output = choose_kernel_location(input_data, input_len, output,
output = choose_kernel_location(real_mode, input_data, input_len, output,
output_len > run_size ? output_len
: run_size);


@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
#if CONFIG_RANDOMIZE_BASE
/* aslr.c */
unsigned char *choose_kernel_location(unsigned char *input,
unsigned char *choose_kernel_location(struct boot_params *boot_params,
unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size);
@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
bool has_cpuflag(int flag);
#else
static inline
unsigned char *choose_kernel_location(unsigned char *input,
unsigned char *choose_kernel_location(struct boot_params *boot_params,
unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)


@ -178,7 +178,7 @@ continue_block:
## 2a) PROCESS FULL BLOCKS:
################################################################
full_block:
movq $128,%rax
movl $128,%eax
lea 128*8*2(block_0), block_1
lea 128*8*3(block_0), block_2
add $128*8*1, block_0


@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
movq R1, 8(%rsi)
popq R1
movq $1,%rax
movl $1,%eax
ret
ENDPROC(twofish_enc_blk)
@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
movq R1, 8(%rsi)
popq R1
movq $1,%rax
movl $1,%eax
ret
ENDPROC(twofish_dec_blk)


@ -3,7 +3,6 @@
#
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o


@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
}
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_ia32 __user *sc,
unsigned int *pax)
struct sigcontext_ia32 __user *sc)
{
unsigned int tmpflags, err = 0;
void __user *buf;
@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
RELOAD_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
/* Don't touch extended registers */
COPY_SEG_CPL3(cs);
@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
get_user_ex(tmp, &sc->fpstate);
buf = compat_ptr(tmp);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
err |= restore_xstate_sig(buf, 1);
force_iret();
return err;
}
@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
unsigned int ax;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
if (ia32_restore_sigcontext(regs, &frame->sc))
goto badframe;
return ax;
return regs->ax;
badframe:
signal_fault(regs, frame, "32bit sigreturn");
@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
unsigned int ax;
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
return regs->ax;
badframe:
signal_fault(regs, frame, "32bit rt sigreturn");


@ -30,24 +30,13 @@
.section .entry.text, "ax"
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d
.if \noebp
.else
movl %ebp,%r9d
.endif
xchg %ecx,%esi
movl %ebx,%edi
movl %edx,%edx /* zero extension */
.endm
/* clobbers %eax */
.macro CLEAR_RREGS offset=0, _r9=rax
/* clobbers %rax */
.macro CLEAR_RREGS _r9=rax
xorl %eax,%eax
movq %rax,\offset+R11(%rsp)
movq %rax,\offset+R10(%rsp)
movq %\_r9,\offset+R9(%rsp)
movq %rax,\offset+R8(%rsp)
movq %rax,R11(%rsp)
movq %rax,R10(%rsp)
movq %\_r9,R9(%rsp)
movq %rax,R8(%rsp)
.endm
/*
@ -60,14 +49,14 @@
* If it's -1 to make us punt the syscall, then (u32)-1 is still
* an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.macro LOAD_ARGS32 _r9=0
.if \_r9
movl \offset+16(%rsp),%r9d
movl R9(%rsp),%r9d
.endif
movl \offset+40(%rsp),%ecx
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
movl RCX(%rsp),%ecx
movl RDX(%rsp),%edx
movl RSI(%rsp),%esi
movl RDI(%rsp),%edi
movl %eax,%eax /* zero extension */
.endm
@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
/*
* 32bit SYSENTER instruction entry.
*
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
* IF and VM in rflags are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
* and does not save old rip (!!!) and rflags.
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx Arg2
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp user stack
* 0(%ebp) Arg6
*
* Interrupts off.
*
* eax system call number
* ebx arg1
* ecx arg2
* edx arg3
* esi arg4
* edi arg5
* ebp user stack
* 0(%ebp) arg6
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code
* path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
*/
ENTRY(ia32_sysenter_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
movq PER_CPU_VAR(kernel_stack), %rsp
addq $(KERNEL_STACK_OFFSET),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs, here we enable it straight after entry:
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
ENABLE_INTERRUPTS(CLBR_NONE)
movl %ebp,%ebp /* zero extension */
pushq_cfi $__USER32_DS
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rbp
CFI_REL_OFFSET rsp,0
pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
CFI_REGISTER rip,r10
pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
/* Zero-extending 32-bit regs, do not remove */
movl %ebp, %ebp
movl %eax, %eax
pushq_cfi %r10
CFI_REL_OFFSET rip,0
pushq_cfi %rax
movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
CFI_REGISTER rip,r10
/* Construct struct pt_regs on stack */
pushq_cfi $__USER32_DS /* pt_regs->ss */
pushq_cfi %rbp /* pt_regs->sp */
CFI_REL_OFFSET rsp,0
pushfq_cfi /* pt_regs->flags */
pushq_cfi $__USER32_CS /* pt_regs->cs */
pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */
CFI_REL_OFFSET rip,0
pushq_cfi_reg rax /* pt_regs->orig_ax */
pushq_cfi_reg rdi /* pt_regs->di */
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
pushq_cfi_reg rax /* pt_regs->ax */
cld
SAVE_ARGS 0,1,0
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
/*
* no need to do an access_ok check here because rbp has been
* 32bit zero extended
*/
ASM_STAC
1: movl (%rbp),%ebp
_ASM_EXTABLE(1b,ia32_badarg)
@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
* ourselves. To save a few cycles, we can check whether
* NT was set instead of doing an unconditional popfq.
*/
testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
testl $X86_EFLAGS_NT,EFLAGS(%rsp)
jnz sysenter_fix_flags
sysenter_flags_fixed:
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
movl %ebp,%r9d /* arg6 */
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
movq %rax,RAX(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysexit_audit
sysexit_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* clear IF, that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
CFI_REGISTER rip,rdx
RESTORE_ARGS 0,24,0,0,0,0
/*
* NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
* NMI between STI and SYSEXIT has poorly specified behavior,
* and an NMI followed by an IRQ with usergs is fatal. So
* we just pretend we're using SYSEXIT but we really use
* SYSRETL instead.
*
* This code path is still called 'sysexit' because it pairs
* with 'sysenter' and it uses the SYSENTER calling convention.
*/
andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
movl RIP(%rsp),%ecx /* User %eip */
CFI_REGISTER rip,rcx
RESTORE_RSI_RDI
xorl %edx,%edx /* avoid info leaks */
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
popfq_cfi
movl EFLAGS(%rsp),%r11d /* User eflags */
/*CFI_RESTORE rflags*/
popq_cfi %rcx /* User %esp */
CFI_REGISTER rsp,rcx
TRACE_IRQS_ON
ENABLE_INTERRUPTS_SYSEXIT32
/*
* SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
* since it avoids a dicey window with interrupts enabled.
*/
movl RSP(%rsp),%esp
/*
* USERGS_SYSRET32 does:
* gsbase = user's gs base
* eip = ecx
* rflags = r11
* cs = __USER32_CS
* ss = __USER_DS
*
* The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
*
* pop %ebp
* pop %edx
* pop %ecx
*
* Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
* avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
* address (already known to user code), and R12-R15 are
* callee-saved and therefore don't contain any interesting
* kernel data.
*/
USERGS_SYSRET32
CFI_RESTORE_STATE
@ -205,18 +247,18 @@ sysexit_from_sys_call:
movl %ebx,%esi /* 2nd arg: 1st syscall arg */
movl %eax,%edi /* 1st arg: syscall number */
call __audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
movl RAX(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
movl RCX(%rsp),%esi /* reload 2nd syscall arg */
movl RDX(%rsp),%edx /* reload 3rd syscall arg */
movl RSI(%rsp),%ecx /* reload 4th syscall arg */
movl RDI(%rsp),%r8d /* reload 5th syscall arg */
.endm
.macro auditsys_exit exit
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
@ -227,13 +269,13 @@ sysexit_from_sys_call:
1: setbe %al /* 1 if error, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
call __audit_syscall_exit
movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
movq RAX(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz \exit
CLEAR_RREGS -ARGOFFSET
CLEAR_RREGS
jmp int_with_check
.endm
@ -253,16 +295,16 @@ sysenter_fix_flags:
sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz sysenter_auditsys
#endif
SAVE_REST
SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
/*
* 32bit SYSCALL instruction entry.
*
* 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
* are not needed). SYSCALL does not save anything on the stack
* and does not change rsp.
*
* Note: rflags saving+masking-with-MSR happens only in Long mode
* (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx return EIP
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp Arg2 [note: not saved in the stack frame, should not be touched]
* %esp user stack
* 0(%esp) Arg6
*
* Interrupts off.
*
* eax system call number
* ecx return address
* ebx arg1
* ebp arg2 (note: not saved in the stack frame, should not be touched)
* edx arg3
* esi arg4
* edi arg5
* esp user stack
* 0(%esp) arg6
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
* path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_DEF_CFA rsp,0
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,0,0
movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
/* Zero-extending 32-bit regs, do not remove */
movl %eax,%eax
/* Construct struct pt_regs on stack */
pushq_cfi $__USER32_DS /* pt_regs->ss */
pushq_cfi %r8 /* pt_regs->sp */
CFI_REL_OFFSET rsp,0
pushq_cfi %r11 /* pt_regs->flags */
pushq_cfi $__USER32_CS /* pt_regs->cs */
pushq_cfi %rcx /* pt_regs->ip */
CFI_REL_OFFSET rip,0
pushq_cfi_reg rax /* pt_regs->orig_ax */
pushq_cfi_reg rdi /* pt_regs->di */
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rbp /* pt_regs->cx */
movl %ebp,%ecx
movq $__USER32_CS,CS-ARGOFFSET(%rsp)
movq $__USER32_DS,SS-ARGOFFSET(%rsp)
movq %r11,EFLAGS-ARGOFFSET(%rsp)
/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
movq %r8,RSP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* no need to do an access_ok check here because r8 has been
32bit zero extended */
/* hardware stack frame is complete now */
pushq_cfi_reg rax /* pt_regs->ax */
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
/*
* no need to do an access_ok check here because r8 has been
* 32bit zero extended
*/
ASM_STAC
1: movl (%r8),%r9d
_ASM_EXTABLE(1b,ia32_badarg)
ASM_CLAC
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz cstar_tracesys
cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
/* r9 already loaded */ /* arg6 */
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
movq %rax,RAX(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysretl_audit
sysretl_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
RESTORE_ARGS 0,-ARG_SKIP,0,0,0
movl RIP-ARGOFFSET(%rsp),%ecx
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
RESTORE_RSI_RDI_RDX
movl RIP(%rsp),%ecx
CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
movl EFLAGS(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/
xorq %r10,%r10
xorq %r9,%r9
xorq %r8,%r8
TRACE_IRQS_ON
movl RSP-ARGOFFSET(%rsp),%esp
movl RSP(%rsp),%esp
CFI_RESTORE rsp
/*
* 64bit->32bit SYSRET restores eip from ecx,
* eflags from r11 (but RF and VM bits are forced to 0),
* cs and ss are loaded from MSRs.
* (Note: 32bit->32bit SYSRET is different: since r11
* does not exist, it merely sets eflags.IF=1).
*/
USERGS_SYSRET32
#ifdef CONFIG_AUDITSYSCALL
cstar_auditsys:
CFI_RESTORE_STATE
movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
movl %r9d,R9(%rsp) /* register to be clobbered by call */
auditsys_entry_common
movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
movl R9(%rsp),%r9d /* reload 6th syscall arg */
jmp cstar_dispatch
sysretl_audit:
@ -368,17 +444,17 @@ sysretl_audit:
cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz cstar_auditsys
#endif
xchgl %r9d,%ebp
SAVE_REST
CLEAR_RREGS 0, r9
SAVE_EXTRA_REGS
CLEAR_RREGS r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
xchgl %ebp,%r9d
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@ -391,78 +467,94 @@ ia32_badarg:
jmp ia32_sysret
CFI_ENDPROC
/*
* Emulated IA32 system calls via int 0x80.
/*
* Emulated IA32 system calls via int 0x80.
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx Arg2
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp Arg6 [note: not saved in the stack frame, should not be touched]
* Arguments:
* eax system call number
* ebx arg1
* ecx arg2
* edx arg3
* esi arg4
* edi arg5
* ebp arg6 (note: not saved in the stack frame, should not be touched)
*
* Notes:
* Uses the same stack frame as the x86-64 version.
* All registers except %eax must be saved (but ptrace may violate that)
* Uses the same stack frame as the x86-64 version.
* All registers except eax must be saved (but ptrace may violate that).
* Arguments are zero extended. For system calls that want sign extension and
* take long arguments a wrapper is needed. Most calls can just be called
* directly.
* Assumes it is only called from user space and entered with interrupts off.
*/
* Assumes it is only called from user space and entered with interrupts off.
*/
ENTRY(ia32_syscall)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP
/*CFI_REL_OFFSET ss,SS-RIP*/
CFI_REL_OFFSET rsp,RSP-RIP
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
/*CFI_REL_OFFSET cs,CS-RIP*/
CFI_REL_OFFSET rip,RIP-RIP
CFI_DEF_CFA rsp,5*8
/*CFI_REL_OFFSET ss,4*8 */
CFI_REL_OFFSET rsp,3*8
/*CFI_REL_OFFSET rflags,2*8 */
/*CFI_REL_OFFSET cs,1*8 */
CFI_REL_OFFSET rip,0*8
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
PARAVIRT_ADJUST_EXCEPTION_FRAME
SWAPGS
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%eax
pushq_cfi %rax
/* Zero-extending 32-bit regs, do not remove */
movl %eax,%eax
/* Construct struct pt_regs on stack (iret frame is already on stack) */
pushq_cfi_reg rax /* pt_regs->orig_ax */
pushq_cfi_reg rdi /* pt_regs->di */
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
pushq_cfi_reg rax /* pt_regs->ax */
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
SAVE_ARGS 0,1,0
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
movl %ebp,%r9d /* arg6 */
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
movq %rax,RAX(%rsp)
ia32_ret_from_sys_call:
CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call
CLEAR_RREGS
jmp int_ret_from_sys_call
ia32_tracesys:
SAVE_REST
ia32_tracesys:
SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
movq $0,ORIG_RAX(%rsp)
movq $-ENOSYS,%rax
jmp ia32_sysret
@ -479,8 +571,6 @@ GLOBAL(\label)
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
PTREGSCALL stub32_sigreturn, sys32_sigreturn
PTREGSCALL stub32_execve, compat_sys_execve
PTREGSCALL stub32_execveat, compat_sys_execveat
PTREGSCALL stub32_fork, sys_fork
PTREGSCALL stub32_vfork, sys_vfork
@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
ALIGN
ia32_ptregs_common:
popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
CFI_REL_OFFSET rax,RAX-ARGOFFSET
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
CFI_REL_OFFSET rdx,RDX-ARGOFFSET
CFI_REL_OFFSET rsi,RSI-ARGOFFSET
CFI_REL_OFFSET rdi,RDI-ARGOFFSET
CFI_REL_OFFSET rip,RIP-ARGOFFSET
/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
SAVE_REST
CFI_DEF_CFA rsp,SIZEOF_PTREGS
CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP
/* CFI_REL_OFFSET cs,CS*/
/* CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP
/* CFI_REL_OFFSET ss,SS*/
SAVE_EXTRA_REGS 8
call *%rax
RESTORE_REST
jmp ia32_sysret /* misbalances the return cache */
RESTORE_EXTRA_REGS 8
ret
CFI_ENDPROC
END(ia32_ptregs_common)


@ -1,7 +0,0 @@
#include <linux/kernel.h>
#include <linux/errno.h>
long compat_ni_syscall(void)
{
return -ENOSYS;
}


@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
advice);
}
long sys32_vm86_warning(void)
{
struct task_struct *me = current;
static char lastcomm[sizeof(me->comm)];
if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
compat_printk(KERN_INFO
"%s: vm86 mode not supported on 64 bit kernel\n",
me->comm);
strncpy(lastcomm, me->comm, sizeof(lastcomm));
}
return -ENOSYS;
}
asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
size_t count)
{


@ -1,25 +0,0 @@
/* System call table for ia32 emulation. */
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <asm/asm-offsets.h>
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
typedef void (*sys_call_ptr_t)(void);
extern void compat_ni_syscall(void);
const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
#include <asm/syscalls_32.h>
};


@ -18,12 +18,63 @@
.endm
#endif
.macro altinstruction_entry orig alt feature orig_len alt_len
.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.long \orig - .
.long \alt - .
.word \feature
.byte \orig_len
.byte \alt_len
.byte \pad_len
.endm
.macro ALTERNATIVE oldinstr, newinstr, feature
140:
\oldinstr
141:
.skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
142:
.pushsection .altinstructions,"a"
altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
.popsection
.pushsection .altinstr_replacement,"ax"
143:
\newinstr
144:
.popsection
.endm
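
The .skip directive above is what pads the original instruction with NOPs when the replacement is longer. gas relational operators evaluate to -1 (true) or 0 (false), so "-(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b))" works out to max(replacement length - original length, 0). A small C model of that arithmetic, purely for illustration (C relationals yield 1/0, so the gas truth value is spelled out by hand):

/* Models ".skip -((repl - orig) > 0) * (repl - orig), 0x90" from above. */
static int nop_pad_len(int repl_len, int orig_len)
{
	int d = repl_len - orig_len;
	int truth = (d > 0) ? -1 : 0;	/* gas evaluates "d > 0" to -1 or 0 */
	return -truth * d;		/* d bytes of 0x90 if d > 0, else none */
}
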
#define old_len 141b-140b
#define new_len1 144f-143f
#define new_len2 145f-144f
/*
* max without conditionals. Idea adapted from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
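
The same branchless-max identity in C, with a couple of sanity checks; purely illustrative (a C comparison yields 1/0, so here a single negation is enough to build the all-ones mask):

#include <assert.h>

/* max(a, b) without a branch: the mask is all-ones exactly when a < b. */
static int alt_max_short_c(int a, int b)
{
	return a ^ ((a ^ b) & -(a < b));
}

int main(void)
{
	assert(alt_max_short_c(3, 7) == 7);
	assert(alt_max_short_c(7, 3) == 7);
	assert(alt_max_short_c(5, 5) == 5);
	return 0;
}
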
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
(alt_max_short(new_len1, new_len2) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
.popsection
.pushsection .altinstr_replacement,"ax"
143:
\newinstr1
144:
\newinstr2
145:
.popsection
.endm
#endif /* __ASSEMBLY__ */


@ -48,8 +48,9 @@ struct alt_instr {
s32 repl_offset; /* offset to replacement instruction */
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
};
u8 replacementlen; /* length of new instruction */
u8 padlen; /* length of build-time padding */
} __packed;
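
With the new padlen byte each entry is 4 + 4 + 2 + 1 + 1 + 1 = 13 bytes, and __packed keeps the C struct in step with the records the asm side emits into .altinstructions. A hypothetical compile-time check, not part of the patch and assuming kernel context:

/* Illustrative only: apply_alternatives() walks .altinstructions as an
 * array of struct alt_instr, so the struct must stay unpadded (13 bytes). */
_Static_assert(sizeof(struct alt_instr) == 13,
	       "struct alt_instr must match the .altinstructions record size");
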
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
#define b_replacement(num) "664"#num
#define e_replacement(num) "665"#num
#define b_replacement(number) "663"#number
#define e_replacement(number) "664"#number
#define alt_end_marker "663"
#define alt_slen "662b-661b"
#define alt_pad_len alt_end_marker"b-662b"
#define alt_total_slen alt_end_marker"b-661b"
#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
#define alt_slen "662b-661b"
#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
#define __OLDINSTR(oldinstr, num) \
"661:\n\t" oldinstr "\n662:\n" \
".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
#define ALTINSTR_ENTRY(feature, number) \
#define OLDINSTR(oldinstr, num) \
__OLDINSTR(oldinstr, num) \
alt_end_marker ":\n"
/*
* max without conditionals. Idea adapted from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*
* The additional "-" is needed because gas works with s32s.
*/
#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
/*
* Pad the second replacement alternative with additional NOPs if it is
* additionally longer than the first replacement alternative.
*/
#define OLDINSTR_2(oldinstr, num1, num2) \
"661:\n\t" oldinstr "\n662:\n" \
".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label */ \
" .long " b_replacement(number)"f - .\n" /* new instruction */ \
" .long " b_replacement(num)"f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
" .byte " alt_slen "\n" /* source len */ \
" .byte " alt_rlen(number) "\n" /* replacement len */
" .byte " alt_total_slen "\n" /* source len */ \
" .byte " alt_rlen(num) "\n" /* replacement len */ \
" .byte " alt_pad_len "\n" /* pad len */
#define DISCARD_ENTRY(number) /* rlen <= slen */ \
" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \
OLDINSTR(oldinstr) \
OLDINSTR(oldinstr, 1) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature, 1) \
".popsection\n" \
".pushsection .discard,\"aw\",@progbits\n" \
DISCARD_ENTRY(1) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
".popsection"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR(oldinstr) \
OLDINSTR_2(oldinstr, 1, 2) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature1, 1) \
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
".pushsection .discard,\"aw\",@progbits\n" \
DISCARD_ENTRY(1) \
DISCARD_ENTRY(2) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative(oldinstr, newinstr, feature) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
/*
* Alternative inline assembly with input.
*


@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
ASM_OUTPUT2("=r" (v), "=m" (*addr)),
ASM_OUTPUT2("0" (v), "m" (*addr)));
}


@ -95,13 +95,11 @@ do { \
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
* code region.
*
* (Could use an alternative three way for this if there was one.)
*/
static __always_inline void rdtsc_barrier(void)
{
alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
"lfence", X86_FEATURE_LFENCE_RDTSC);
}
#endif /* _ASM_X86_BARRIER_H */


@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
#define R15 0
#define R14 8
#define R13 16
#define R12 24
#define RBP 32
#define RBX 40
/* The layout forms the "struct pt_regs" on the stack: */
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
#define R15 0*8
#define R14 1*8
#define R13 2*8
#define R12 3*8
#define RBP 4*8
#define RBX 5*8
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 6*8
#define R10 7*8
#define R9 8*8
#define R8 9*8
#define RAX 10*8
#define RCX 11*8
#define RDX 12*8
#define RSI 13*8
#define RDI 14*8
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
#define ORIG_RAX 15*8
/* Return frame for iretq */
#define RIP 16*8
#define CS 17*8
#define EFLAGS 18*8
#define RSP 19*8
#define SS 20*8
/* arguments: interrupts/non tracing syscalls only save up to here: */
#define R11 48
#define R10 56
#define R9 64
#define R8 72
#define RAX 80
#define RCX 88
#define RDX 96
#define RSI 104
#define RDI 112
#define ORIG_RAX 120 /* + error_code */
/* end of arguments */
/* cpu exception frame or undefined in case of fast syscall: */
#define RIP 128
#define CS 136
#define EFLAGS 144
#define RSP 152
#define SS 160
#define ARGOFFSET R11
.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
subq $9*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 9*8+\addskip
movq_cfi rdi, 8*8
movq_cfi rsi, 7*8
movq_cfi rdx, 6*8
.if \save_rcx
movq_cfi rcx, 5*8
.endif
.if \rax_enosys
movq $-ENOSYS, 4*8(%rsp)
.else
movq_cfi rax, 4*8
.endif
.if \save_r891011
movq_cfi r8, 3*8
movq_cfi r9, 2*8
movq_cfi r10, 1*8
movq_cfi r11, 0*8
.endif
#define SIZEOF_PTREGS 21*8
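
These offsets are meant to mirror the x86_64 struct pt_regs layout (see the ptrace.h hunk further down) rather than the old partial SAVE_ARGS frame. A hypothetical offsetof() cross-check, not part of the patch and assuming kernel headers:

#include <stddef.h>
#include <asm/ptrace.h>

/* Illustrative: the asm offsets above should track struct pt_regs exactly. */
_Static_assert(offsetof(struct pt_regs, r15) == 0*8, "R15 offset");
_Static_assert(offsetof(struct pt_regs, bx) == 5*8, "RBX offset");
_Static_assert(offsetof(struct pt_regs, orig_ax) == 15*8, "ORIG_RAX offset");
_Static_assert(offsetof(struct pt_regs, ss) == 20*8, "SS offset");
_Static_assert(sizeof(struct pt_regs) == 21*8, "SIZEOF_PTREGS");
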
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
subq $15*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 15*8+\addskip
.endm
#define ARG_SKIP (9*8)
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
.if \r11
movq_cfi r11, 6*8+\offset
.endif
.if \r8910
movq_cfi r10, 7*8+\offset
movq_cfi r9, 8*8+\offset
movq_cfi r8, 9*8+\offset
.endif
.if \rax
movq_cfi rax, 10*8+\offset
.endif
.if \rcx
movq_cfi rcx, 11*8+\offset
.endif
movq_cfi rdx, 12*8+\offset
movq_cfi rsi, 13*8+\offset
movq_cfi rdi, 14*8+\offset
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_R891011
SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
.endm
.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
rstor_r8910=1, rstor_rdx=1
.macro SAVE_EXTRA_REGS offset=0
movq_cfi r15, 0*8+\offset
movq_cfi r14, 1*8+\offset
movq_cfi r13, 2*8+\offset
movq_cfi r12, 3*8+\offset
movq_cfi rbp, 4*8+\offset
movq_cfi rbx, 5*8+\offset
.endm
.macro SAVE_EXTRA_REGS_RBP offset=0
movq_cfi rbp, 4*8+\offset
.endm
.macro RESTORE_EXTRA_REGS offset=0
movq_cfi_restore 0*8+\offset, r15
movq_cfi_restore 1*8+\offset, r14
movq_cfi_restore 2*8+\offset, r13
movq_cfi_restore 3*8+\offset, r12
movq_cfi_restore 4*8+\offset, rbp
movq_cfi_restore 5*8+\offset, rbx
.endm
.macro ZERO_EXTRA_REGS
xorl %r15d, %r15d
xorl %r14d, %r14d
xorl %r13d, %r13d
xorl %r12d, %r12d
xorl %ebp, %ebp
xorl %ebx, %ebx
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq_cfi_restore 0*8, r11
movq_cfi_restore 6*8, r11
.endif
.if \rstor_r8910
movq_cfi_restore 1*8, r10
movq_cfi_restore 2*8, r9
movq_cfi_restore 3*8, r8
movq_cfi_restore 7*8, r10
movq_cfi_restore 8*8, r9
movq_cfi_restore 9*8, r8
.endif
.if \rstor_rax
movq_cfi_restore 4*8, rax
movq_cfi_restore 10*8, rax
.endif
.if \rstor_rcx
movq_cfi_restore 5*8, rcx
movq_cfi_restore 11*8, rcx
.endif
.if \rstor_rdx
movq_cfi_restore 6*8, rdx
.endif
movq_cfi_restore 7*8, rsi
movq_cfi_restore 8*8, rdi
.if ARG_SKIP+\addskip > 0
addq $ARG_SKIP+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
movq_cfi_restore 12*8, rdx
.endif
movq_cfi_restore 13*8, rsi
movq_cfi_restore 14*8, rdi
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RAX
RESTORE_C_REGS_HELPER 0,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX
RESTORE_C_REGS_HELPER 1,0,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_R11
RESTORE_C_REGS_HELPER 1,1,0,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
RESTORE_C_REGS_HELPER 1,0,0,1,1
.endm
.macro RESTORE_RSI_RDI
RESTORE_C_REGS_HELPER 0,0,0,0,0
.endm
.macro RESTORE_RSI_RDI_RDX
RESTORE_C_REGS_HELPER 0,0,0,0,1
.endm
.macro LOAD_ARGS offset, skiprax=0
movq \offset(%rsp), %r11
movq \offset+8(%rsp), %r10
movq \offset+16(%rsp), %r9
movq \offset+24(%rsp), %r8
movq \offset+40(%rsp), %rcx
movq \offset+48(%rsp), %rdx
movq \offset+56(%rsp), %rsi
movq \offset+64(%rsp), %rdi
.if \skiprax
.else
movq \offset+72(%rsp), %rax
.endif
.endm
#define REST_SKIP (6*8)
.macro SAVE_REST
subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
movq_cfi rbx, 5*8
movq_cfi rbp, 4*8
movq_cfi r12, 3*8
movq_cfi r13, 2*8
movq_cfi r14, 1*8
movq_cfi r15, 0*8
.endm
.macro RESTORE_REST
movq_cfi_restore 0*8, r15
movq_cfi_restore 1*8, r14
movq_cfi_restore 2*8, r13
movq_cfi_restore 3*8, r12
movq_cfi_restore 4*8, rbp
movq_cfi_restore 5*8, rbx
addq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
.endm
.macro SAVE_ALL
SAVE_ARGS
SAVE_REST
.endm
.macro RESTORE_ALL addskip=0
RESTORE_REST
RESTORE_ARGS 1, \addskip
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
addq $15*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
.endm
.macro icebp
@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
*/
.macro SAVE_ALL
pushl_cfi %eax
CFI_REL_OFFSET eax, 0
pushl_cfi %ebp
CFI_REL_OFFSET ebp, 0
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg eax
pushl_cfi_reg ebp
pushl_cfi_reg edi
pushl_cfi_reg esi
pushl_cfi_reg edx
pushl_cfi_reg ecx
pushl_cfi_reg ebx
.endm
.macro RESTORE_ALL
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %ecx
CFI_RESTORE ecx
popl_cfi %edx
CFI_RESTORE edx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi %ebp
CFI_RESTORE ebp
popl_cfi %eax
CFI_RESTORE eax
popl_cfi_reg ebx
popl_cfi_reg ecx
popl_cfi_reg edx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi_reg ebp
popl_cfi_reg eax
.endm
#endif /* CONFIG_X86_64 */


@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
sp = task_pt_regs(current)->sp;
} else {
/* -128 for the x32 ABI redzone */
sp = this_cpu_read(old_rsp) - 128;
sp = task_pt_regs(current)->sp - 128;
}
return (void __user *)round_down(sp - len, 16);


@ -231,7 +231,9 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* 1: do replace */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO
/*
* We need to spell the jumps to the compiler because, depending on the offset,
* the replacement jump can be bigger than the original jump, and this we cannot
* have. Thus, we force the jump to the widest, 4-byte, signed relative
* offset even though the last would often fit in less bytes.
*/
asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
asm_volatile_goto("1: jmp %l[t_dynamic]\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
"3:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 3f - .\n" /* repl offset */
" .long 4f - .\n" /* repl offset */
" .word %P1\n" /* always replace */
" .byte 2b - 1b\n" /* src len */
" .byte 4f - 3f\n" /* repl len */
" .byte 3b - 1b\n" /* src len */
" .byte 5f - 4f\n" /* repl len */
" .byte 3b - 2b\n" /* pad len */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: .byte 0xe9\n .long %l[t_no] - 2b\n"
"4:\n"
"4: jmp %l[t_no]\n"
"5:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* src len */
" .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS)
: : t_dynamic, t_no);
@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P2\n" /* always replace */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P1\n" /* feature bit */
" .byte 4b - 3b\n" /* src len */
" .byte 6f - 5f\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */


@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write-protected.
*/
#define set_intr_gate(n, addr) \
#define set_intr_gate_notrace(n, addr) \
do { \
BUG_ON((unsigned)n > 0xFF); \
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
__KERNEL_CS); \
} while (0)
#define set_intr_gate(n, addr) \
do { \
set_intr_gate_notrace(n, addr); \
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
0, 0, __KERNEL_CS); \
} while (0)


@ -86,11 +86,23 @@
CFI_ADJUST_CFA_OFFSET 8
.endm
.macro pushq_cfi_reg reg
pushq %\reg
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET \reg, 0
.endm
.macro popq_cfi reg
popq \reg
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro popq_cfi_reg reg
popq %\reg
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE \reg
.endm
.macro pushfq_cfi
pushfq
CFI_ADJUST_CFA_OFFSET 8
@ -116,11 +128,23 @@
CFI_ADJUST_CFA_OFFSET 4
.endm
.macro pushl_cfi_reg reg
pushl %\reg
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET \reg, 0
.endm
.macro popl_cfi reg
popl \reg
CFI_ADJUST_CFA_OFFSET -4
.endm
.macro popl_cfi_reg reg
popl %\reg
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE \reg
.endm
.macro pushfl_cfi
pushfl
CFI_ADJUST_CFA_OFFSET 4


@ -171,10 +171,11 @@ do { \
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
regs->ax = regs->bx = regs->cx = regs->dx = 0;
regs->si = regs->di = regs->bp = 0;
/* Commented-out registers are cleared in stub_execve */
/*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
regs->si = regs->di /*= regs->bp*/ = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
/*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
t->fs = t->gs = 0;
t->fsindex = t->gsindex = 0;
t->ds = t->es = ds;


@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif
extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
- FIRST_EXTERNAL_VECTOR])(void);
extern char irq_entries_start[];
#ifdef CONFIG_TRACING
#define trace_interrupt interrupt
#define trace_irq_entries_start irq_entries_start
#endif
#define VECTOR_UNDEFINED (-1)


@ -69,7 +69,7 @@ struct insn {
const insn_byte_t *next_byte;
};
#define MAX_INSN_SIZE 16
#define MAX_INSN_SIZE 15
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)


@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
#define USERGS_SYSRET32 \
swapgs; \
sysretl
#define ENABLE_INTERRUPTS_SYSEXIT32 \
swapgs; \
sti; \
sysexit
#else
#define INTERRUPT_RETURN iret
@ -163,33 +159,9 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(flags);
}
#endif /* !__ASSEMBLY__ */
#else
#ifdef CONFIG_X86_64
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
SAVE_REST; \
LOCKDEP_SYS_EXIT; \
RESTORE_REST; \
cli; \
TRACE_IRQS_OFF;
#else
#define ARCH_LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call lockdep_sys_exit; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_LOCKDEP_SYS_EXIT_IRQ
#endif
#ifdef __ASSEMBLY__
#ifdef CONFIG_TRACE_IRQFLAGS
# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
@ -198,12 +170,29 @@ static inline int arch_irqs_disabled(void)
# define TRACE_IRQS_OFF
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
# else
# ifdef CONFIG_X86_64
# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
# define LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
call lockdep_sys_exit_thunk; \
cli; \
TRACE_IRQS_OFF;
# else
# define LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call lockdep_sys_exit; \
popl %edx; \
popl %ecx; \
popl %eax;
# define LOCKDEP_SYS_EXIT_IRQ
# endif
#else
# define LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ
# endif
#endif /* __ASSEMBLY__ */
#endif
#endif /* __ASSEMBLY__ */
#endif


@ -976,11 +976,6 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
#define ENABLE_INTERRUPTS_SYSEXIT32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */


@ -210,8 +210,23 @@ struct x86_hw_tss {
unsigned long sp0;
unsigned short ss0, __ss0h;
unsigned long sp1;
/* ss1 caches MSR_IA32_SYSENTER_CS: */
unsigned short ss1, __ss1h;
/*
* We don't use ring 1, so ss1 is a convenient scratch space in
* the same cacheline as sp0. We use ss1 to cache the value in
* MSR_IA32_SYSENTER_CS. When we context switch
* MSR_IA32_SYSENTER_CS, we first check if the new value being
* written matches ss1, and, if it's not, then we wrmsr the new
* value and update ss1.
*
* The only reason we context switch MSR_IA32_SYSENTER_CS is
* that we set it to zero in vm86 tasks to avoid corrupting the
* stack if we were to go through the sysenter path from vm86
* mode.
*/
unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
unsigned short __ss1h;
unsigned long sp2;
unsigned short ss2, __ss2h;
unsigned long __cr3;
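
A minimal sketch of the ss1-cached MSR update described in the comment above; the helper name and shape are hypothetical (the real check sits in the context-switch code), shown only to make the caching scheme concrete:

/* Hypothetical helper, kernel context assumed (<asm/msr.h>, <asm/processor.h>). */
static void set_sysenter_cs_cached(struct tss_struct *tss, unsigned short new_cs)
{
	if (tss->x86_tss.ss1 == new_cs)		/* cached copy already matches */
		return;
	tss->x86_tss.ss1 = new_cs;		/* refresh the cache ... */
	wrmsr(MSR_IA32_SYSENTER_CS, new_cs, 0);	/* ... then the MSR itself */
}
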
@ -276,13 +291,17 @@ struct tss_struct {
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
* .. and then another 0x100 bytes for the emergency kernel stack:
* Space for the temporary SYSENTER stack:
*/
unsigned long stack[64];
unsigned long SYSENTER_stack[64];
} ____cacheline_aligned;
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#endif
/*
* Save the original ist values for checking stack pointers during debugging
@ -474,7 +493,6 @@ struct thread_struct {
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
#else
unsigned long usersp; /* Copy from PDA */
unsigned short es;
unsigned short ds;
unsigned short fsindex;
@ -564,6 +582,16 @@ static inline void native_swapgs(void)
#endif
}
static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
/* sp0 on x86_32 is special in and around vm86 mode. */
return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@ -761,10 +789,10 @@ extern char ignore_fpu_irq;
#define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32
# define BASE_PREFETCH ASM_NOP4
# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
# define BASE_PREFETCH "prefetcht0 (%1)"
# define BASE_PREFETCH "prefetcht0 %P1"
#endif
/*
@ -775,10 +803,9 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
alternative_input(BASE_PREFETCH,
"prefetchnta (%1)",
alternative_input(BASE_PREFETCH, "prefetchnta %P1",
X86_FEATURE_XMM,
"r" (x));
"m" (*(const char *)x));
}
/*
@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
*/
static inline void prefetchw(const void *x)
{
alternative_input(BASE_PREFETCH,
"prefetchw (%1)",
X86_FEATURE_3DNOW,
"r" (x));
alternative_input(BASE_PREFETCH, "prefetchw %P1",
X86_FEATURE_3DNOWPREFETCH,
"m" (*(const char *)x));
}
static inline void spin_lock_prefetch(const void *x)
@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
prefetchw(x);
}
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX STACK_TOP
#define INIT_THREAD { \
.sp0 = sizeof(init_stack) + (long)&init_stack, \
.sp0 = TOP_OF_INIT_STACK, \
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
}
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
#define INIT_TSS { \
.x86_tss = { \
.sp0 = sizeof(init_stack) + (long)&init_stack, \
.ss0 = __KERNEL_DS, \
.ss1 = __KERNEL_CS, \
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
}, \
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
}
extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
#define KSTK_TOP(info) \
({ \
unsigned long *__ptr = (unsigned long *)(info); \
(unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
})
/*
* The below -8 is to reserve 8 bytes on top of the ring0 stack.
* TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
* This is necessary to guarantee that the entire "struct pt_regs"
* is accessible even if the CPU hasn't stored the SS/ESP registers
* on the stack (interrupt gate does not save these registers
@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
* "struct pt_regs" is possible, but they may contain the
* completely wrong values.
*/
#define task_pt_regs(task) \
({ \
struct pt_regs *__regs__; \
__regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
__regs__ - 1; \
#define task_pt_regs(task) \
({ \
unsigned long __ptr = (unsigned long)task_stack_page(task); \
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
((struct pt_regs *)__ptr) - 1; \
})
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
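To make the new task_pt_regs() arithmetic concrete, here is a minimal user-space sketch of the same computation (illustration only, not kernel code): THREAD_SIZE and the stack-page address are made-up example values, and the struct is just a stand-in for the real struct pt_regs.

#include <stdio.h>

#define EXAMPLE_THREAD_SIZE		(2 * 4096UL)	/* assumption for the example */
#define TOP_OF_KERNEL_STACK_PADDING_32	8UL		/* the x86_32 value */

struct fake_pt_regs { unsigned long slots[17]; };	/* stand-in only */

int main(void)
{
	unsigned long stack_page = 0xc1000000UL;	/* pretend task_stack_page() result */
	unsigned long top = stack_page + EXAMPLE_THREAD_SIZE
				- TOP_OF_KERNEL_STACK_PADDING_32;
	struct fake_pt_regs *regs = (struct fake_pt_regs *)top - 1;

	/* pt_regs ends right below the padding at the top of the stack page. */
	printf("pt_regs at %#lx, padding from %#lx to %#lx\n",
	       (unsigned long)regs, top, stack_page + EXAMPLE_THREAD_SIZE);
	return 0;
}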
@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
}
#define INIT_TSS { \
.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
.sp0 = TOP_OF_INIT_STACK \
}
/*
@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
/*
* User space RSP while inside the SYSCALL fast path
*/
DECLARE_PER_CPU(unsigned long, old_rsp);
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,

View file

@ -31,13 +31,17 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
/* arguments: non interrupts/non tracing syscalls only save up to here*/
/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@ -47,9 +51,12 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_ax;
/* end of arguments */
/* cpu exception frame or undefined */
/* Return frame for iretq */
unsigned long ip;
unsigned long cs;
unsigned long flags;
@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
}
/*
* user_mode_vm(regs) determines whether a register set came from user mode.
* This is true if V8086 mode was enabled OR if the register set was from
* protected mode with RPL-3 CS value. This tricky test checks that with
* one comparison. Many places in the kernel can bypass this full check
* if they have already ruled out V8086 mode, so user_mode(regs) can be used.
* user_mode(regs) determines whether a register set came from user
* mode. On x86_32, this is true if V8086 mode was enabled OR if the
* register set was from protected mode with RPL-3 CS value. This
* tricky test checks that with one comparison.
*
* On x86_64, vm86 mode is mercifully nonexistent, and we don't need
* the extra check.
*/
static inline int user_mode(struct pt_regs *regs)
{
@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
#endif
}
static inline int user_mode_vm(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
USER_RPL;
#else
return user_mode(regs);
#endif
}
static inline int v8086_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
#define current_user_stack_pointer() this_cpu_read(old_rsp)
/* ia32 vs. x32 difference */
#define compat_user_stack_pointer() \
(test_thread_flag(TIF_IA32) \
? current_pt_regs()->sp \
: this_cpu_read(old_rsp))
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
#ifdef CONFIG_X86_32
@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
*/
#define arch_ptrace_stop_needed(code, info) \
({ \
set_thread_flag(TIF_NOTIFY_RESUME); \
force_iret(); \
false; \
})

View file

@ -3,8 +3,10 @@
#include <linux/const.h>
/* Constructor for a conventional segment GDT (or LDT) entry */
/* This is a macro so it can be used in initializers */
/*
* Constructor for a conventional segment GDT (or LDT) entry.
* This is a macro so it can be used in initializers.
*/
#define GDT_ENTRY(flags, base, limit) \
((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
(((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
@ -12,198 +14,228 @@
(((base) & _AC(0x00ffffff,ULL)) << 16) | \
(((limit) & _AC(0x0000ffff,ULL))))
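As a worked example of the packing above (the term covering limit bits 16-19 falls between the two hunks and is not visible in this excerpt, so it is restored in the demo macro): feeding in the classic flat 4 GiB ring-0 code attributes 0xc09b with base 0 and a page-granular 0xfffff limit yields the familiar 0x00cf9b000000ffff descriptor. A small self-contained check, for illustration only:

#include <assert.h>
#include <stdio.h>

/* Same bit packing as GDT_ENTRY() above, with the limit[19:16] term restored. */
#define GDT_ENTRY_DEMO(flags, base, limit)		\
	((((base)  & 0xff000000ULL) << (56-24)) |	\
	 (((flags) & 0x0000f0ffULL) << 40) |		\
	 (((limit) & 0x000f0000ULL) << (48-16)) |	\
	 (((base)  & 0x00ffffffULL) << 16) |		\
	 (((limit) & 0x0000ffffULL)))

int main(void)
{
	/* 0xc09b: present, DPL 0, 32-bit code segment, 4K granularity */
	unsigned long long cs = GDT_ENTRY_DEMO(0xc09b, 0, 0xfffff);

	printf("%#llx\n", cs);			/* prints 0xcf9b000000ffff */
	assert(cs == 0x00cf9b000000ffffULL);
	return 0;
}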
/* Simple and small GDT entries for booting only */
/* Simple and small GDT entries for booting only: */
#define GDT_ENTRY_BOOT_CS 2
#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
#define GDT_ENTRY_BOOT_DS 3
#define GDT_ENTRY_BOOT_TSS 4
#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
/*
* Bottom two bits of selector give the ring
* privilege level
*/
#define SEGMENT_RPL_MASK 0x3
#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2)
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8)
/* User mode is privilege level 3: */
#define USER_RPL 0x3
#define SEGMENT_RPL_MASK 0x3 /*
* Bottom two bits of selector give the ring
* privilege level
*/
#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */
#define USER_RPL 0x3 /* User mode is privilege level 3 */
#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */
/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
#define SEGMENT_TI_MASK 0x4
/* LDT segment has TI set ... */
#define SEGMENT_LDT 0x4
/* ... GDT has it cleared */
#define SEGMENT_GDT 0x0
#define GDT_ENTRY_INVALID_SEG 0
#ifdef CONFIG_X86_32
/*
* The layout of the per-CPU GDT under Linux:
*
* 0 - null
* 0 - null <=== cacheline #1
* 1 - reserved
* 2 - reserved
* 3 - reserved
*
* 4 - unused <==== new cacheline
* 4 - unused <=== cacheline #2
* 5 - unused
*
* ------- start of TLS (Thread-Local Storage) segments:
*
* 6 - TLS segment #1 [ glibc's TLS segment ]
* 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
* 8 - TLS segment #3
* 8 - TLS segment #3 <=== cacheline #3
* 9 - reserved
* 10 - reserved
* 11 - reserved
*
* ------- start of kernel segments:
*
* 12 - kernel code segment <==== new cacheline
* 12 - kernel code segment <=== cacheline #4
* 13 - kernel data segment
* 14 - default user CS
* 15 - default user DS
* 16 - TSS
* 16 - TSS <=== cacheline #5
* 17 - LDT
* 18 - PNPBIOS support (16->32 gate)
* 19 - PNPBIOS support
* 20 - PNPBIOS support
* 20 - PNPBIOS support <=== cacheline #6
* 21 - PNPBIOS support
* 22 - PNPBIOS support
* 23 - APM BIOS support
* 24 - APM BIOS support
* 24 - APM BIOS support <=== cacheline #7
* 25 - APM BIOS support
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
* 28 - stack_canary-20 [ for stack protector ]
* 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
*/
#define GDT_ENTRY_TLS_MIN 6
#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
#define GDT_ENTRY_TLS_MIN 6
#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
#define GDT_ENTRY_KERNEL_CS 12
#define GDT_ENTRY_KERNEL_DS 13
#define GDT_ENTRY_DEFAULT_USER_CS 14
#define GDT_ENTRY_DEFAULT_USER_DS 15
#define GDT_ENTRY_TSS 16
#define GDT_ENTRY_LDT 17
#define GDT_ENTRY_PNPBIOS_CS32 18
#define GDT_ENTRY_PNPBIOS_CS16 19
#define GDT_ENTRY_PNPBIOS_DS 20
#define GDT_ENTRY_PNPBIOS_TS1 21
#define GDT_ENTRY_PNPBIOS_TS2 22
#define GDT_ENTRY_APMBIOS_BASE 23
#define GDT_ENTRY_KERNEL_BASE (12)
#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0)
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1)
#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4)
#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5)
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6)
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11)
#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14)
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
#ifdef CONFIG_SMP
#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
#else
#define __KERNEL_PERCPU 0
#endif
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
#ifdef CONFIG_CC_STACKPROTECTOR
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
#define __KERNEL_STACK_CANARY 0
#endif
#define GDT_ENTRY_ESPFIX_SS 26
#define GDT_ENTRY_PERCPU 27
#define GDT_ENTRY_STACK_CANARY 28
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*
* The GDT has 32 entries
* Number of entries in the GDT table:
*/
#define GDT_ENTRIES 32
/* The PnP BIOS entries in the GDT */
#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
/* The PnP BIOS selectors */
#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
#define GDT_ENTRIES 32
/*
* Matching rules for certain types of segments.
* Segment selector values corresponding to the above entries:
*/
/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
/* segment for calling fn: */
#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
/* code segment for BIOS: */
#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
/* data segment for BIOS: */
#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
/* transfer data segment: */
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
/* another data segment: */
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
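A quick sanity check of the SEGMENT_IS_PNP_CODE() test above, using the GDT indices defined earlier in this header (a standalone sketch, not kernel code): masking with 0xf4 clears the RPL bits and the single bit that distinguishes the 32-bit from the 16-bit PnP code selector, so both compare equal to PNP_CS32 while the PnP data selectors do not.

#include <assert.h>

#define PNP_CS32	(18 * 8)	/* GDT_ENTRY_PNPBIOS_CS32 = 18 -> 0x90 */
#define PNP_CS16	(19 * 8)	/* GDT_ENTRY_PNPBIOS_CS16 = 19 -> 0x98 */
#define PNP_DS		(20 * 8)	/* GDT_ENTRY_PNPBIOS_DS   = 20 -> 0xa0 */

#define SEGMENT_IS_PNP_CODE(x)	(((x) & 0xf4) == PNP_CS32)

int main(void)
{
	assert(SEGMENT_IS_PNP_CODE(PNP_CS32));	/* 0x90 & 0xf4 == 0x90 */
	assert(SEGMENT_IS_PNP_CODE(PNP_CS16));	/* 0x98 & 0xf4 == 0x90 */
	assert(!SEGMENT_IS_PNP_CODE(PNP_DS));	/* 0xa0 & 0xf4 == 0xa0 */
	return 0;
}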
#ifdef CONFIG_SMP
# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
#else
# define __KERNEL_PERCPU 0
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
# define __KERNEL_STACK_CANARY 0
#endif
#else /* 64-bit: */
#include <asm/cache.h>
#define GDT_ENTRY_KERNEL32_CS 1
#define GDT_ENTRY_KERNEL_CS 2
#define GDT_ENTRY_KERNEL_DS 3
#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
#define GDT_ENTRY_KERNEL32_CS 1
#define GDT_ENTRY_KERNEL_CS 2
#define GDT_ENTRY_KERNEL_DS 3
/*
* we cannot use the same code segment descriptor for user and kernel
* -- not even in the long flat mode, because of different DPL /kkeil
* The segment offset needs to contain a RPL. Grr. -AK
* GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
* We cannot use the same code segment descriptor for user and kernel mode,
* not even in long flat mode, because of different DPL.
*
* GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
* selectors:
*
* if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
* if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
*
* ss = STAR.SYSRET_CS+8 (in either case)
*
* thus USER_DS should be between 32-bit and 64-bit code selectors:
*/
#define GDT_ENTRY_DEFAULT_USER32_CS 4
#define GDT_ENTRY_DEFAULT_USER_DS 5
#define GDT_ENTRY_DEFAULT_USER_CS 6
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
#define __USER32_DS __USER_DS
#define GDT_ENTRY_DEFAULT_USER32_CS 4
#define GDT_ENTRY_DEFAULT_USER_DS 5
#define GDT_ENTRY_DEFAULT_USER_CS 6
#define GDT_ENTRY_TSS 8 /* needs two entries */
#define GDT_ENTRY_LDT 10 /* needs two entries */
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MAX 14
/* Needs two entries */
#define GDT_ENTRY_TSS 8
/* Needs two entries */
#define GDT_ENTRY_LDT 10
#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MAX 14
/* TLS indexes for 64bit - hardcoded in arch_prctl */
#define FS_TLS 0
#define GS_TLS 1
/* Abused to load per CPU data from limit */
#define GDT_ENTRY_PER_CPU 15
#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
/*
* Number of entries in the GDT table:
*/
#define GDT_ENTRIES 16
#define GDT_ENTRIES 16
/*
* Segment selector values corresponding to the above entries:
*
* Note, selectors also need to have a correct RPL,
* expressed with the +3 value for user-space selectors:
*/
#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER32_DS __USER_DS
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
#define FS_TLS 0
#define GS_TLS 1
#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
#endif
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
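To tie the two comments above together, the selector arithmetic for the 64-bit layout works out as follows; the MSR_STAR programming that relies on this ordering (syscall_init() loading __USER32_CS into the SYSRET field) appears later in this series. A small illustrative check, not kernel code:

#include <assert.h>

/* 64-bit GDT indices from the layout above */
#define GDT_ENTRY_KERNEL_CS		2
#define GDT_ENTRY_DEFAULT_USER32_CS	4
#define GDT_ENTRY_DEFAULT_USER_DS	5
#define GDT_ENTRY_DEFAULT_USER_CS	6

int main(void)
{
	/* selector = index * 8, plus RPL 3 for user-space selectors */
	assert(GDT_ENTRY_KERNEL_CS         * 8     == 0x10);	/* __KERNEL_CS */
	assert(GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3 == 0x23);	/* __USER32_CS */
	assert(GDT_ENTRY_DEFAULT_USER_DS   * 8 + 3 == 0x2b);	/* __USER_DS   */
	assert(GDT_ENTRY_DEFAULT_USER_CS   * 8 + 3 == 0x33);	/* __USER_CS   */

	/*
	 * The SYSRET constraint from the comment above: with the SYSRET
	 * selector base programmed to __USER32_CS, the CPU uses base+8 as SS
	 * and base+16 as the 64-bit CS, which is exactly why __USER_DS must
	 * sit between the 32-bit and 64-bit user code selectors.
	 */
	assert(0x23 + 8  == 0x2b);
	assert(0x23 + 16 == 0x33);
	return 0;
}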
#ifndef CONFIG_PARAVIRT
#define get_kernel_rpl() 0
# define get_kernel_rpl() 0
#endif
#define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32
/* Bitmask of exception vectors which push an error code on the stack */
#define EXCEPTION_ERRCODE_MASK 0x00027d00
#define GDT_SIZE (GDT_ENTRIES * 8)
#define GDT_ENTRY_TLS_ENTRIES 3
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
#define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32
/* Bitmask of exception vectors which push an error code on the stack: */
#define EXCEPTION_ERRCODE_MASK 0x00027d00
#define GDT_SIZE (GDT_ENTRIES*8)
#define GDT_ENTRY_TLS_ENTRIES 3
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
#ifdef CONFIG_TRACING
#define trace_early_idt_handlers early_idt_handlers
# define trace_early_idt_handlers early_idt_handlers
#endif
/*
@ -228,37 +260,30 @@ do { \
} while (0)
/*
* Save a segment register away
* Save a segment register away:
*/
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
* x86_32 user gs accessors.
* x86-32 user GS accessors:
*/
#ifdef CONFIG_X86_32
#ifdef CONFIG_X86_32_LAZY_GS
#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
#define task_user_gs(tsk) ((tsk)->thread.gs)
#define lazy_save_gs(v) savesegment(gs, (v))
#define lazy_load_gs(v) loadsegment(gs, (v))
#else /* X86_32_LAZY_GS */
#define get_user_gs(regs) (u16)((regs)->gs)
#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
#define lazy_save_gs(v) do { } while (0)
#define lazy_load_gs(v) do { } while (0)
#endif /* X86_32_LAZY_GS */
# ifdef CONFIG_X86_32_LAZY_GS
# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
# define task_user_gs(tsk) ((tsk)->thread.gs)
# define lazy_save_gs(v) savesegment(gs, (v))
# define lazy_load_gs(v) loadsegment(gs, (v))
# else /* X86_32_LAZY_GS */
# define get_user_gs(regs) (u16)((regs)->gs)
# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
# define lazy_save_gs(v) do { } while (0)
# define lazy_load_gs(v) do { } while (0)
# endif /* X86_32_LAZY_GS */
#endif /* X86_32 */
static inline unsigned long get_limit(unsigned long segment)
{
unsigned long __limit;
asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
return __limit + 1;
}
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */

View file

@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
*/
extern struct boot_params boot_params;
static inline bool kaslr_enabled(void)
{
return !!(boot_params.hdr.loadflags & KASLR_FLAG);
}
/*
* Do NOT EVER look at the BIOS memory size location.
* It does not work on many machines.

View file

@ -57,9 +57,9 @@ struct sigcontext {
unsigned long ip;
unsigned long flags;
unsigned short cs;
unsigned short gs;
unsigned short fs;
unsigned short __pad0;
unsigned short __pad2; /* Was called gs, but was always zero. */
unsigned short __pad1; /* Was called fs, but was always zero. */
unsigned short ss;
unsigned long err;
unsigned long trapno;
unsigned long oldmask;

View file

@ -13,9 +13,7 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);

View file

@ -27,23 +27,11 @@
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
661: ASM_NOP3 ; \
.pushsection .altinstr_replacement, "ax" ; \
662: __ASM_CLAC ; \
.popsection ; \
.pushsection .altinstructions, "a" ; \
altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
.popsection
#define ASM_CLAC \
ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
#define ASM_STAC \
661: ASM_NOP3 ; \
.pushsection .altinstr_replacement, "ax" ; \
662: __ASM_STAC ; \
.popsection ; \
.pushsection .altinstructions, "a" ; \
altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
.popsection
#define ASM_STAC \
ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
#else /* CONFIG_X86_SMAP */
@ -61,20 +49,20 @@
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
#define ASM_STAC \
ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */

View file

@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);

View file

@ -4,6 +4,8 @@
#ifdef __KERNEL__
#include <asm/nops.h>
static inline void native_clts(void)
{
asm volatile("clts");
@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
"+m" (*(volatile char __force *)__p));
}
static inline void clwb(volatile void *__p)
{
volatile struct { char x[64]; } *p = __p;
asm volatile(ALTERNATIVE_2(
".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
X86_FEATURE_CLFLUSHOPT,
".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
X86_FEATURE_CLWB)
: [p] "+m" (*p)
: [pax] "a" (p));
}
static inline void pcommit_sfence(void)
{
alternative(ASM_NOP7,
".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
"sfence",
X86_FEATURE_PCOMMIT);
}
#define nop() asm volatile ("nop")

View file

@ -12,6 +12,33 @@
#include <asm/percpu.h>
#include <asm/types.h>
/*
* TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
* reserve at the top of the kernel stack. We do it because of a nasty
* 32-bit corner case. On x86_32, the hardware stack frame is
* variable-length. Except for vm86 mode, struct pt_regs assumes a
* maximum-length frame. If we enter from CPL 0, the top 8 bytes of
* pt_regs don't actually exist. Ordinarily this doesn't matter, but it
* does in at least one case:
*
* If we take an NMI early enough in SYSENTER, then we can end up with
* pt_regs that extends above sp0. On the way out, in the espfix code,
* we can read the saved SS value, but that value will be above sp0.
* Without this offset, that can result in a page fault. (We are
* careful that, in this case, the value we read doesn't matter.)
*
* In vm86 mode, the hardware frame is much longer still, but we neither
* access the extra members from NMI context, nor do we write such a
* frame at sp0 at all.
*
* x86_64 has a fixed-length stack frame.
*/
#ifdef CONFIG_X86_32
# define TOP_OF_KERNEL_STACK_PADDING 8
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
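One way to read the 8-byte figure: on x86_32 it is exactly the size of the two optional words (sp and ss) at the end of struct pt_regs that the hardware does not push on same-privilege entry. A tiny compile-time sketch of that relationship, using a u32 stand-in for the 32-bit layout (illustration only):

#include <stdint.h>

/* Stand-in mirroring only the tail of the 32-bit struct pt_regs */
struct pt_regs_tail_32 {
	uint32_t ip, cs, flags;
	uint32_t sp;	/* pushed by hardware only on entry from CPL 3, */
	uint32_t ss;	/* i.e. when the CPU actually switched stacks   */
};

#define TOP_OF_KERNEL_STACK_PADDING_32	8	/* the x86_32 value above */

_Static_assert(TOP_OF_KERNEL_STACK_PADDING_32 ==
	       sizeof(uint32_t) + sizeof(uint32_t),
	       "padding covers exactly the optional sp/ss words");

int main(void) { return 0; }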
/*
* low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line
@ -145,7 +172,6 @@ struct thread_info {
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
/*
* macros/functions for gaining access to the thread information structure
@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
ti = (void *)(this_cpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}
static inline unsigned long current_stack_pointer(void)
@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
/* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \
_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
_ASM_SUB $(THREAD_SIZE),reg ;
/*
* Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
* a certain register (to be used in assembler memory operands).
* ASM operand which evaluates to a 'thread_info' address of
* the current task, if it is known that "reg" is exactly "off"
* bytes below the top of the stack currently.
*
* ( The kernel stack's size is known at build time, it is usually
* 2 or 4 pages, and the bottom of the kernel stack contains
* the thread_info structure. So to access the thread_info very
* quickly from assembly code we can calculate down from the
* top of the kernel stack to the bottom, using constant,
* build-time calculations only. )
*
* For example, to fetch the current thread_info->flags value into %eax
* on x86-64 defconfig kernels, in syscall entry code where RSP is
* currently at exactly SIZEOF_PTREGS bytes away from the top of the
* stack:
*
* mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
*
* will translate to:
*
* 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
*
* which is below the current RSP by almost 16K.
*/
#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
#endif
@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
#endif
return false;
}
/*
* Force syscall return via IRET by making it look as if there was
* some work pending. IRET is our most capable (but slowest) syscall
* return path, which is able to restore modified SS, CS and certain
* EFLAGS values that other (fast) syscall return instructions
* are not able to restore properly.
*/
#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
#endif /* !__ASSEMBLY__ */
#ifndef __ASSEMBLY__

View file

@ -15,6 +15,7 @@
/* loadflags */
#define LOADED_HIGH (1<<0)
#define KASLR_FLAG (1<<1)
#define QUIET_FLAG (1<<5)
#define KEEP_SEGMENTS (1<<6)
#define CAN_USE_HEAP (1<<7)

View file

@ -25,13 +25,17 @@
#else /* __i386__ */
#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
#define R15 0
#define R14 8
#define R13 16
#define R12 24
#define RBP 32
#define RBX 40
/* arguments: interrupts/non tracing syscalls only save up to here*/
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 48
#define R10 56
#define R9 64
@ -41,15 +45,17 @@
#define RDX 96
#define RSI 104
#define RDI 112
#define ORIG_RAX 120 /* = ERROR */
/* end of arguments */
/* cpu exception frame or undefined in case of fast syscall. */
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
#define ORIG_RAX 120
/* Return frame for iretq */
#define RIP 128
#define CS 136
#define EFLAGS 144
#define RSP 152
#define SS 160
#define ARGOFFSET R11
#endif /* __ASSEMBLY__ */
/* top of stack page */

View file

@ -41,13 +41,17 @@ struct pt_regs {
#ifndef __KERNEL__
struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long rbp;
unsigned long rbx;
/* arguments: non interrupts/non tracing syscalls only save up to here*/
/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@ -57,9 +61,12 @@ struct pt_regs {
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_rax;
/* end of arguments */
/* cpu exception frame or undefined */
/* Return frame for iretq */
unsigned long rip;
unsigned long cs;
unsigned long eflags;

View file

@ -177,9 +177,24 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
__u16 gs;
__u16 fs;
__u16 __pad0;
/*
* Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
* Linux saved and restored fs and gs in these slots. This
* was counterproductive, as fsbase and gsbase were never
* saved, so arch_prctl was presumably unreliable.
*
* If these slots are ever needed for any other purpose, there
* is some risk that very old 64-bit binaries could get
* confused. I doubt that many such binaries still work,
* though, since the same patch in 2.5.64 also removed the
* 64-bit set_thread_area syscall, so it appears that there is
* no TLS API that works in both pre- and post-2.5.64 kernels.
*/
__u16 __pad2; /* Was gs. */
__u16 __pad1; /* Was fs. */
__u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;

View file

@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += mcount_64.o
obj-y += syscall_$(BITS).o vsyscall_gtod.o
obj-$(CONFIG_IA32_EMULATION) += syscall_32.o
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o

View file

@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
#define DPRINTK(fmt, ...) \
do { \
if (debug_alternative) \
printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
#define DPRINTK(fmt, args...) \
do { \
if (debug_alternative) \
printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
} while (0)
#define DUMP_BYTES(buf, len, fmt, args...) \
do { \
if (unlikely(debug_alternative)) { \
int j; \
\
if (!(len)) \
break; \
\
printk(KERN_DEBUG fmt, ##args); \
for (j = 0; j < (len) - 1; j++) \
printk(KERN_CONT "%02hhx ", buf[j]); \
printk(KERN_CONT "%02hhx\n", buf[j]); \
} \
} while (0)
/*
@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that asymmetric systems where
APs have less capabilities than the boot processor are not handled.
Tough. Make sure you disable such features by hand. */
/*
* Are we looking at a near JMP with a 1 or 4-byte displacement.
*/
static inline bool is_jmp(const u8 opcode)
{
return opcode == 0xeb || opcode == 0xe9;
}
static void __init_or_module
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
{
u8 *next_rip, *tgt_rip;
s32 n_dspl, o_dspl;
int repl_len;
if (a->replacementlen != 5)
return;
o_dspl = *(s32 *)(insnbuf + 1);
/* next_rip of the replacement JMP */
next_rip = repl_insn + a->replacementlen;
/* target rip of the replacement JMP */
tgt_rip = next_rip + o_dspl;
n_dspl = tgt_rip - orig_insn;
DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
if (tgt_rip - orig_insn >= 0) {
if (n_dspl - 2 <= 127)
goto two_byte_jmp;
else
goto five_byte_jmp;
/* negative offset */
} else {
if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
goto two_byte_jmp;
else
goto five_byte_jmp;
}
two_byte_jmp:
n_dspl -= 2;
insnbuf[0] = 0xeb;
insnbuf[1] = (s8)n_dspl;
add_nops(insnbuf + 2, 3);
repl_len = 2;
goto done;
five_byte_jmp:
n_dspl -= 5;
insnbuf[0] = 0xe9;
*(s32 *)&insnbuf[1] = n_dspl;
repl_len = 5;
done:
DPRINTK("final displ: 0x%08x, JMP 0x%lx",
n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
}
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
if (instr[0] != 0x90)
return;
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
}
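For concreteness, the displacement math in recompute_jump() above can be followed with plain numbers; this sketch uses made-up byte offsets and writes the signed-range test the portable way rather than the kernel's bit-mask form:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	long orig_insn = 0x1000;	/* patch site in .text              */
	long repl_insn = 0x5000;	/* where the replacement bytes live */
	long o_dspl    = 0x1010 - (0x5000 + 5);	/* replacement e9 JMP targets
						   orig_insn + 0x10           */

	long next_rip = repl_insn + 5;		/* end of the 5-byte JMP      */
	long tgt_rip  = next_rip + o_dspl;	/* its real target            */
	long n_dspl   = tgt_rip - orig_insn;	/* displacement as seen from
						   the patch site             */

	if (n_dspl - 2 >= -128 && n_dspl - 2 <= 127)
		/* 2-byte form: eb <rel8>, padded with 3 NOPs to 5 bytes */
		printf("eb %02x + 3 nops\n", (unsigned int)(uint8_t)(n_dspl - 2));
	else
		/* 5-byte form: e9 <rel32> */
		printf("e9 rel32=%#lx\n", n_dspl - 5);

	return 0;
}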
/*
* Replace instructions with better alternatives for this CPU type. This runs
* before SMP is initialized to avoid SMP problems with self modifying code.
* This implies that asymmetric systems where APs have less capabilities than
* the boot processor are not handled. Tough. Make sure you disable such
* features by hand.
*/
void __init_or_module apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
u8 *instr, *replacement;
u8 insnbuf[MAX_PATCH_LEN];
DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
DPRINTK("alt table %p -> %p", start, end);
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite a previous scanned alternative code.
* alternative code can overwrite previously scanned alternative code.
* Some kernel functions (e.g. memcpy, memset, etc) use this order to
* patch code.
*
@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
* order.
*/
for (a = start; a < end; a++) {
int insnbuf_sz = 0;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->replacementlen > a->instrlen);
BUG_ON(a->instrlen > sizeof(insnbuf));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
if (!boot_cpu_has(a->cpuid))
if (!boot_cpu_has(a->cpuid)) {
if (a->padlen > 1)
optimize_nops(a, instr);
continue;
}
DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
memcpy(insnbuf, replacement, a->replacementlen);
insnbuf_sz = a->replacementlen;
/* 0xe8 is a relative jump; fix the offset. */
if (*insnbuf == 0xe8 && a->replacementlen == 5)
*(s32 *)(insnbuf + 1) += replacement - instr;
if (*insnbuf == 0xe8 && a->replacementlen == 5) {
*(s32 *)(insnbuf + 1) += replacement - instr;
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
*(s32 *)(insnbuf + 1),
(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
}
add_nops(insnbuf + a->replacementlen,
a->instrlen - a->replacementlen);
if (a->replacementlen && is_jmp(replacement[0]))
recompute_jump(a, instr, replacement, insnbuf);
text_poke_early(instr, insnbuf, a->instrlen);
if (a->instrlen > a->replacementlen) {
add_nops(insnbuf + a->replacementlen,
a->instrlen - a->replacementlen);
insnbuf_sz += a->instrlen - a->replacementlen;
}
DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
text_poke_early(instr, insnbuf, insnbuf_sz);
}
}
#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end)
{
@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
smp->locks_end = locks_end;
smp->text = text;
smp->text_end = text_end;
DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
__func__, smp->locks, smp->locks_end,
DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
list_add_tail(&smp->next, &smp_alt_modules);
@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
return 0;
}
#endif
#endif /* CONFIG_SMP */
#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
if (likely(!bp_patching_in_progress))
return 0;
if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
return 0;
/* set up the specified breakpoint handler */

View file

@ -68,7 +68,7 @@ void foo(void)
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
offsetofend(struct tss_struct, SYSENTER_stack));
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();

View file

@ -81,6 +81,7 @@ int main(void)
#undef ENTRY
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
BLANK();
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);

View file

@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
/* 3DNow or LM implies PREFETCHW */
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
}
#ifdef CONFIG_X86_32

View file

@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif
}
#ifdef CONFIG_X86_64
#ifdef CONFIG_IA32_EMULATION
/* May not be __init: called during resume */
static void syscall32_cpu_init(void)
{
/* Load these always in case some future AMD CPU supports
SYSENTER from compat mode too. */
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
#endif /* CONFIG_IA32_EMULATION */
#endif /* CONFIG_X86_64 */
/*
* Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
* on 32-bit kernels:
*/
#ifdef CONFIG_X86_32
void enable_sep_cpu(void)
{
int cpu = get_cpu();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
struct tss_struct *tss;
int cpu;
if (!boot_cpu_has(X86_FEATURE_SEP)) {
put_cpu();
return;
}
cpu = get_cpu();
tss = &per_cpu(cpu_tss, cpu);
if (!boot_cpu_has(X86_FEATURE_SEP))
goto out;
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
*/
tss->x86_tss.ss1 = __KERNEL_CS;
tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
wrmsr(MSR_IA32_SYSENTER_ESP,
(unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
out:
put_cpu();
}
#endif
@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
#ifdef CONFIG_X86_64
@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
/*
* The following four percpu variables are hot. Align current_task to
* cacheline size such that all four fall in the same cacheline.
* The following percpu variables are hot. Align current_task to
* cacheline size such that they fall in the same cacheline.
*/
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
@ -1171,10 +1170,23 @@ void syscall_init(void)
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
wrmsrl(MSR_LSTAR, system_call);
wrmsrl(MSR_CSTAR, ignore_sysret);
#ifdef CONFIG_IA32_EMULATION
syscall32_cpu_init();
wrmsrl(MSR_CSTAR, ia32_cstar_target);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
* This does not cause SYSENTER to jump to the wrong location, because
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
#else
wrmsrl(MSR_CSTAR, ignore_sysret);
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
#endif
/* Flags to clear on syscall */
@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
* the top of the kernel stack. Use an extra percpu variable to track the
* top of the kernel stack directly.
*/
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
@ -1307,7 +1328,7 @@ void cpu_init(void)
*/
load_ucode_ap();
t = &per_cpu(init_tss, cpu);
t = &per_cpu(cpu_tss, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
@ -1391,7 +1412,7 @@ void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
struct thread_struct *thread = &curr->thread;
wait_for_master_cpu(cpu);

View file

@ -2146,6 +2146,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
*/
static unsigned long code_segment_base(struct pt_regs *regs)
{
/*
* For IA32 we look at the GDT/LDT segment base to convert the
* effective IP to a linear address.
*/
#ifdef CONFIG_X86_32
/*
* If we are in VM86 mode, add the segment offset to convert to a
* linear address.
@ -2153,18 +2159,12 @@ static unsigned long code_segment_base(struct pt_regs *regs)
if (regs->flags & X86_VM_MASK)
return 0x10 * regs->cs;
/*
* For IA32 we look at the GDT/LDT segment base to convert the
* effective IP to a linear address.
*/
#ifdef CONFIG_X86_32
if (user_mode(regs) && regs->cs != __USER_CS)
return get_segment_base(regs->cs);
#else
if (test_thread_flag(TIF_IA32)) {
if (user_mode(regs) && regs->cs != __USER32_CS)
return get_segment_base(regs->cs);
}
if (user_mode(regs) && !user_64bit_mode(regs) &&
regs->cs != __USER32_CS)
return get_segment_base(regs->cs);
#endif
return 0;
}

View file

@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
if (!user_mode_vm(regs)) {
if (!user_mode(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}

View file

@ -278,7 +278,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
print_modules();
show_regs(regs);
#ifdef CONFIG_X86_32
if (user_mode_vm(regs)) {
if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
} else {
@ -307,7 +307,7 @@ void die(const char *str, struct pt_regs *regs, long err)
unsigned long flags = oops_begin();
int sig = SIGSEGV;
if (!user_mode_vm(regs))
if (!user_mode(regs))
report_bug(regs->ip, regs);
if (__die(str, regs, err))

View file

@ -123,13 +123,13 @@ void show_regs(struct pt_regs *regs)
int i;
show_regs_print_info(KERN_EMERG);
__show_regs(regs, !user_mode_vm(regs));
__show_regs(regs, !user_mode(regs));
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
*/
if (!user_mode_vm(regs)) {
if (!user_mode(regs)) {
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
unsigned char c;

View file

@ -395,10 +395,13 @@ sysenter_past_esp:
/*CFI_REL_OFFSET cs, 0*/
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
* A tiny bit of offset fixup is necessary: TI_sysenter_return
* is relative to thread_info, which is at the bottom of the
* kernel stack page. 4*4 means the 4 words pushed above;
* TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
* and THREAD_SIZE takes us to the bottom.
*/
pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
@ -432,7 +435,7 @@ sysenter_after_call:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx
jne sysexit_audit
jnz sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
@ -460,7 +463,7 @@ sysenter_audit:
sysexit_audit:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
jne syscall_exit_work
jnz syscall_exit_work
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
movl %eax,%edx /* second arg, syscall return value */
@ -472,7 +475,7 @@ sysexit_audit:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
jne syscall_exit_work
jnz syscall_exit_work
movl PT_EAX(%esp),%eax /* reload syscall return value */
jmp sysenter_exit
#endif
@ -510,7 +513,7 @@ syscall_exit:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx # current->work
jne syscall_exit_work
jnz syscall_exit_work
restore_all:
TRACE_IRQS_IRET
@ -612,7 +615,7 @@ work_notifysig: # deal with pending signals and
#ifdef CONFIG_VM86
testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
jnz work_notifysig_v86 # returning to kernel-space or
# vm86-space
1:
#else
@ -720,43 +723,22 @@ END(sysenter_badsys)
.endm
/*
* Build the entry stubs and pointer table with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a
* single cache line on all modern x86 implementations.
* Build the entry stubs with some assembler magic.
* We pack 1 stub into every 8-byte block.
*/
.section .init.rodata,"a"
ENTRY(interrupt)
.section .entry.text, "ax"
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
.align 8
ENTRY(irq_entries_start)
RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
.balign 32
.rept 7
.if vector < FIRST_SYSTEM_VECTOR
.if vector <> FIRST_EXTERNAL_VECTOR
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
vector=vector+1
jmp common_interrupt
CFI_ADJUST_CFA_OFFSET -4
.endif
1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
.previous
.long 1b
.section .entry.text, "ax"
vector=vector+1
.endif
.endr
2: jmp common_interrupt
.endr
.align 8
.endr
END(irq_entries_start)
.previous
END(interrupt)
.previous
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
661: pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
.previous
.section .altinstr_replacement,"ax"
663: pushl $do_simd_coprocessor_error
664:
.previous
ALTERNATIVE "pushl_cfi $do_general_protection", \
"pushl $do_simd_coprocessor_error", \
X86_FEATURE_XMM
#else
pushl_cfi $do_simd_coprocessor_error
#endif
@ -1240,20 +1216,13 @@ error_code:
/*CFI_REL_OFFSET es, 0*/
pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0*/
pushl_cfi %eax
CFI_REL_OFFSET eax, 0
pushl_cfi %ebp
CFI_REL_OFFSET ebp, 0
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg eax
pushl_cfi_reg ebp
pushl_cfi_reg edi
pushl_cfi_reg esi
pushl_cfi_reg edx
pushl_cfi_reg ecx
pushl_cfi_reg ebx
cld
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs

File diff suppressed because it is too large

View file

@ -22,6 +22,7 @@
#include <asm/cpufeature.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include <asm/bootparam.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@ -90,7 +91,7 @@ ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
testb $(1<<6), BP_loadflags(%esi)
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 2f
/*

View file

@ -1,5 +1,5 @@
/*
* linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
* linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
*
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@ -56,7 +56,7 @@ startup_64:
* %rsi holds a physical pointer to real_mode_data.
*
* We come here either directly from a 64bit bootloader, or from
* arch/x86_64/boot/compressed/head.S.
* arch/x86/boot/compressed/head_64.S.
*
* We only come here initially at boot nothing else comes here.
*
@ -146,7 +146,7 @@ startup_64:
leaq level2_kernel_pgt(%rip), %rdi
leaq 4096(%rdi), %r8
/* See if it is a valid page table entry */
1: testq $1, 0(%rdi)
1: testb $1, 0(%rdi)
jz 2f
addq %rbp, 0(%rdi)
/* Go to the next page */

View file

@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
static inline bool interrupted_user_mode(void)
{
struct pt_regs *regs = get_irq_regs();
return regs && user_mode_vm(regs);
return regs && user_mode(regs);
}
/*

View file

@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* because the ->io_bitmap_max value must match the bitmap
* contents:
*/
tss = &per_cpu(init_tss, get_cpu());
tss = &per_cpu(cpu_tss, get_cpu());
if (turn_on)
bitmap_clear(t->io_bitmap_ptr, from, num);

View file

@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
if (unlikely(!desc))
return false;
if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);

View file

@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
if (user_mode_vm(regs))
if (user_mode(regs))
return;
if (regs->sp >= curbase + sizeof(struct thread_info) +

View file

@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
#endif
for_each_clear_bit_from(i, used_vectors, first_system_vector) {
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
}
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, used_vectors, NR_VECTORS)

View file

@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32
switch (regno) {
case GDB_SS:
if (!user_mode_vm(regs))
if (!user_mode(regs))
*(unsigned long *)mem = __KERNEL_DS;
break;
case GDB_SP:
if (!user_mode_vm(regs))
if (!user_mode(regs))
*(unsigned long *)mem = kernel_stack_pointer(regs);
break;
case GDB_GS:

View file

@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
struct kprobe *p;
struct kprobe_ctlblk *kcb;
if (user_mode_vm(regs))
if (user_mode(regs))
return 0;
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
struct die_args *args = data;
int ret = NOTIFY_DONE;
if (args->regs && user_mode_vm(args->regs))
if (args->regs && user_mode(args->regs))
return ret;
if (val == DIE_GPF) {

View file

@ -33,6 +33,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#if 0
#define DEBUGP(fmt, ...) \
@ -47,21 +48,13 @@ do { \
#ifdef CONFIG_RANDOMIZE_BASE
static unsigned long module_load_offset;
static int randomize_modules = 1;
/* Mutex protects the module_load_offset. */
static DEFINE_MUTEX(module_kaslr_mutex);
static int __init parse_nokaslr(char *p)
{
randomize_modules = 0;
return 0;
}
early_param("nokaslr", parse_nokaslr);
static unsigned long int get_module_load_offset(void)
{
if (randomize_modules) {
if (kaslr_enabled()) {
mutex_lock(&module_kaslr_mutex);
/*
* Calculate the module_load_offset the first time this

View file

@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
}
/*
* RIP, flags, and the argument registers are usually saved.
* orig_ax is probably okay, too.
* These registers are always saved on 64-bit syscall entry.
* On 32-bit entry points, they are saved too except r8..r11.
*/
regs_user_copy->ip = user_regs->ip;
regs_user_copy->ax = user_regs->ax;
regs_user_copy->cx = user_regs->cx;
regs_user_copy->dx = user_regs->dx;
regs_user_copy->si = user_regs->si;
@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
regs_user_copy->r11 = user_regs->r11;
regs_user_copy->orig_ax = user_regs->orig_ax;
regs_user_copy->flags = user_regs->flags;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
/*
* Don't even try to report the "rest" regs.
* Most system calls don't save these registers, don't report them.
*/
regs_user_copy->bx = -1;
regs_user_copy->bp = -1;
@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
/*
* For this to be at all useful, we need a reasonable guess for
* sp and the ABI. Be careful: we're in NMI context, and we're
* the ABI. Be careful: we're in NMI context, and we're
* considering current to be the current task, so we should
* be careful not to look at any other percpu variables that might
* change during context switches.
*/
if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
task_thread_info(current)->status & TS_COMPAT) {
/* Easy case: we're in a compat syscall. */
regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
} else if (user_regs->orig_ax != -1) {
/*
* We're probably in a 64-bit syscall.
* Warning: this code is severely racy. At least it's better
* than just blindly copying user_regs.
*/
regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
regs_user_copy->sp = this_cpu_read(old_rsp);
regs_user_copy->cs = __USER_CS;
regs_user_copy->ss = __USER_DS;
regs_user_copy->cx = -1; /* usually contains garbage */
} else {
/* We're probably in an interrupt or exception. */
regs_user->abi = user_64bit_mode(user_regs) ?
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
}
regs_user->abi = user_64bit_mode(user_regs) ?
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
regs_user->regs = regs_user_copy;
}

View file

@ -38,7 +38,26 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
},
#ifdef CONFIG_X86_32
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
@ -110,7 +129,7 @@ void exit_thread(void)
unsigned long *bp = t->io_bitmap_ptr;
if (bp) {
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP);

View file

@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long sp;
unsigned short ss, gs;
if (user_mode_vm(regs)) {
if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
gs = get_user_gs(regs);
@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->ip = new_ip;
regs->sp = new_sp;
regs->flags = X86_EFLAGS_IF;
/*
* force it to the iret return path by making it look as if there was
* some work pending.
*/
set_thread_flag(TIF_NOTIFY_RESUME);
force_iret();
}
EXPORT_SYMBOL_GPL(start_thread);
@ -248,18 +244,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/*
* Reload esp0.
*/
load_sp0(tss, next);
/*
* Save away %gs. No need to save %fs, as it was saved on the
* stack on entry. No need to save %es and %ds, as those are
@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
/*
* Reload esp0, kernel_stack, and current_top_of_stack. This changes
* current_thread_info().
*/
load_sp0(tss, next);
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
/*
* Restore %gs if needed (which is common)

View file

@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
__visible DEFINE_PER_CPU(unsigned long, old_rsp);
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
p->thread.io_bitmap_ptr = NULL;
@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32))
if (is_ia32_task())
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
else
@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
this_cpu_write(old_rsp, new_sp);
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
force_iret();
}
void
@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned fsindex, gsindex;
fpu_switch_t fpu;
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/* Reload esp0 and ss1. */
load_sp0(tss, next);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
*
@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
prev->usersp = this_cpu_read(old_rsp);
this_cpu_write(old_rsp, next->usersp);
this_cpu_write(current_task, next_p);
/*
@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
/* Reload esp0 and ss1. This changes current_thread_info(). */
load_sp0(tss, next);
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
unsigned long KSTK_ESP(struct task_struct *task)
{
return (test_tsk_thread_flag(task, TIF_IA32)) ?
(task_pt_regs(task)->sp) : ((task)->thread.usersp);
return task_pt_regs(task)->sp;
}
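The copy_thread() hunk above switches from test_thread_flag(TIF_IA32) to is_ia32_task(). A hedged sketch of what such a helper is assumed to look like (the real one lives in a header outside this diff, and the details may differ) -- the point being that it keys off the compat status of the current syscall rather than a sticky per-task flag:

/* Hedged sketch of an is_ia32_task()-style helper, not the kernel header. */
static inline bool is_ia32_task(void)
{
#ifdef CONFIG_X86_32
	return true;				/* native 32-bit kernel */
#elif defined(CONFIG_IA32_EMULATION)
	/* 64-bit kernel: true while servicing a 32-bit (compat) syscall */
	return current_thread_info()->status & TS_COMPAT;
#else
	return false;
#endif
}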

View file

@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
case offsetof(struct user_regs_struct,cs):
if (unlikely(value == 0))
return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->cs = value;
#endif
task_pt_regs(task)->cs = value;
break;
case offsetof(struct user_regs_struct,ss):
if (unlikely(value == 0))
return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->ss = value;
#endif
task_pt_regs(task)->ss = value;
break;
}
@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
info->si_code = si_code;
info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
}
void user_single_step_siginfo(struct task_struct *tsk,

View file

@ -226,23 +226,23 @@ swap_pages:
movl (%ebx), %ecx
addl $4, %ebx
1:
testl $0x1, %ecx /* is it a destination page */
testb $0x1, %cl /* is it a destination page */
jz 2f
movl %ecx, %edi
andl $0xfffff000, %edi
jmp 0b
2:
testl $0x2, %ecx /* is it an indirection page */
testb $0x2, %cl /* is it an indirection page */
jz 2f
movl %ecx, %ebx
andl $0xfffff000, %ebx
jmp 0b
2:
testl $0x4, %ecx /* is it the done indicator */
testb $0x4, %cl /* is it the done indicator */
jz 2f
jmp 3f
2:
testl $0x8, %ecx /* is it the source indicator */
testb $0x8, %cl /* is it the source indicator */
jz 0b /* Ignore it otherwise */
movl %ecx, %esi /* For every source page do a copy */
andl $0xfffff000, %esi

View file

@ -123,7 +123,7 @@ identity_mapped:
* Set cr4 to a known state:
* - physical address extension enabled
*/
movq $X86_CR4_PAE, %rax
movl $X86_CR4_PAE, %eax
movq %rax, %cr4
jmp 1f
@ -221,23 +221,23 @@ swap_pages:
movq (%rbx), %rcx
addq $8, %rbx
1:
testq $0x1, %rcx /* is it a destination page? */
testb $0x1, %cl /* is it a destination page? */
jz 2f
movq %rcx, %rdi
andq $0xfffffffffffff000, %rdi
jmp 0b
2:
testq $0x2, %rcx /* is it an indirection page? */
testb $0x2, %cl /* is it an indirection page? */
jz 2f
movq %rcx, %rbx
andq $0xfffffffffffff000, %rbx
jmp 0b
2:
testq $0x4, %rcx /* is it the done indicator? */
testb $0x4, %cl /* is it the done indicator? */
jz 2f
jmp 3f
2:
testq $0x8, %rcx /* is it the source indicator? */
testb $0x8, %cl /* is it the source indicator? */
jz 0b /* Ignore it otherwise */
movq %rcx, %rsi /* For every source page do a copy */
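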
andq $0xfffffffffffff000, %rsi
@ -246,17 +246,17 @@ swap_pages:
movq %rsi, %rax
movq %r10, %rdi
movq $512, %rcx
movl $512, %ecx
rep ; movsq
movq %rax, %rdi
movq %rdx, %rsi
movq $512, %rcx
movl $512, %ecx
rep ; movsq
movq %rdx, %rdi
movq %r10, %rsi
movq $512, %rcx
movl $512, %ecx
rep ; movsq
lea PAGE_SIZE(%rax), %rsi

View file

@ -832,10 +832,15 @@ static void __init trim_low_memory_range(void)
static int
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
{
pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
"(relocation range: 0x%lx-0x%lx)\n",
(unsigned long)&_text - __START_KERNEL, __START_KERNEL,
__START_KERNEL_map, MODULES_VADDR-1);
if (kaslr_enabled()) {
pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
(unsigned long)&_text - __START_KERNEL,
__START_KERNEL,
__START_KERNEL_map,
MODULES_VADDR-1);
} else {
pr_emerg("Kernel Offset: disabled\n");
}
return 0;
}
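kaslr_enabled() is new in this series. A minimal sketch of what such a predicate is assumed to do -- report whether the early boot code actually randomized the kernel placement -- with the caveat that the real helper is defined in a header outside this diff:

/* Hedged sketch: report whether boot-time KASLR was actually applied. */
static inline bool kaslr_enabled(void)
{
	return !!(boot_params.hdr.loadflags & KASLR_FLAG);
}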

View file

@ -61,8 +61,7 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
{
void __user *buf;
unsigned int tmpflags;
@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
#endif /* CONFIG_X86_32 */
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
#ifdef CONFIG_X86_64
COPY(r8);
@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY(r15);
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
force_iret();
return err;
}
@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
#else /* !CONFIG_X86_32 */
put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
put_user_ex(0, &sc->__pad2);
put_user_ex(0, &sc->__pad1);
put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
/* Set up the CS register to run signal handlers in 64-bit mode,
even if the handler happens to be interrupting 32-bit code. */
/*
* Set up the CS and SS registers to run signal handlers in
* 64-bit mode, even if the handler happens to be interrupting
* 32-bit or 16-bit code.
*
* SS is subtle. In 64-bit mode, we don't need any particular
* SS descriptor, but we do need SS to be valid. It's possible
* that the old SS is entirely bogus -- this can happen if the
* signal we're trying to deliver is #GP or #SS caused by a bad
* SS value.
*/
regs->cs = __USER_CS;
regs->ss = __USER_DS;
return 0;
}
@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame;
unsigned long ax;
sigset_t set;
frame = (struct sigframe __user *)(regs->sp - 8);
@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->sc, &ax))
if (restore_sigcontext(regs, &frame->sc))
goto badframe;
return ax;
return regs->ax;
badframe:
signal_fault(regs, frame, "sigreturn");
@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void)
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
return regs->ax;
badframe:
signal_fault(regs, frame, "rt_sigreturn");
@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
unsigned long ax;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
return regs->ax;
badframe:
signal_fault(regs, frame, "x32 rt_sigreturn");
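With the *pax out-parameter gone, ax is restored through the same COPY() path as every other register, and each sigreturn variant simply returns regs->ax. For reference, COPY() in this file is essentially a get_user_ex() into the matching pt_regs field; a hedged sketch (the real macro sits above the context shown here):

/* Hedged sketch of the COPY() helper used by restore_sigcontext(). */
#define COPY(x)	do {				\
		get_user_ex(regs->x, &sc->x);	\
	} while (0)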

View file

@ -779,6 +779,26 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
return boot_error;
}
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
per_cpu(cpu_current_top_of_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
}
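common_cpu_up() now records the stack top without the old KERNEL_STACK_OFFSET adjustment, and cpu_current_top_of_stack mirrors it on 32-bit. The assumption behind this bookkeeping is that thread_info can then be found by subtracting THREAD_SIZE from that recorded top; a hedged sketch, not the verbatim header:

/* Hedged sketch: locate thread_info from the recorded stack top. */
static inline struct thread_info *current_thread_info(void)
{
	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}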
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int cpu0_nmi_registered = 0;
unsigned long timeout;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1);
per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;
@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/* the FPU context is blank, nobody can own it */
__cpu_disable_lazy_restore(cpu);
common_cpu_up(cpu, tidle);
err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);

View file

@ -5,21 +5,29 @@
#include <linux/cache.h>
#include <asm/asm-offsets.h>
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
#ifdef CONFIG_IA32_EMULATION
#define SYM(sym, compat) compat
#else
#define SYM(sym, compat) sym
#define ia32_sys_call_table sys_call_table
#define __NR_ia32_syscall_max __NR_syscall_max
#endif
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
typedef asmlinkage void (*sys_call_ptr_t)(void);
extern asmlinkage void sys_ni_syscall(void);
__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_max] = &sys_ni_syscall,
[0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h>
};
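The SYM() selector above lets the same generated asm/syscalls_32.h populate either the native or the compat entry points, and on 64-bit kernels the table is now called ia32_sys_call_table. A standalone illustration of the X-macro pattern with hypothetical handler names (not the kernel table itself):

#include <stdio.h>

typedef void (*sys_call_ptr_t)(void);

static void native_handler(void) { puts("native"); }
static void compat_handler(void) { puts("compat"); }

#define IA32_EMULATION 1		/* pretend CONFIG_IA32_EMULATION=y */
#if IA32_EMULATION
# define SYM(sym, compat) compat
#else
# define SYM(sym, compat) sym
#endif

#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),

static const sys_call_ptr_t table[4] = {
	__SYSCALL_I386(2, native_handler, compat_handler)
};

int main(void)
{
	table[2]();			/* prints "compat" here */
	return 0;
}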

View file

@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
if (!user_mode(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else

View file

@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
{
enum ctx_state prev_state;
if (user_mode_vm(regs)) {
if (user_mode(regs)) {
/* Other than that, we're just an exception. */
prev_state = exception_enter();
} else {
@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
/* Must be before exception_exit. */
preempt_count_sub(HARDIRQ_OFFSET);
if (user_mode_vm(regs))
if (user_mode(regs))
return exception_exit(prev_state);
else
rcu_nmi_exit();
@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
*
* IST exception handlers normally cannot schedule. As a special
* exception, if the exception interrupted userspace code (i.e.
* user_mode_vm(regs) would return true) and the exception was not
* user_mode(regs) would return true) and the exception was not
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
* begins a non-atomic section within an ist_enter()/ist_exit() region.
* Callers are responsible for enabling interrupts themselves inside
@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
*/
void ist_begin_non_atomic(struct pt_regs *regs)
{
BUG_ON(!user_mode_vm(regs));
BUG_ON(!user_mode(regs));
/*
* Sanity check: we need to be on the normal thread stack. This
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
& ~(THREAD_SIZE - 1)) != 0);
BUG_ON((unsigned long)(current_top_of_stack() -
current_stack_pointer()) >= THREAD_SIZE);
preempt_count_sub(HARDIRQ_OFFSET);
}
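The rewritten BUG_ON() above checks the distance from the top of the current task's stack instead of XOR-masking against the old kernel_stack variable. A hedged sketch of what current_top_of_stack() is assumed to return after this series (the real helper lives in a header outside the diff):

/* Hedged sketch: top of the current task's kernel stack, from per-CPU state. */
static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
	/* 32-bit keeps a separate per-CPU copy, since sp0 is special around vm86 */
	return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}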
@ -194,8 +194,7 @@ static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
if (v8086_mode(regs)) {
/*
* Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}
return -1;
}
#endif
if (!user_mode(regs)) {
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
@ -384,7 +383,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
goto exit;
conditional_sti(regs);
if (!user_mode_vm(regs))
if (!user_mode(regs))
die("bounds", regs, error_code);
if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
prev_state = exception_enter();
conditional_sti(regs);
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
if (v8086_mode(regs)) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto exit;
}
#endif
tsk = current;
if (!user_mode(regs)) {
@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/* Copy the remainder of the stack from the current stack. */
memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
BUG_ON(!user_mode_vm(&new_stack->regs));
BUG_ON(!user_mode(&new_stack->regs));
return new_stack;
}
NOKPROBE_SYMBOL(fixup_bad_iret);
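The blanket user_mode_vm() -> user_mode() conversions in this file (and in the files further down) rest on user_mode() now counting VM86 frames as user mode, which makes the _vm variant redundant. A hedged sketch of the 32-bit check this is understood to converge on (not the verbatim ptrace.h definition):

/* Hedged sketch: VM86-aware user-mode test; CPL 3 or the VM flag counts. */
static inline int user_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
	return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL;
#else
	return !!(regs->cs & 3);
#endif
}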
@ -637,7 +634,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
if (!dr6 && user_mode_vm(regs))
if (!dr6 && user_mode(regs))
user_icebp = 1;
/* Catch kmemcheck conditions first of all! */
@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
if (regs->flags & X86_VM_MASK) {
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
preempt_conditional_cli(regs);
@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return;
conditional_sti(regs);
if (!user_mode_vm(regs))
if (!user_mode(regs))
{
if (!fixup_exception(regs)) {
task->thread.error_code = error_code;
@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
/* Set of traps needed for early debugging. */
void __init early_trap_init(void)
{
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
/*
* Don't use IST to set DEBUG_STACK: it doesn't work until the TSS
* is set up in cpu_init(), which is called from trap_init(). Before
* trap_init() the CPU runs at ring 0, so it is impossible to hit an
* invalid stack, and using the original stack works well enough at
* this early stage. The DEBUG_STACK IST entry is installed after
* cpu_init(), in trap_init().
*
* We don't need to set trace_idt_table like set_intr_gate(),
* since we don't have trace_debug and it will be reset to
* 'debug' in trap_init() by set_intr_gate_ist().
*/
set_intr_gate_notrace(X86_TRAP_DB, debug);
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
set_system_intr_gate(X86_TRAP_BP, &int3);
#ifdef CONFIG_X86_32
set_intr_gate(X86_TRAP_PF, page_fault);
#endif
@ -1005,6 +1014,15 @@ void __init trap_init(void)
*/
cpu_init();
/*
* X86_TRAP_DB and X86_TRAP_BP have already been set in
* early_trap_init(). However, IST works only after cpu_init()
* has loaded the TSS. See the comments in early_trap_init().
*/
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64

View file

@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
int ret = NOTIFY_DONE;
/* We are only interested in userspace traps */
if (regs && !user_mode_vm(regs))
if (regs && !user_mode(regs))
return NOTIFY_DONE;
switch (val) {

View file

@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
do_exit(SIGSEGV);
}
tss = &per_cpu(init_tss, get_cpu());
tss = &per_cpu(cpu_tss, get_cpu());
current->thread.sp0 = current->thread.saved_sp0;
current->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &current->thread);
@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
tsk->thread.saved_fs = info->regs32->fs;
tsk->thread.saved_gs = get_user_gs(info->regs32);
tss = &per_cpu(init_tss, get_cpu());
tss = &per_cpu(cpu_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
if (cpu_has_sep)
tsk->thread.sysenter_cs = 0;

View file

@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Not us! */
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
if (i != SYSCALL_VECTOR)
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
}
/*
@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
{
lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
THREAD_SIZE / PAGE_SIZE);
tss->x86_tss.sp0 = thread->sp0;
}
/* Let's just say, I wouldn't do debugging under a Guest. */

View file

@ -13,16 +13,6 @@
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>
.macro SAVE reg
pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm
.macro RESTORE reg
popl_cfi %\reg
CFI_RESTORE \reg
.endm
.macro read64 reg
movl %ebx, %eax
movl %ecx, %edx
@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
.macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
SAVE ebp
SAVE ebx
SAVE esi
SAVE edi
pushl_cfi_reg ebp
pushl_cfi_reg ebx
pushl_cfi_reg esi
pushl_cfi_reg edi
movl %eax, %esi
movl %edx, %edi
@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
RESTORE edi
RESTORE esi
RESTORE ebx
RESTORE ebp
popl_cfi_reg edi
popl_cfi_reg esi
popl_cfi_reg ebx
popl_cfi_reg ebp
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@ -104,7 +94,7 @@ addsub_return sub sub sbb
.macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx
read64 %esi
1:
@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@ -130,7 +120,7 @@ incdec_return dec sub sbb
ENTRY(atomic64_dec_if_positive_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx
read64 %esi
1:
@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
2:
movl %ebx, %eax
movl %ecx, %edx
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8)
ENTRY(atomic64_add_unless_cx8)
CFI_STARTPROC
SAVE ebp
SAVE ebx
pushl_cfi_reg ebp
pushl_cfi_reg ebx
/* these just push these two parameters on the stack */
SAVE edi
SAVE ecx
pushl_cfi_reg edi
pushl_cfi_reg ecx
movl %eax, %ebp
movl %edx, %edi
@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
3:
addl $8, %esp
CFI_ADJUST_CFA_OFFSET -8
RESTORE ebx
RESTORE ebp
popl_cfi_reg ebx
popl_cfi_reg ebp
ret
4:
cmpl %edx, 4(%esp)
@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
ENTRY(atomic64_inc_not_zero_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx
read64 %esi
1:
@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
movl $1, %eax
3:
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)

View file

@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
@ -127,14 +125,12 @@ ENTRY(csum_partial)
6: addl %ecx,%eax
adcl $0, %eax
7:
testl $1, 12(%esp)
testb $1, 12(%esp)
jz 8f
roll $8, %eax
8:
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi_reg ebx
popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@ -145,10 +141,8 @@ ENDPROC(csum_partial)
ENTRY(csum_partial)
CFI_STARTPROC
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
@ -251,14 +245,12 @@ ENTRY(csum_partial)
addl %ebx,%eax
adcl $0,%eax
80:
testl $1, 12(%esp)
testb $1, 12(%esp)
jz 90f
roll $8, %eax
90:
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi_reg ebx
popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg edi
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
.previous
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi_reg ebx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi_reg ebx
pushl_cfi_reg edi
pushl_cfi_reg esi
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

View file

@ -1,31 +1,35 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
* Zero a page.
* rdi page
*/
ENTRY(clear_page_c)
* Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
* recommended to use them when possible, and they are the default here.
* If enhanced REP MOVSB/STOSB is not available, try to use the fast-string
* variant. Otherwise, use the original routine.
*/
/*
* Zero a page.
* %rdi - page
*/
ENTRY(clear_page)
CFI_STARTPROC
ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
"jmp clear_page_c_e", X86_FEATURE_ERMS
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page_c)
ENDPROC(clear_page)
ENTRY(clear_page_c_e)
ENTRY(clear_page_orig)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)
ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@ -45,29 +49,13 @@ ENTRY(clear_page)
nop
ret
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)
ENDPROC(clear_page_orig)
/*
* Some CPUs support enhanced REP MOVSB/STOSB instructions.
* It is recommended to use this when possible.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original function.
*
*/
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
3:
.previous
.section .altinstructions,"a"
altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
.Lclear_page_end-clear_page, 2b-1b
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
.Lclear_page_end-clear_page,3b-2b
.previous
ENTRY(clear_page_c_e)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)

View file

@ -2,23 +2,26 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
* Some CPUs run faster using the string copy instructions (sane microcode).
* It is also a lot simpler. Use this when possible. But, don't use streaming
* copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
* prefetch distance based on SMP/UP.
*/
ALIGN
copy_page_rep:
ENTRY(copy_page)
CFI_STARTPROC
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_rep)
ENDPROC(copy_page)
/*
* Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
* Could vary the prefetch distance based on SMP/UP.
*/
ENTRY(copy_page)
ENTRY(copy_page_regs)
CFI_STARTPROC
subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
@ -90,21 +93,5 @@ ENTRY(copy_page)
addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
.Lcopy_page_end-copy_page, 2b-1b
.previous
ENDPROC(copy_page_regs)

View file

@ -8,9 +8,6 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@ -19,33 +16,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
/*
* By placing feature2 after feature1 in altinstructions section, we logically
* implement:
* If CPU has feature2, jmp to alt2 is used
* else if CPU has feature1, jmp to alt1 is used
* else jmp to orig is used.
*/
.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt1-1b /* offset */ /* or alternatively to alt1 */
3: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous
.section .altinstructions,"a"
altinstruction_entry 0b,2b,\feature1,5,5
altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm
.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
@ -67,7 +38,6 @@
_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
#endif
.endm
/* Standard copy_to_user with segment limit checking */
@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_to_user)
@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_from_user)

View file

@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
/* handle last odd byte */
.Lhandle_1:
testl $1, %r10d
testb $1, %r10b
jz .Lende
xorl %ebx, %ebx
source

View file

@ -52,6 +52,13 @@
*/
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
{
/*
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
* even if the input buffer is long enough to hold them.
*/
if (buf_len > MAX_INSN_SIZE)
buf_len = MAX_INSN_SIZE;
memset(insn, 0, sizeof(*insn));
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
@ -164,6 +171,12 @@ void insn_get_prefixes(struct insn *insn)
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
* For VEX2, fake VEX3-like byte#2.
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
insn->vex_prefix.bytes[2] = b2 & 0x7f;
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}
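With the new clamp in insn_init(), a caller may hand the decoder a buffer longer than MAX_INSN_SIZE and it will still never look past 15 bytes. A hedged usage sketch against the decoder's entry points shown above (surrounding kernel plumbing omitted):

#include <asm/insn.h>	/* struct insn, insn_init(), insn_get_length() */

/* Hedged sketch: decode one instruction from a possibly oversized buffer. */
static int decoded_length(const unsigned char *buf, int buf_len)
{
	struct insn insn;

	/* A buf_len larger than MAX_INSN_SIZE is now clamped internally. */
	insn_init(&insn, buf, buf_len, 1 /* 64-bit mode */);
	insn_get_length(&insn);

	return insn.length;	/* decoded length in bytes */
}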

View file

@ -1,11 +1,19 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
* We build a jump to memcpy_orig by default, which gets NOPped out on
* the majority of x86 CPUs (those that set REP_GOOD). On CPUs that also
* have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs are
* patched into a jmp to memcpy_erms, which does the REP; MOVSB copy.
*/
.weak memcpy
/*
* memcpy - Copy a memory block.
*
@ -17,15 +25,11 @@
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
@ -34,29 +38,21 @@
movl %edx, %ecx
rep movsb
ret
.Lmemcpy_e:
.previous
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
* memcpy_c. Use memcpy_c_e when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
ENTRY(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
.previous
ENDPROC(memcpy_erms)
.weak memcpy
ENTRY(__memcpy)
ENTRY(memcpy)
ENTRY(memcpy_orig)
CFI_STARTPROC
movq %rdi, %rax
@ -183,26 +179,4 @@ ENTRY(memcpy)
.Lend:
retq
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced rep movsb copy is not available, use fast string copy
* memcpy_c() when possible. This is faster and code is simpler than
* original memcpy().
* Otherwise, original memcpy() is used.
* In .altinstructions section, ERMS feature is placed after REP_GOOD
* feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.section .altinstructions, "a"
altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous
ENDPROC(memcpy_orig)

View file

@ -5,7 +5,6 @@
* This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
@ -44,6 +43,8 @@ ENTRY(__memmove)
jg 2f
.Lmemmove_begin_forward:
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
/*
* The movsq instruction has a high startup latency,
* so we handle small sizes with general-purpose registers.
@ -207,21 +208,5 @@ ENTRY(__memmove)
13:
retq
CFI_ENDPROC
.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous
.section .altinstructions,"a"
altinstruction_entry .Lmemmove_begin_forward, \
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(__memmove)
ENDPROC(memmove)

View file

@ -5,19 +5,30 @@
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
.weak memset
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the original function as well.
*
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
*/
ENTRY(memset)
ENTRY(__memset)
/*
* Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature. It is
* recommended to use it when possible. If ERMS is not available, use the
* fast string instructions.
*
* Otherwise, use the original memset function.
*/
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS
movq %rdi,%r9
movq %rdx,%rcx
andl $7,%edx
@ -31,8 +42,8 @@
rep stosb
movq %r9,%rax
ret
.Lmemset_e:
.previous
ENDPROC(memset)
ENDPROC(__memset)
/*
* ISO C memset - set a memory block to a byte value. This function uses
@ -45,21 +56,16 @@
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c_e:
ENTRY(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
.Lmemset_e_e:
.previous
ENDPROC(memset_erms)
.weak memset
ENTRY(memset)
ENTRY(__memset)
ENTRY(memset_orig)
CFI_STARTPROC
movq %rdi,%r10
@ -134,23 +140,4 @@ ENTRY(__memset)
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In .altinstructions section, ERMS feature is placed after REP_GOOD
* feature to implement the right patch order.
*/
.section .altinstructions,"a"
altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
.Lfinal-__memset,.Lmemset_e-.Lmemset_c
altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
.Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
.previous
ENDPROC(memset_orig)

View file

@ -14,8 +14,8 @@
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
pushq_cfi %rbx
pushq_cfi %rbp
pushq_cfi_reg rbx
pushq_cfi_reg rbp
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
movl %ebp, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
popq_cfi %rbp
popq_cfi %rbx
popq_cfi_reg rbp
popq_cfi_reg rbx
ret
3:
CFI_RESTORE_STATE
@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
pushl_cfi %ebx
pushl_cfi %ebp
pushl_cfi %esi
pushl_cfi %edi
pushl_cfi_reg ebx
pushl_cfi_reg ebp
pushl_cfi_reg esi
pushl_cfi_reg edi
pushl_cfi $0 /* Return value */
pushl_cfi %eax
movl 4(%eax), %ecx
@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
movl %esi, 24(%eax)
movl %edi, 28(%eax)
popl_cfi %eax
popl_cfi %edi
popl_cfi %esi
popl_cfi %ebp
popl_cfi %ebx
popl_cfi_reg edi
popl_cfi_reg esi
popl_cfi_reg ebp
popl_cfi_reg ebx
ret
3:
CFI_RESTORE_STATE

View file

@ -34,10 +34,10 @@
*/
#define save_common_regs \
pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
pushl_cfi_reg ecx
#define restore_common_regs \
popl_cfi %ecx; CFI_RESTORE ecx
popl_cfi_reg ecx
/* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst
@ -64,22 +64,22 @@
*/
#define save_common_regs \
pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
pushq_cfi %r11; CFI_REL_OFFSET r11, 0
pushq_cfi_reg rdi; \
pushq_cfi_reg rsi; \
pushq_cfi_reg rcx; \
pushq_cfi_reg r8; \
pushq_cfi_reg r9; \
pushq_cfi_reg r10; \
pushq_cfi_reg r11
#define restore_common_regs \
popq_cfi %r11; CFI_RESTORE r11; \
popq_cfi %r10; CFI_RESTORE r10; \
popq_cfi %r9; CFI_RESTORE r9; \
popq_cfi %r8; CFI_RESTORE r8; \
popq_cfi %rcx; CFI_RESTORE rcx; \
popq_cfi %rsi; CFI_RESTORE rsi; \
popq_cfi %rdi; CFI_RESTORE rdi
popq_cfi_reg r11; \
popq_cfi_reg r10; \
popq_cfi_reg r9; \
popq_cfi_reg r8; \
popq_cfi_reg rcx; \
popq_cfi_reg rsi; \
popq_cfi_reg rdi
#endif
@ -87,12 +87,10 @@
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC
@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_downgrade_wake
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC

View file

@ -13,12 +13,9 @@
.globl \name
\name:
CFI_STARTPROC
pushl_cfi %eax
CFI_REL_OFFSET eax, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi_reg eax
pushl_cfi_reg ecx
pushl_cfi_reg edx
.if \put_ret_addr_in_eax
/* Place EIP in the arg1 */
@ -26,12 +23,9 @@
.endif
call \func
popl_cfi %edx
CFI_RESTORE edx
popl_cfi %ecx
CFI_RESTORE ecx
popl_cfi %eax
CFI_RESTORE eax
popl_cfi_reg edx
popl_cfi_reg ecx
popl_cfi_reg eax
ret
CFI_ENDPROC
_ASM_NOKPROBE(\name)

View file

@ -17,9 +17,18 @@
CFI_STARTPROC
/* this one pushes 9 elems, the next one would be %rIP */
SAVE_ARGS
pushq_cfi_reg rdi
pushq_cfi_reg rsi
pushq_cfi_reg rdx
pushq_cfi_reg rcx
pushq_cfi_reg rax
pushq_cfi_reg r8
pushq_cfi_reg r9
pushq_cfi_reg r10
pushq_cfi_reg r11
.if \put_ret_addr_in_rdi
/* 9*8(%rsp) is return addr on stack */
movq_cfi_restore 9*8, rdi
.endif
@ -45,11 +54,22 @@
#endif
#endif
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
CFI_STARTPROC
SAVE_ARGS
CFI_ADJUST_CFA_OFFSET 9*8
restore:
RESTORE_ARGS
popq_cfi_reg r11
popq_cfi_reg r10
popq_cfi_reg r9
popq_cfi_reg r8
popq_cfi_reg rax
popq_cfi_reg rcx
popq_cfi_reg rdx
popq_cfi_reg rsi
popq_cfi_reg rdi
ret
CFI_ENDPROC
_ASM_NOKPROBE(restore)
#endif

View file

@ -273,6 +273,9 @@ dd: ESC
de: ESC
df: ESC
# 0xe0 - 0xef
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
e0: LOOPNE/LOOPNZ Jb (f64)
e1: LOOPE/LOOPZ Jb (f64)
e2: LOOP Jb (f64)
@ -281,6 +284,10 @@ e4: IN AL,Ib
e5: IN eAX,Ib
e6: OUT Ib,AL
e7: OUT Ib,eAX
# With a 0x66 prefix in 64-bit mode, AMD CPUs treat the immediate offset
# in "near" jumps and calls as 16-bit. For CALL, the pushed return
# address is 16 bits wide and RSP is decremented by 2, but RSP is not
# truncated to 16 bits, unlike RIP.
e8: CALL Jz (f64)
e9: JMP-near Jz (f64)
ea: JMP-far Ap (i64)
@ -456,6 +463,7 @@ AVXcode: 1
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64)
81: JNO Jz (f64)
82: JB/JC/JNAE Jz (f64)
@ -842,6 +850,7 @@ EndTable
GrpTable: Grp5
0: INC Ev
1: DEC Ev
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)

View file

@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
if (kprobes_built_in() && !user_mode_vm(regs)) {
if (kprobes_built_in() && !user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
instr = (void *)convert_ip_to_linear(current, regs);
max_instr = instr + 15;
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
return 0;
while (instr < max_instr) {
@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
if (error_code & PF_USER)
return false;
if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
return false;
return true;
@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* User-mode registers count as a user access even for any
* potential system fault or CPU buglet:
*/
if (user_mode_vm(regs)) {
if (user_mode(regs)) {
local_irq_enable();
error_code |= PF_USER;
flags |= FAULT_FLAG_USER;

View file

@ -179,7 +179,8 @@ static void __init probe_page_size_mask(void)
if (cpu_has_pge) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
}
} else
__supported_pte_mask &= ~_PAGE_GLOBAL;
}
#ifdef CONFIG_X86_32

View file

@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
if (!user_mode_vm(regs)) {
if (!user_mode(regs)) {
unsigned long stack = kernel_stack_pointer(regs);
if (depth)
dump_trace(NULL, regs, (unsigned long *)stack, 0,

View file

@ -134,7 +134,7 @@ static void do_fpu_end(void)
static void fix_processor_context(void)
{
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
struct desc_struct *desc = get_cpu_gdt_table(cpu);
tss_desc tss;

View file

@ -119,7 +119,7 @@
110 i386 iopl sys_iopl
111 i386 vhangup sys_vhangup
112 i386 idle
113 i386 vm86old sys_vm86old sys32_vm86_warning
113 i386 vm86old sys_vm86old sys_ni_syscall
114 i386 wait4 sys_wait4 compat_sys_wait4
115 i386 swapoff sys_swapoff
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
@ -172,7 +172,7 @@
163 i386 mremap sys_mremap
164 i386 setresuid sys_setresuid16
165 i386 getresuid sys_getresuid16
166 i386 vm86 sys_vm86 sys32_vm86_warning
166 i386 vm86 sys_vm86 sys_ni_syscall
167 i386 query_module
168 i386 poll sys_poll
169 i386 nfsservctl
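The vm86old/vm86 compat entries above now point at sys_ni_syscall instead of the old warning stub, so a 32-bit caller on a 64-bit kernel should simply get ENOSYS. A hedged userspace check (build with -m32 and run under a 64-bit kernel; syscall number 113 is vm86old per the table above):

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	long ret = syscall(113, 0L);	/* i386 vm86old */

	if (ret == -1 && errno == ENOSYS)
		puts("vm86old: ENOSYS (wired to sys_ni_syscall)");
	else
		printf("vm86old returned %ld (errno %d)\n", ret, errno);
	return 0;
}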

Some files were not shown because too many files have changed in this diff Show more