linux-stable/arch/x86/kvm/svm/vmenter.S

382 lines
9.8 KiB
ArmAsm
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
x86/retbleed: Add SKL return thunk To address the Intel SKL RSB underflow issue in software it's required to do call depth tracking. Provide a return thunk for call depth tracking on Intel SKL CPUs. The tracking does not use a counter. It uses uses arithmetic shift right on call entry and logical shift left on return. The depth tracking variable is initialized to 0x8000.... when the call depth is zero. The arithmetic shift right sign extends the MSB and saturates after the 12th call. The shift count is 5 so the tracking covers 12 nested calls. On return the variable is shifted left logically so it becomes zero again. CALL RET 0: 0x8000000000000000 0x0000000000000000 1: 0xfc00000000000000 0xf000000000000000 ... 11: 0xfffffffffffffff8 0xfffffffffffffc00 12: 0xffffffffffffffff 0xffffffffffffffe0 After a return buffer fill the depth is credited 12 calls before the next stuffing has to take place. There is a inaccuracy for situations like this: 10 calls 5 returns 3 calls 4 returns 3 calls .... The shift count might cause this to be off by one in either direction, but there is still a cushion vs. the RSB depth. The algorithm does not claim to be perfect, but it should obfuscate the problem enough to make exploitation extremly difficult. The theory behind this is: RSB is a stack with depth 16 which is filled on every call. On the return path speculation "pops" entries to speculate down the call chain. Once the speculative RSB is empty it switches to other predictors, e.g. the Branch History Buffer, which can be mistrained by user space and misguide the speculation path to a gadget. Call depth tracking is designed to break this speculation path by stuffing speculation trap calls into the RSB which are never getting a corresponding return executed. This stalls the prediction path until it gets resteered, The assumption is that stuffing at the 12th return is sufficient to break the speculation before it hits the underflow and the fallback to the other predictors. Testing confirms that it works. Johannes, one of the retbleed researchers. tried to attack this approach but failed. There is obviously no scientific proof that this will withstand future research progress, but all we can do right now is to speculate about it. The SAR/SHL usage was suggested by Andi Kleen. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20220915111147.890071690@infradead.org
2022-09-15 11:11:27 +00:00
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/frame.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "kvm-asm-offsets.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
/* Intentionally omit RAX as it's context switched by hardware */
#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE)
#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE)
#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE)
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE)
#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE)
#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE)
#ifdef CONFIG_X86_64
#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE)
#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE)
#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE)
#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE)
#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE)
#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE)
#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE)
#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE)
#endif
#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa)
.section .noinstr.text, "ax"
.macro RESTORE_GUEST_SPEC_CTRL
/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
ALTERNATIVE_2 "", \
"jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
"", X86_FEATURE_V_SPEC_CTRL
801:
.endm
.macro RESTORE_GUEST_SPEC_CTRL_BODY
800:
/*
* SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
* host's, write the MSR. This is kept out-of-line so that the common
* case does not have to jump.
*
* IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
* there must not be any returns or indirect branches between this code
* and vmentry.
*/
movl SVM_spec_ctrl(%_ASM_DI), %eax
cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
je 801b
mov $MSR_IA32_SPEC_CTRL, %ecx
xor %edx, %edx
wrmsr
jmp 801b
.endm
.macro RESTORE_HOST_SPEC_CTRL
/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
ALTERNATIVE_2 "", \
"jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
"", X86_FEATURE_V_SPEC_CTRL
901:
.endm
.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
900:
/* Same for after vmexit. */
mov $MSR_IA32_SPEC_CTRL, %ecx
/*
* Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
* if it was not intercepted during guest execution.
*/
cmpb $0, \spec_ctrl_intercepted
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
998:
/* Now restore the host value of the MSR if different from the guest's. */
movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
cmp SVM_spec_ctrl(%_ASM_DI), %eax
je 901b
xor %edx, %edx
wrmsr
jmp 901b
.endm
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
* @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
push %r15
push %r14
push %r13
push %r12
#else
push %edi
push %esi
#endif
push %_ASM_BX
/*
* Save variables needed after vmexit on the stack, in inverse
* order compared to when they are needed.
*/
/* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
push %_ASM_ARG2
/* Needed to restore access to percpu variables. */
__ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa)
/* Finally save @svm. */
push %_ASM_ARG1
.ifnc _ASM_ARG1, _ASM_DI
/*
* Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
* and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
*/
mov %_ASM_ARG1, %_ASM_DI
.endif
/* Clobbers RAX, RCX, RDX. */
RESTORE_GUEST_SPEC_CTRL
/*
* Use a single vmcb (vmcb01 because it's always valid) for
* context switching guest state via VMLOAD/VMSAVE, that way
* the state doesn't need to be copied between vmcb01 and
* vmcb02 when switching vmcbs for nested virtualization.
*/
mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
1: vmload %_ASM_AX
2:
/* Get svm->current_vmcb->pa into RAX. */
mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Load guest registers. */
mov VCPU_RCX(%_ASM_DI), %_ASM_CX
mov VCPU_RDX(%_ASM_DI), %_ASM_DX
mov VCPU_RBX(%_ASM_DI), %_ASM_BX
mov VCPU_RBP(%_ASM_DI), %_ASM_BP
mov VCPU_RSI(%_ASM_DI), %_ASM_SI
#ifdef CONFIG_X86_64
mov VCPU_R8 (%_ASM_DI), %r8
mov VCPU_R9 (%_ASM_DI), %r9
mov VCPU_R10(%_ASM_DI), %r10
mov VCPU_R11(%_ASM_DI), %r11
mov VCPU_R12(%_ASM_DI), %r12
mov VCPU_R13(%_ASM_DI), %r13
mov VCPU_R14(%_ASM_DI), %r14
mov VCPU_R15(%_ASM_DI), %r15
#endif
mov VCPU_RDI(%_ASM_DI), %_ASM_DI
/* Enter guest mode */
sti
3: vmrun %_ASM_AX
4:
cli
/* Pop @svm to RAX while it's the only available register. */
pop %_ASM_AX
/* Save all guest registers. */
mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
mov %r8, VCPU_R8 (%_ASM_AX)
mov %r9, VCPU_R9 (%_ASM_AX)
mov %r10, VCPU_R10(%_ASM_AX)
mov %r11, VCPU_R11(%_ASM_AX)
mov %r12, VCPU_R12(%_ASM_AX)
mov %r13, VCPU_R13(%_ASM_AX)
mov %r14, VCPU_R14(%_ASM_AX)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
/* @svm can stay in RDI from now on. */
mov %_ASM_AX, %_ASM_DI
mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
5: vmsave %_ASM_AX
6:
/* Restores GSBASE among other things, allowing access to percpu data. */
pop %_ASM_AX
7: vmload %_ASM_AX
8:
#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
/* Clobbers RAX, RCX, RDX. */
RESTORE_HOST_SPEC_CTRL
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
* kernel. This should be done before re-enabling interrupts
* because interrupt handlers won't sanitize 'ret' if the return is
* from the kernel.
*/
UNTRAIN_RET_VM
/*
* Clear all general purpose registers except RSP and RAX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as they are restored by hardware
* during VM-Exit.
*/
xor %ecx, %ecx
xor %edx, %edx
xor %ebx, %ebx
xor %ebp, %ebp
xor %esi, %esi
xor %edi, %edi
#ifdef CONFIG_X86_64
xor %r8d, %r8d
xor %r9d, %r9d
xor %r10d, %r10d
xor %r11d, %r11d
xor %r12d, %r12d
xor %r13d, %r13d
xor %r14d, %r14d
xor %r15d, %r15d
#endif
/* "Pop" @spec_ctrl_intercepted. */
pop %_ASM_BX
pop %_ASM_BX
#ifdef CONFIG_X86_64
pop %r12
pop %r13
pop %r14
pop %r15
#else
pop %esi
pop %edi
#endif
pop %_ASM_BP
RET
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
10: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
ud2
30: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 4b
ud2
50: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 6b
ud2
70: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 8b
ud2
_ASM_EXTABLE(1b, 10b)
_ASM_EXTABLE(3b, 30b)
_ASM_EXTABLE(5b, 50b)
_ASM_EXTABLE(7b, 70b)
SYM_FUNC_END(__svm_vcpu_run)
#ifdef CONFIG_KVM_AMD_SEV
#ifdef CONFIG_X86_64
#define SEV_ES_GPRS_BASE 0x300
#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
#endif
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
* @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
FRAME_BEGIN
/*
* Save non-volatile (callee-saved) registers to the host save area.
* Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
* saved on VMRUN.
*/
mov %rbp, SEV_ES_RBP (%rdx)
mov %r15, SEV_ES_R15 (%rdx)
mov %r14, SEV_ES_R14 (%rdx)
mov %r13, SEV_ES_R13 (%rdx)
mov %r12, SEV_ES_R12 (%rdx)
mov %rbx, SEV_ES_RBX (%rdx)
/*
* Save volatile registers that hold arguments that are needed after
* #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
*/
mov %rdi, SEV_ES_RDI (%rdx)
mov %rsi, SEV_ES_RSI (%rdx)
/* Clobbers RAX, RCX, RDX (@hostsa). */
RESTORE_GUEST_SPEC_CTRL
/* Get svm->current_vmcb->pa into RAX. */
mov SVM_current_vmcb(%rdi), %rax
mov KVM_VMCB_pa(%rax), %rax
/* Enter guest mode */
sti
1: vmrun %rax
2: cli
#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
/* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
RESTORE_HOST_SPEC_CTRL
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
* kernel. This should be done before re-enabling interrupts
* because interrupt handlers won't sanitize RET if the return is
* from the kernel.
*/
UNTRAIN_RET_VM
FRAME_END
RET
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY %sil
3: cmpb $0, kvm_rebooting(%rip)
jne 2b
ud2
_ASM_EXTABLE(1b, 3b)
SYM_FUNC_END(__svm_sev_es_vcpu_run)
#endif /* CONFIG_KVM_AMD_SEV */