Merge branch 'kvm-svm-harden' into HEAD

This fixes three issues in nested SVM:

1) in the shutdown_interception() vmexit handler we call kvm_vcpu_reset().
However, if running nested and L1 doesn't intercept shutdown, the function
resets vcpu->arch.hflags without properly leaving the nested state.
This leaves the vCPU in inconsistent state and later triggers a kernel
panic in SVM code.  The same bug can likely be triggered by sending INIT
via local apic to a vCPU which runs a nested guest.

On VMX we are lucky that the issue can't happen because VMX always
intercepts triple faults, thus triple fault in L2 will always be
redirected to L1.  Plus, handle_triple_fault() doesn't reset the vCPU.
INIT IPI can't happen on VMX either because INIT events are masked while
in VMX mode.

Secondarily, KVM doesn't honour SHUTDOWN intercept bit of L1 on SVM.
A normal hypervisor should always intercept SHUTDOWN, a unit test on
the other hand might want to not do so.

Finally, the guest can trigger a kernel non rate limited printk on SVM
from the guest, which is fixed as well.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2022-11-17 11:42:35 -05:00
commit d79b483193
10 changed files with 172 additions and 57 deletions

View File

@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
return;
kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}
@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm)
if (!svm->nested.initialized)
return;
if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
svm_switch_vmcb(svm, &svm->vmcb01);
svm_vcpu_free_msrpm(svm->nested.msrpm);
svm->nested.msrpm = NULL;
@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm)
svm->nested.initialized = false;
}
/*
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
*/
void svm_leave_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

View File

@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return 0;
}
static int is_external_interrupt(u32 info)
{
info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
}
static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -1438,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
*/
svm_clear_current_vmcb(svm->vmcb);
svm_leave_nested(vcpu);
svm_free_nested(svm);
sev_free_vcpu(vcpu);
@ -3425,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 0;
}
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
"exit_code 0x%x\n",
__func__, svm->vmcb->control.exit_int_info,
exit_code);
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;

View File

@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
{
kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
}
@ -6440,9 +6441,6 @@ out:
return kvm_state.size;
}
/*
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
*/
void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu)) {

View File

@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
/* Forcibly leave the nested mode in cases like a vCPU reset */
static void kvm_leave_nested(struct kvm_vcpu *vcpu)
{
kvm_x86_ops.nested_ops->leave_nested(vcpu);
}
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
}
@ -9805,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
int kvm_check_nested_events(struct kvm_vcpu *vcpu)
{
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
kvm_x86_ops.nested_ops->triple_fault(vcpu);
return 1;
}
@ -10560,15 +10566,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
if (is_guest_mode(vcpu)) {
if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
if (is_guest_mode(vcpu))
kvm_x86_ops.nested_ops->triple_fault(vcpu);
} else {
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
vcpu->mmio_needed = 0;
r = 0;
goto out;
}
goto out;
}
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
/* Page is swapped out. Do synthetic halt */
@ -11997,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
WARN_ON_ONCE(!init_event &&
(old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
/*
* SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's
* possible to INIT the vCPU while L2 is active. Force the vCPU back
* into L1 as EFER.SVME is cleared on INIT (along with all other EFER
* bits), i.e. virtualization is disabled.
*/
if (is_guest_mode(vcpu))
kvm_leave_nested(vcpu);
kvm_lapic_reset(vcpu, init_event);
WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;

View File

@ -41,6 +41,7 @@
/x86_64/svm_vmcall_test
/x86_64/svm_int_ctl_test
/x86_64/svm_nested_soft_inject_test
/x86_64/svm_nested_shutdown_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
/x86_64/tsc_scaling_sync

View File

@ -101,6 +101,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test

View File

@ -748,6 +748,19 @@ struct ex_regs {
uint64_t rflags;
};
struct idt_entry {
uint16_t offset0;
uint16_t selector;
uint16_t ist : 3;
uint16_t : 5;
uint16_t type : 4;
uint16_t : 1;
uint16_t dpl : 2;
uint16_t p : 1;
uint16_t offset1;
uint32_t offset2; uint32_t reserved;
};
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
void vm_install_exception_handler(struct kvm_vm *vm, int vector,

View File

@ -1074,19 +1074,6 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
}
}
struct idt_entry {
uint16_t offset0;
uint16_t selector;
uint16_t ist : 3;
uint16_t : 5;
uint16_t type : 4;
uint16_t : 1;
uint16_t dpl : 2;
uint16_t p : 1;
uint16_t offset1;
uint32_t offset2; uint32_t reserved;
};
static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
int dpl, unsigned short selector)
{

View File

@ -0,0 +1,67 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* svm_nested_shutdown_test
*
* Copyright (C) 2022, Red Hat, Inc.
*
* Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
*/
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "svm_util.h"
static void l2_guest_code(struct svm_test_data *svm)
{
__asm__ __volatile__("ud2");
}
static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
struct vmcb *vmcb = svm->vmcb;
generic_svm_setup(svm, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
idt[11].p = 0; // #NP is not intercepted and will cause another
// #NP that will be converted to #DF
idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
run_guest(vmcb, svm->vmcb_gpa);
/* should not reach here */
GUEST_ASSERT(0);
}
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_run *run;
vm_vaddr_t svm_gva;
struct kvm_vm *vm;
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vcpu);
vcpu_alloc_svm(vm, &svm_gva);
vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
run = vcpu->run;
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
"Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
kvm_vm_free(vm);
}

View File

@ -3,6 +3,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@ -20,10 +21,11 @@ static void l2_guest_code(void)
: : [port] "d" (ARBITRARY_IO_PORT) : "rax");
}
void l1_guest_code(struct vmx_pages *vmx)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
void l1_guest_code_vmx(struct vmx_pages *vmx)
{
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx)
GUEST_DONE();
}
void l1_guest_code_svm(struct svm_test_data *svm)
{
struct vmcb *vmcb = svm->vmcb;
generic_svm_setup(svm, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
/* don't intercept shutdown to test the case of SVM allowing to do so */
vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
run_guest(vmcb, svm->vmcb_gpa);
/* should not reach here, L1 should crash */
GUEST_ASSERT(0);
}
int main(void)
{
struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vcpu_events events;
vm_vaddr_t vmx_pages_gva;
struct ucall uc;
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
TEST_REQUIRE(has_vmx || has_svm);
TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
if (has_vmx) {
vm_vaddr_t vmx_pages_gva;
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vcpu, 1, vmx_pages_gva);
} else {
vm_vaddr_t svm_gva;
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
vcpu_alloc_svm(vm, &svm_gva);
vcpu_args_set(vcpu, 1, svm_gva);
}
vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
run = vcpu->run;
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vcpu, 1, vmx_pages_gva);
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@ -78,13 +109,21 @@ int main(void)
"No triple fault pending");
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
break;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
if (has_svm) {
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
"Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
} else {
switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
break;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
}
return 0;
}