Bugfixes, including the fix for CVE-2020-2732 and a few

issues found by "make W=1".
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQEcBAABAgAGBQJeVBwcAAoJEL/70l94x66DB9AH/AxWhmtf6YVXMNyZjXydxa1f
 hYVm9wg9GCsZS+7cktMhq0/uDEu5IjaCv7d+bzIcYZdFAOcs5nBUUjn1LtVl9w1y
 48vobyOa8pXpORerBtZtaO1kt4sfFR63zm7uau32DzXrz3qpHlMUjPdL08A1e35V
 cSSPAHHsl9S1TbDryc/VUNCOgauJes6LHbd3CdeAXU6lzMBW8JWbF2b/MAkvHG6n
 Hw5LpicWSeTxoPjR4Oi0Yx3VKvWfS9608netSJmuCNsv36wrhzKR1iuyb3kNCkAy
 AIlALn4PZq1Y5i1INi/XIkpC8d9yTqt5heRxYwp+yHadWO6E7ZMlITfxLZii+mM=
 =7EpO
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Bugfixes, including the fix for CVE-2020-2732 and a few issues found
  by 'make W=1'"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: s390: rstify new ioctls in api.rst
  KVM: nVMX: Check IO instruction VM-exit conditions
  KVM: nVMX: Refactor IO bitmap checks into helper function
  KVM: nVMX: Don't emulate instructions in guest mode
  KVM: nVMX: Emulate MTF when performing instruction emulation
  KVM: fix error handling in svm_hardware_setup
  KVM: SVM: Fix potential memory leak in svm_cpu_init()
  KVM: apic: avoid calculating pending eoi from an uninitialized val
  KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested pinbased_ctls only when apicv is globally disabled
  KVM: nVMX: handle nested posted interrupts when apicv is disabled for L1
  kvm: x86: svm: Fix NULL pointer dereference when AVIC not enabled
  KVM: VMX: Add VMX_FEATURE_USR_WAIT_PAUSE
  KVM: nVMX: Hold KVM's srcu lock when syncing vmcs12->shadow
  KVM: x86: don't notify userspace IOAPIC on edge-triggered interrupt EOI
  kvm/emulate: fix a -Werror=cast-function-type
  KVM: x86: fix incorrect comparison in trace event
  KVM: nVMX: Fix some obsolete comments and grammar error
  KVM: x86: fix missing prototypes
  KVM: x86: enable -Werror
This commit is contained in:
Linus Torvalds 2020-02-24 11:48:17 -08:00
commit 63623fd449
19 changed files with 284 additions and 121 deletions

View File

@ -4611,35 +4611,38 @@ unpins the VPA pages and releases all the device pages that are used to
track the secure pages by hypervisor.
4.122 KVM_S390_NORMAL_RESET
---------------------------
Capability: KVM_CAP_S390_VCPU_RESETS
Architectures: s390
Type: vcpu ioctl
Parameters: none
Returns: 0
:Capability: KVM_CAP_S390_VCPU_RESETS
:Architectures: s390
:Type: vcpu ioctl
:Parameters: none
:Returns: 0
This ioctl resets VCPU registers and control structures according to
the cpu reset definition in the POP (Principles Of Operation).
4.123 KVM_S390_INITIAL_RESET
----------------------------
Capability: none
Architectures: s390
Type: vcpu ioctl
Parameters: none
Returns: 0
:Capability: none
:Architectures: s390
:Type: vcpu ioctl
:Parameters: none
:Returns: 0
This ioctl resets VCPU registers and control structures according to
the initial cpu reset definition in the POP. However, the cpu is not
put into ESA mode. This reset is a superset of the normal reset.
4.124 KVM_S390_CLEAR_RESET
--------------------------
Capability: KVM_CAP_S390_VCPU_RESETS
Architectures: s390
Type: vcpu ioctl
Parameters: none
Returns: 0
:Capability: KVM_CAP_S390_VCPU_RESETS
:Architectures: s390
:Type: vcpu ioctl
:Parameters: none
:Returns: 0
This ioctl resets VCPU registers and control structures according to
the clear cpu reset definition in the POP. However, the cpu is not put

View File

@ -292,6 +292,14 @@ enum x86emul_mode {
#define X86EMUL_SMM_MASK (1 << 6)
#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
/*
* fastop functions are declared as taking a never-defined fastop parameter,
* so they can't be called from C directly.
*/
struct fastop;
typedef void (*fastop_t)(struct fastop *);
struct x86_emulate_ctxt {
const struct x86_emulate_ops *ops;
@ -324,7 +332,10 @@ struct x86_emulate_ctxt {
struct operand src;
struct operand src2;
struct operand dst;
int (*execute)(struct x86_emulate_ctxt *ctxt);
union {
int (*execute)(struct x86_emulate_ctxt *ctxt);
fastop_t fop;
};
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
/*
* The following six fields are cleared together,

View File

@ -1122,6 +1122,7 @@ struct kvm_x86_ops {
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@ -1146,7 +1147,7 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);

View File

@ -72,7 +72,7 @@
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)

View File

@ -81,6 +81,7 @@
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
#endif /* _ASM_X86_VMXFEATURES_H */

View File

@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y += -Iarch/x86/kvm
ccflags-y += -Werror
KVM := ../../../virt/kvm

View File

@ -191,25 +191,6 @@
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
/*
* fastop functions have a special calling convention:
*
* dst: rax (in/out)
* src: rdx (in/out)
* src2: rcx (in)
* flags: rflags (in/out)
* ex: rsi (in:fastop pointer, out:zero if exception)
*
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
* different operand sizes can be reached by calculation, rather than a jump
* table (which would be bigger than the code).
*
* fastop functions are declared as taking a never-defined fastop parameter,
* so they can't be called from C directly.
*/
struct fastop;
struct opcode {
u64 flags : 56;
u64 intercept : 8;
@ -311,8 +292,19 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
#define ON64(x)
#endif
typedef void (*fastop_t)(struct fastop *);
/*
* fastop functions have a special calling convention:
*
* dst: rax (in/out)
* src: rdx (in/out)
* src2: rcx (in)
* flags: rflags (in/out)
* ex: rsi (in:fastop pointer, out:zero if exception)
*
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
* different operand sizes can be reached by calculation, rather than a jump
* table (which would be bigger than the code).
*/
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define __FOP_FUNC(name) \
@ -5683,7 +5675,7 @@ special_insn:
if (ctxt->execute) {
if (ctxt->d & Fastop)
rc = fastop(ctxt, (fastop_t)ctxt->execute);
rc = fastop(ctxt, ctxt->fop);
else
rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)

View File

@ -417,7 +417,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
if (irq.level &&
if (irq.trig_mode &&
kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode))
__set_bit(irq.vector, ioapic_handled_vectors);

View File

@ -627,9 +627,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
if (pv_eoi_get_user(vcpu, &val) < 0)
if (pv_eoi_get_user(vcpu, &val) < 0) {
printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return false;
}
return val & 0x1;
}
@ -1046,11 +1048,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
if (vcpu->arch.apicv_active)
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
else {
if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}

View File

@ -339,7 +339,7 @@ TRACE_EVENT(
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
__field(u8, u)
__field(signed char, u)
),
TP_fast_assign(

View File

@ -1005,33 +1005,32 @@ static void svm_cpu_uninit(int cpu)
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd;
int r;
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
r = -ENOMEM;
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto err_1;
goto free_cpu_data;
if (svm_sev_enabled()) {
r = -ENOMEM;
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
sizeof(void *),
GFP_KERNEL);
if (!sd->sev_vmcbs)
goto err_1;
goto free_save_area;
}
per_cpu(svm_data, cpu) = sd;
return 0;
err_1:
free_save_area:
__free_page(sd->save_area);
free_cpu_data:
kfree(sd);
return r;
return -ENOMEM;
}
@ -1350,6 +1349,24 @@ static __init void svm_adjust_mmio_mask(void)
kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
static void svm_hardware_teardown(void)
{
int cpu;
if (svm_sev_enabled()) {
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
sev_flush_asids();
}
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
iopm_base = 0;
}
static __init int svm_hardware_setup(void)
{
int cpu;
@ -1463,29 +1480,10 @@ static __init int svm_hardware_setup(void)
return 0;
err:
__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
iopm_base = 0;
svm_hardware_teardown();
return r;
}
static __exit void svm_hardware_unsetup(void)
{
int cpu;
if (svm_sev_enabled()) {
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
sev_flush_asids();
}
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
iopm_base = 0;
}
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@ -5232,6 +5230,9 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
bool activated = kvm_vcpu_apicv_active(vcpu);
if (!avic)
return;
if (activated) {
/**
* During AVIC temporary deactivation, guest could update
@ -5255,8 +5256,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
if (!vcpu->arch.apicv_active)
return -1;
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@ -5268,6 +5272,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
@ -7378,7 +7384,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
.hardware_unsetup = svm_hardware_unsetup,
.hardware_unsetup = svm_hardware_teardown,
.check_processor_compatibility = svm_check_processor_compat,
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
@ -7433,6 +7439,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
.update_emulated_instruction = NULL,
.set_interrupt_shadow = svm_set_interrupt_shadow,
.get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,

View File

@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0

View File

@ -3161,10 +3161,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
*
* Returns:
* NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
* NVMX_ENTRY_VMFAIL: Consistency check VMFail
* NVMX_ENTRY_VMEXIT: Consistency check VMExit
* NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
* NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
* NVMX_VMENTRY_VMFAIL: Consistency check VMFail
* NVMX_VMENTRY_VMEXIT: Consistency check VMExit
* NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry)
@ -3609,8 +3609,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
unsigned long exit_qual;
bool block_nested_events =
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
bool mtf_pending = vmx->nested.mtf_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
/*
* Clear the MTF state. If a higher priority VM-exit is delivered first,
* this state is discarded.
*/
vmx->nested.mtf_pending = false;
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
@ -3621,8 +3628,28 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
/*
* Process any exceptions that are not debug traps before MTF.
*/
if (vcpu->arch.exception.pending &&
nested_vmx_check_exception(vcpu, &exit_qual)) {
!vmx_pending_dbg_trap(vcpu) &&
nested_vmx_check_exception(vcpu, &exit_qual)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
return 0;
}
if (mtf_pending) {
if (block_nested_events)
return -EBUSY;
nested_vmx_update_pending_dbg(vcpu);
nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
return 0;
}
if (vcpu->arch.exception.pending &&
nested_vmx_check_exception(vcpu, &exit_qual)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
@ -5285,24 +5312,17 @@ fail:
return 1;
}
static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
/*
* Return true if an IO instruction with the specified port and size should cause
* a VM-exit into L1.
*/
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
int size)
{
unsigned long exit_qualification;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gpa_t bitmap, last_bitmap;
unsigned int port;
int size;
u8 b;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
last_bitmap = (gpa_t)-1;
b = -1;
@ -5329,8 +5349,26 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
return false;
}
static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
unsigned long exit_qualification;
unsigned short port;
int size;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
return nested_vmx_check_io_bitmaps(vcpu, port, size);
}
/*
* Return 1 if we should exit from L2 to L1 to handle an MSR access access,
* Return 1 if we should exit from L2 to L1 to handle an MSR access,
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
* disinterest in the current event (read or write a specific MSR) by using an
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
@ -5712,6 +5750,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
if (vmx->nested.mtf_pending)
kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
}
}
@ -5892,6 +5933,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmx->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
vmx->nested.mtf_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
@ -5949,8 +5993,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
bool apicv)
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@ -5977,7 +6020,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
(apicv ? PIN_BASED_POSTED_INTR : 0);
(enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;

View File

@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
bool apicv);
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
@ -34,6 +33,8 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
int size);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
@ -175,6 +176,11 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
static inline int nested_cpu_has_mtf(struct vmcs12 *vmcs12)
{
return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
}
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);

View File

@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
static bool __read_mostly enable_apicv = 1;
bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@ -1175,6 +1175,10 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->guest_msrs[i].mask);
}
if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
if (vmx->guest_state_loaded)
return;
@ -1599,6 +1603,40 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
return 1;
}
/*
* Recognizes a pending MTF VM-exit and records the nested state for later
* delivery.
*/
static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!is_guest_mode(vcpu))
return;
/*
* Per the SDM, MTF takes priority over debug-trap exceptions besides
* T-bit traps. As instruction emulation is completed (i.e. at the
* instruction boundary), any #DB exception pending delivery must be a
* debug-trap. Record the pending MTF state to be delivered in
* vmx_check_nested_events().
*/
if (nested_cpu_has_mtf(vmcs12) &&
(!vcpu->arch.exception.pending ||
vcpu->arch.exception.nr == DB_VECTOR))
vmx->nested.mtf_pending = true;
else
vmx->nested.mtf_pending = false;
}
static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
vmx_update_emulated_instruction(vcpu);
return skip_emulated_instruction(vcpu);
}
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
/*
@ -3818,24 +3856,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
* 2. If target vcpu isn't running(root mode), kick it to pick up the
* interrupt from PIR in next vmentry.
*/
static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int r;
r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
if (!r)
return;
return 0;
if (!vcpu->arch.apicv_active)
return -1;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
return;
return 0;
/* If a previous notification has sent the IPI, nothing to do. */
if (pi_test_and_set_on(&vmx->pi_desc))
return;
return 0;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
return 0;
}
/*
@ -6482,8 +6525,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
/*
* We did this in prepare_switch_to_guest, because it needs to
* be within srcu_read_lock.
*/
WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@ -6757,8 +6803,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
vmx_capability.ept,
kvm_vcpu_apicv_active(vcpu));
vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@ -6839,8 +6884,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
enable_apicv);
nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@ -7101,6 +7145,39 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->req_immediate_exit = true;
}
static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned short port;
bool intercept;
int size;
if (info->intercept == x86_intercept_in ||
info->intercept == x86_intercept_ins) {
port = info->src_val;
size = info->dst_bytes;
} else {
port = info->dst_val;
size = info->src_bytes;
}
/*
* If the 'use IO bitmaps' VM-execution control is 0, IO instruction
* VM-exits depend on the 'unconditional IO exiting' VM-execution
* control.
*
* Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
*/
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
intercept = nested_cpu_has(vmcs12,
CPU_BASED_UNCOND_IO_EXITING);
else
intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
}
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@ -7108,19 +7185,31 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
switch (info->intercept) {
/*
* RDPID causes #UD if disabled through secondary execution controls.
* Because it is marked as EmulateOnUD, we need to intercept it here.
*/
if (info->intercept == x86_intercept_rdtscp &&
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
ctxt->exception.vector = UD_VECTOR;
ctxt->exception.error_code_valid = false;
return X86EMUL_PROPAGATE_FAULT;
}
case x86_intercept_rdtscp:
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
ctxt->exception.vector = UD_VECTOR;
ctxt->exception.error_code_valid = false;
return X86EMUL_PROPAGATE_FAULT;
}
break;
case x86_intercept_in:
case x86_intercept_ins:
case x86_intercept_out:
case x86_intercept_outs:
return vmx_check_intercept_io(vcpu, info);
/* TODO: check more intercepts... */
return X86EMUL_CONTINUE;
default:
break;
}
return X86EMUL_UNHANDLEABLE;
}
#ifdef CONFIG_X86_64
@ -7702,7 +7791,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
vmx_capability.ept, enable_apicv);
vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
@ -7786,7 +7875,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
.skip_emulated_instruction = vmx_skip_emulated_instruction,
.update_emulated_instruction = vmx_update_emulated_instruction,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
.patch_hypercall = vmx_patch_hypercall,

View File

@ -150,6 +150,9 @@ struct nested_vmx {
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/* Pending MTF VM-exit into L1. */
bool mtf_pending;
struct loaded_vmcs vmcs02;
/*

View File

@ -6891,6 +6891,8 @@ restart:
kvm_rip_write(vcpu, ctxt->eip);
if (r && ctxt->tf)
r = kvm_vcpu_do_singlestep(vcpu);
if (kvm_x86_ops->update_emulated_instruction)
kvm_x86_ops->update_emulated_instruction(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}

View File

@ -889,6 +889,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*