linux-stable/arch/x86/kvm/vmx/vmcs.h
Sean Christopherson 5a8781607e KVM: nVMX: Cache host_rsp on a per-VMCS basis
Currently, host_rsp is cached on a per-vCPU basis, i.e. it's stored in
struct vcpu_vmx.  In non-nested usage the caching is for all intents
and purposes 100% effective, e.g. only the first VMLAUNCH needs to
synchronize VMCS.HOST_RSP since the call stack to vmx_vcpu_run() is
identical each and every time.  But when running a nested guest, KVM
must invalidate the cache when switching the current VMCS as it can't
guarantee the new VMCS has the same HOST_RSP as the previous VMCS.  In
other words, the cache loses almost all of its efficacy when running a
nested VM.

Move host_rsp to struct vmcs_host_state, which is per-VMCS, so that it
is cached on a per-VMCS basis and restores its 100% hit rate when
nested VMs are in play.

Note that the host_rsp cache for vmcs02 essentially "breaks" when
nested early checks are enabled as nested_vmx_check_vmentry_hw() will
see a different RSP at the time of its VM-Enter.  While it's possible
to avoid even that VMCS.HOST_RSP synchronization, e.g. by employing a
dedicated VM-Exit stack, there is little motivation for doing so as
the overhead of two VMWRITEs (~55 cycles) is dwarfed by the overhead
of the extra VMX transition (600+ cycles) and is a proverbial drop in
the ocean relative to the total cost of a nested transtion (10s of
thousands of cycles).

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2019-02-12 13:12:22 +01:00

137 lines
3.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_VMX_VMCS_H
#define __KVM_X86_VMX_VMCS_H
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/nospec.h>
#include <asm/kvm.h>
#include <asm/vmx.h>
#include "capabilities.h"
struct vmcs_hdr {
u32 revision_id:31;
u32 shadow_vmcs:1;
};
struct vmcs {
struct vmcs_hdr hdr;
u32 abort;
char data[0];
};
DECLARE_PER_CPU(struct vmcs *, current_vmcs);
/*
* vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
* and whose values change infrequently, but are not constant. I.e. this is
* used as a write-through cache of the corresponding VMCS fields.
*/
struct vmcs_host_state {
unsigned long cr3; /* May not match real cr3 */
unsigned long cr4; /* May not match real cr4 */
unsigned long gs_base;
unsigned long fs_base;
unsigned long rsp;
u16 fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
u16 ds_sel, es_sel;
#endif
};
/*
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
* loaded on this CPU (so we can clear them if the CPU goes down).
*/
struct loaded_vmcs {
struct vmcs *vmcs;
struct vmcs *shadow_vmcs;
int cpu;
bool launched;
bool nmi_known_unmasked;
bool hv_timer_armed;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
struct vmcs_host_state host_state;
};
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
}
static inline bool is_debug(u32 intr_info)
{
return is_exception_n(intr_info, DB_VECTOR);
}
static inline bool is_breakpoint(u32 intr_info)
{
return is_exception_n(intr_info, BP_VECTOR);
}
static inline bool is_page_fault(u32 intr_info)
{
return is_exception_n(intr_info, PF_VECTOR);
}
static inline bool is_invalid_opcode(u32 intr_info)
{
return is_exception_n(intr_info, UD_VECTOR);
}
static inline bool is_gp_fault(u32 intr_info)
{
return is_exception_n(intr_info, GP_VECTOR);
}
static inline bool is_machine_check(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}
/* Undocumented: icebp/int1 */
static inline bool is_icebp(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
== (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
}
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}
enum vmcs_field_width {
VMCS_FIELD_WIDTH_U16 = 0,
VMCS_FIELD_WIDTH_U64 = 1,
VMCS_FIELD_WIDTH_U32 = 2,
VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
};
static inline int vmcs_field_width(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
return VMCS_FIELD_WIDTH_U32;
return (field >> 13) & 0x3;
}
static inline int vmcs_field_readonly(unsigned long field)
{
return (((field >> 10) & 0x3) == 1);
}
#endif /* __KVM_X86_VMX_VMCS_H */