KVM x86 misc changes for 6.8:

  - Turn off KVM_WERROR by default for all configs so that it's not
    inadvertently enabled by non-KVM developers, which can be problematic for
    subsystems that require no regressions for W=1 builds.
 
  - Advertise all of the host-supported CPUID bits that enumerate IA32_SPEC_CTRL
    "features".
 
  - Don't force a masterclock update when a vCPU synchronizes to the current TSC
    generation, as updating the masterclock can cause kvmclock's time to "jump"
    unexpectedly, e.g. when userspace hotplugs a pre-created vCPU.
 
  - Use RIP-relative address to read kvm_rebooting in the VM-Enter fault paths,
    partly as a super minor optimization, but mostly to make KVM play nice with
    position independent executable builds.
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCgAwFiEEMHr+pfEFOIzK+KY1YJEiAU0MEvkFAmWW+7sSHHNlYW5qY0Bn
 b29nbGUuY29tAAoJEGCRIgFNDBL5/pwQAL8jIapIWP54VWxWlcTZFtCptGSobGlv
 cBS4L091/bYuMB/jO0pPtD+apzsYt3WmJ+tRsNA7Yctzh9BDE3XxbV7pKVIUpz9P
 TLCtYU2hPzp3vC6WCryjtU0OHxEnYMGHE1RCB7/bRblz+q6td7+MLZHcEUdwv83l
 3pVM5+tNyQBog40frEVf+z7wrXzz2FgnauJn70X1UUs40VuiTzi6FqfLn6QK95xQ
 8QPpjGFep7wQ6RgC4cPKiWSaP5PypCCpr4lMSKrKAf4iaKJdO1CYxEPeu0LcyFhR
 DUM3zb+AZ/FVrisRWUnjke4Epb87ikoMQBlflrI9+o4cNJQaxEHAzTMGO+u4oucy
 KwnXtNYM3lKGvDEvoUSBDphNayzcchn+0qk8YKB+XvClYSOtGi+NsWUB4x+M6crM
 960cidF/CzYZL/IDj9GW2Tb+IiPJarmazdbqDmMpQiAKz0KE3tezGiysB6d6VJs1
 V+KWOaSzAT9GsBKvGnPDHQaZ20vK+YsGB/TMWvpg3rFLTyV5QFM17UNdXyJlX0g8
 G0v+gf7j3MKm156H2yYW0XhIAfhstc1Xb8fTDQjJ3pZn6us2NAtFgnrIpbL31Z7E
 yaSgZuxetswbNwVSECUGlH4/zAtQudBfAt837Nu4eSCjMrJE4SPrrwpbTqp0SPXd
 1VZbGc70QFf7
 =O4hV
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-misc-6.8' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.8:

 - Turn off KVM_WERROR by default for all configs so that it's not
   inadvertently enabled by non-KVM developers, which can be problematic for
   subsystems that require no regressions for W=1 builds.

 - Advertise all of the host-supported CPUID bits that enumerate IA32_SPEC_CTRL
   "features".

 - Don't force a masterclock update when a vCPU synchronizes to the current TSC
   generation, as updating the masterclock can cause kvmclock's time to "jump"
   unexpectedly, e.g. when userspace hotplugs a pre-created vCPU.

 - Use RIP-relative address to read kvm_rebooting in the VM-Enter fault paths,
   partly as a super minor optimization, but mostly to make KVM play nice with
   position independent executable builds.
This commit is contained in:
Paolo Bonzini 2024-01-08 08:10:04 -05:00
commit 33d0403fda
6 changed files with 69 additions and 40 deletions

View file

@ -62,13 +62,13 @@ config KVM
config KVM_WERROR
bool "Compile KVM with -Werror"
# KASAN may cause the build to fail due to larger frames
default y if X86_64 && !KASAN
# We use the dependency on !COMPILE_TEST to not be enabled
# blindly in allmodconfig or allyesconfig configurations
depends on KVM
depends on (X86_64 && !KASAN) || !COMPILE_TEST
depends on EXPERT
# Disallow KVM's -Werror if KASAN is enabled, e.g. to guard against
# randomized configs from selecting KVM_WERROR=y, which doesn't play
# nice with KASAN. KASAN builds generate warnings for the default
# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
# Building KVM with -Werror and KASAN is still doable via enabling
# the kernel-wide WERROR=y.
depends on KVM && EXPERT && !KASAN
help
Add -Werror to the build flags for KVM.

View file

@ -685,6 +685,11 @@ void kvm_set_cpu_caps(void)
F(AMX_COMPLEX)
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) |
F(BHI_CTRL) | F(MCDT_NO)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
@ -966,13 +971,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
/* function 7 has additional index. */
case 7:
entry->eax = min(entry->eax, 1u);
max_idx = entry->eax = min(entry->eax, 2u);
cpuid_entry_override(entry, CPUID_7_0_EBX);
cpuid_entry_override(entry, CPUID_7_ECX);
cpuid_entry_override(entry, CPUID_7_EDX);
/* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
if (entry->eax == 1) {
/* KVM only supports up to 0x7.2, capped above via min(). */
if (max_idx >= 1) {
entry = do_host_cpuid(array, function, 1);
if (!entry)
goto out;
@ -982,6 +987,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = 0;
entry->ecx = 0;
}
if (max_idx >= 2) {
entry = do_host_cpuid(array, function, 2);
if (!entry)
goto out;
cpuid_entry_override(entry, CPUID_7_2_EDX);
entry->ecx = 0;
entry->ebx = 0;
entry->eax = 0;
}
break;
case 0xa: { /* Architectural Performance Monitoring */
union cpuid10_eax eax;

View file

@ -16,6 +16,7 @@ enum kvm_only_cpuid_leafs {
CPUID_7_1_EDX,
CPUID_8000_0007_EDX,
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@ -46,6 +47,14 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0)
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
@ -80,6 +89,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0007_EDX] = {0x80000007, 0, CPUID_EDX},
[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
};
/*
@ -106,19 +116,20 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
*/
static __always_inline u32 __feature_translate(int x86_feature)
{
if (x86_feature == X86_FEATURE_SGX1)
return KVM_X86_FEATURE_SGX1;
else if (x86_feature == X86_FEATURE_SGX2)
return KVM_X86_FEATURE_SGX2;
else if (x86_feature == X86_FEATURE_SGX_EDECCSSA)
return KVM_X86_FEATURE_SGX_EDECCSSA;
else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
return KVM_X86_FEATURE_CONSTANT_TSC;
else if (x86_feature == X86_FEATURE_PERFMON_V2)
return KVM_X86_FEATURE_PERFMON_V2;
#define KVM_X86_TRANSLATE_FEATURE(f) \
case X86_FEATURE_##f: return KVM_X86_FEATURE_##f
switch (x86_feature) {
KVM_X86_TRANSLATE_FEATURE(SGX1);
KVM_X86_TRANSLATE_FEATURE(SGX2);
KVM_X86_TRANSLATE_FEATURE(SGX_EDECCSSA);
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
default:
return x86_feature;
}
}
static __always_inline u32 __feature_leaf(int x86_feature)
{

View file

@ -270,16 +270,16 @@ SYM_FUNC_START(__svm_vcpu_run)
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY
10: cmpb $0, kvm_rebooting
10: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
ud2
30: cmpb $0, kvm_rebooting
30: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 4b
ud2
50: cmpb $0, kvm_rebooting
50: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 6b
ud2
70: cmpb $0, kvm_rebooting
70: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 8b
ud2
@ -381,7 +381,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY
3: cmpb $0, kvm_rebooting
3: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
ud2

View file

@ -289,7 +289,7 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
RET
.Lfixup:
cmpb $0, kvm_rebooting
cmpb $0, _ASM_RIP(kvm_rebooting)
jne .Lvmfail
ud2
.Lvmfail:

View file

@ -2513,26 +2513,29 @@ static inline int gtod_is_based_on_tsc(int mode)
}
#endif
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu, bool new_generation)
{
#ifdef CONFIG_X86_64
bool vcpus_matched;
struct kvm_arch *ka = &vcpu->kvm->arch;
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&vcpu->kvm->online_vcpus));
/*
* To use the masterclock, the host clocksource must be based on TSC
* and all vCPUs must have matching TSCs. Note, the count for matching
* vCPUs doesn't include the reference vCPU, hence "+1".
*/
bool use_master_clock = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&vcpu->kvm->online_vcpus)) &&
gtod_is_based_on_tsc(gtod->clock.vclock_mode);
/*
* Once the masterclock is enabled, always perform request in
* order to update it.
*
* In order to enable masterclock, the host clocksource must be TSC
* and the vcpus need to have matched TSCs. When that happens,
* perform request to enable masterclock.
* Request a masterclock update if the masterclock needs to be toggled
* on/off, or when starting a new generation and the masterclock is
* enabled (compute_guest_tsc() requires the masterclock snapshot to be
* taken _after_ the new generation is created).
*/
if (ka->use_master_clock ||
(gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
if ((ka->use_master_clock && new_generation) ||
(ka->use_master_clock != use_master_clock))
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@ -2709,7 +2712,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
kvm_track_tsc_matching(vcpu);
kvm_track_tsc_matching(vcpu, !matched);
}
static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)