KVM x86 misc changes for 6.10:

- Advertise the max mappable GPA in the "guest MAXPHYADDR" CPUID field, which
    is unused by hardware, so that KVM can communicate its inability to map GPAs
    that set bits 51:48 due to lack of 5-level paging.  Guest firmware is
    expected to use the information to safely remap BARs in the uppermost GPA
    space, i.e to avoid placing a BAR at a legal, but unmappable, GPA.
 
  - Use vfree() instead of kvfree() for allocations that always use vcalloc()
    or __vcalloc().
 
  - Don't completely ignore same-value writes to immutable feature MSRs, as
    doing so results in KVM failing to reject accesses to MSR that aren't
    supposed to exist given the vCPU model and/or KVM configuration.
 
  - Don't mark APICv as being inhibited due to ABSENT if APICv is disabled
    KVM-wide to avoid confusing debuggers (KVM will never bother clearing the
    ABSENT inhibit, even if userspace enables in-kernel local APIC).
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmY+rlEACgkQOlYIJqCj
 N/3/xQ/7BvNl1aCJSIQy+yanCKK4wV0wWoY/hD+1wVge3zoaLZqLNHeR7fEa3vo+
 OSS/pOz+PT6DbkokZYjjVaGs6+pFqaYg5YvRE7SPbj903phm81H7v5ZLtwgOBcXx
 dG9cSLTaRhos0PxqoiLfmiGK5IDKmWuZyJzhw+nPh2YmxoRDO/4exsLA9xWWhQSh
 BjPf32cq69fn39Mo/KeANdLR1FEjvKItEty7St5r/OZFxejP8VPe1xuFxHPJn4U+
 FBbDe0DMXAPfoAQImBBhHUpm5Rp7Hwbh90tM8xY6rf3hvRZWmMCAX/Hx8C562M2b
 k6jB13gsoVesatT6lgKs2I0KGL7TSC0jLYG8aeREdBz6AEo5bkBegB5965MZYfGv
 T43i/zk+Ha5VIEURqE/CtocKF8AEjnUWLaIyL7VsDqaMslmaMdWzr8RouaO1snMT
 N/mfilzx9/rzltTV67TI8FSykPNxehwNoc9P8l+ulbW1KKIzpZCWxtIpQnT2TGdn
 89zAJ7LUbEAOnO+jMsJjld0fcNEmUqiqu9tezHuu0rVYErYqtfVhrWIf52r0AHDK
 HRY5FNcZzCE+8FFAVDNl92Of+mPeF47RELXNMLAT+1lm91ug4k62GF4UDw7hsbFo
 6+ductlj2DZlwxZVGKxKhBDxFg+AfsNCC1fZvYq+D/6ZE51eABo=
 =9RXP
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-misc-6.10' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.10:

 - Advertise the max mappable GPA in the "guest MAXPHYADDR" CPUID field, which
   is unused by hardware, so that KVM can communicate its inability to map GPAs
   that set bits 51:48 due to lack of 5-level paging.  Guest firmware is
   expected to use the information to safely remap BARs in the uppermost GPA
   space, i.e to avoid placing a BAR at a legal, but unmappable, GPA.

 - Use vfree() instead of kvfree() for allocations that always use vcalloc()
   or __vcalloc().

 - Don't completely ignore same-value writes to immutable feature MSRs, as
   doing so results in KVM failing to reject accesses to MSR that aren't
   supposed to exist given the vCPU model and/or KVM configuration.

 - Don't mark APICv as being inhibited due to ABSENT if APICv is disabled
   KVM-wide to avoid confusing debuggers (KVM will never bother clearing the
   ABSENT inhibit, even if userspace enables in-kernel local APIC).
This commit is contained in:
Paolo Bonzini 2024-05-12 03:18:44 -04:00
commit 7d41e24da2
7 changed files with 53 additions and 31 deletions

View File

@ -1232,9 +1232,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0x80000008: {
unsigned g_phys_as = (entry->eax >> 16) & 0xff;
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned phys_as = entry->eax & 0xff;
/*
* GuestPhysAddrSize (EAX[23:16]) is intended for software
* use.
*
* KVM's ABI is to report the effective MAXPHYADDR for the
* guest in PhysAddrSize (phys_as), and the maximum
* *addressable* GPA in GuestPhysAddrSize (g_phys_as).
*
* GuestPhysAddrSize is valid if and only if TDP is enabled,
* in which case the max GPA that can be addressed by KVM may
* be less than the max GPA that can be legally generated by
* the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't
* support 5-level TDP.
*/
unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned int phys_as, g_phys_as;
/*
* If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
@ -1242,16 +1255,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* reductions in MAXPHYADDR for memory encryption affect shadow
* paging, too.
*
* If TDP is enabled but an explicit guest MAXPHYADDR is not
* provided, use the raw bare metal MAXPHYADDR as reductions to
* the HPAs do not affect GPAs.
* If TDP is enabled, use the raw bare metal MAXPHYADDR as
* reductions to the HPAs do not affect GPAs. The max
* addressable GPA is the same as the max effective GPA, except
* that it's capped at 48 bits if 5-level TDP isn't supported
* (hardware processes bits 51:48 only when walking the fifth
* level page table).
*/
if (!tdp_enabled)
g_phys_as = boot_cpu_data.x86_phys_bits;
else if (!g_phys_as)
if (!tdp_enabled) {
phys_as = boot_cpu_data.x86_phys_bits;
g_phys_as = 0;
} else {
phys_as = entry->eax & 0xff;
g_phys_as = phys_as;
if (kvm_mmu_get_max_tdp_level() < 5)
g_phys_as = min(g_phys_as, 48);
}
entry->eax = g_phys_as | (virt_as << 8);
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);

View File

@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void)
return boot_cpu_data.x86_phys_bits;
}
u8 kvm_mmu_get_max_tdp_level(void);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);

View File

@ -5316,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
return max_tdp_level;
}
u8 kvm_mmu_get_max_tdp_level(void)
{
return tdp_root_level ? tdp_root_level : max_tdp_level;
}
static union kvm_mmu_page_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
union kvm_cpu_role cpu_role)

View File

@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
kvfree(slot->arch.gfn_write_track);
vfree(slot->arch.gfn_write_track);
slot->arch.gfn_write_track = NULL;
}

View File

@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition,
);
/*
* Tracepoint for VT-d posted-interrupts.
* Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC.
*/
TRACE_EVENT(kvm_pi_irte_update,
TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update,
__entry->set = set;
),
TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
"gvec: 0x%x, pi_desc_addr: 0x%llx",
__entry->set ? "enabled and being updated" : "disabled",
__entry->host_irq,

View File

@ -2233,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
/*
* Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
* not support modifying the guest vCPU model on the fly, e.g. changing
* the nVMX capabilities while L2 is running is nonsensical. Ignore
* the nVMX capabilities while L2 is running is nonsensical. Allow
* writes of the same value, e.g. to allow userspace to blindly stuff
* all MSRs when emulating RESET.
*/
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
if (do_get_msr(vcpu, index, &val) || *data != val)
return -EINVAL;
return 0;
}
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) &&
(do_get_msr(vcpu, index, &val) || *data != val))
return -EINVAL;
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
@ -10031,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
static void kvm_apicv_init(struct kvm *kvm)
{
unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons;
enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
APICV_INHIBIT_REASON_DISABLE;
set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
init_rwsem(&kvm->arch.apicv_update_lock);
set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true);
if (!enable_apicv)
set_or_clear_apicv_inhibit(inhibits,
APICV_INHIBIT_REASON_DISABLE, true);
}
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
@ -12805,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot)
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.rmap[i]);
vfree(slot->arch.rmap[i]);
slot->arch.rmap[i] = NULL;
}
}
@ -12817,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
memslot_rmap_free(slot);
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.lpage_info[i - 1]);
vfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
@ -12909,7 +12903,7 @@ out_free:
memslot_rmap_free(slot);
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.lpage_info[i - 1]);
vfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
return -ENOMEM;

View File

@ -974,7 +974,7 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
if (!memslot->dirty_bitmap)
return;
kvfree(memslot->dirty_bitmap);
vfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL;
}