New x86 features:

* Guest API and guest kernel support for SEV live migration
 
 * SEV and SEV-ES intra-host migration
 
 Bugfixes and cleanups for x86:
 
 * Fix misuse of gfn-to-pfn cache when recording guest steal time / preempted status
 
 * Fix selftests on APICv machines
 
 * Fix sparse warnings
 
 * Fix detection of KVM features in CPUID
 
 * Cleanups for bogus writes to MSR_KVM_PV_EOI_EN
 
 * Fixes and cleanups for MSR bitmap handling
 
 * Cleanups for INVPCID
 
 * Make x86 KVM_SOFT_MAX_VCPUS consistent with other architectures
 
 Bugfixes for ARM:
 
 * Fix finalization of host stage2 mappings
 
 * Tighten the return value of kvm_vcpu_preferred_target()
 
 * Make sure the extraction of ESR_ELx.EC is limited to architected bits
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmGO1usUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroOsXgf/d2CgZmTxZii/pOt0JKGSDKwRdoti
 pfbmtXwTkagqg2tIJtEsxwvcc26Q2VLyg9mlEelX1I8KVlexSa8mDtbGWHLFilsc
 JIZY8lE96wtlr9AyuN06K44QingDMIbXzlxcwO+muS3zTlSPsNkPcaVDk5PL35sN
 Wy2GA6GCLWv6iTLCtlX5EzbcgkrR+Mypj4lGSdXGRqfBKVFOQjeVt+Q3YzOPuacS
 03NBlYOmRxTnAGXfeAVs0/zEa69u/dYjBmwDPe6ERma4u8thHv0U/fDAh5C/ES7q
 42Thes/JOYnEZiBQ1xZLsHqRII0achbJKZhmxqPwjRf/u6eXsfz0KY9FBg==
 =kN8U
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
 "New x86 features:

   - Guest API and guest kernel support for SEV live migration

   - SEV and SEV-ES intra-host migration

  Bugfixes and cleanups for x86:

   - Fix misuse of gfn-to-pfn cache when recording guest steal time /
     preempted status

   - Fix selftests on APICv machines

   - Fix sparse warnings

   - Fix detection of KVM features in CPUID

   - Cleanups for bogus writes to MSR_KVM_PV_EOI_EN

   - Fixes and cleanups for MSR bitmap handling

   - Cleanups for INVPCID

   - Make x86 KVM_SOFT_MAX_VCPUS consistent with other architectures

  Bugfixes for ARM:

   - Fix finalization of host stage2 mappings

   - Tighten the return value of kvm_vcpu_preferred_target()

   - Make sure the extraction of ESR_ELx.EC is limited to architected
     bits"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (34 commits)
  KVM: SEV: unify cgroup cleanup code for svm_vm_migrate_from
  KVM: x86: move guest_pv_has out of user_access section
  KVM: x86: Drop arbitrary KVM_SOFT_MAX_VCPUS
  KVM: Move INVPCID type check from vmx and svm to the common kvm_handle_invpcid()
  KVM: VMX: Add a helper function to retrieve the GPR index for INVPCID, INVVPID, and INVEPT
  KVM: nVMX: Clean up x2APIC MSR handling for L2
  KVM: VMX: Macrofy the MSR bitmap getters and setters
  KVM: nVMX: Handle dynamic MSR intercept toggling
  KVM: nVMX: Query current VMCS when determining if MSR bitmaps are in use
  KVM: x86: Don't update vcpu->arch.pv_eoi.msr_val when a bogus value was written to MSR_KVM_PV_EOI_EN
  KVM: x86: Rename kvm_lapic_enable_pv_eoi()
  KVM: x86: Make sure KVM_CPUID_FEATURES really are KVM_CPUID_FEATURES
  KVM: x86: Add helper to consolidate core logic of SET_CPUID{2} flows
  kvm: mmu: Use fast PF path for access tracking of huge pages when possible
  KVM: x86/mmu: Properly dereference rcu-protected TDP MMU sptep iterator
  KVM: x86: inhibit APICv when KVM_GUESTDBG_BLOCKIRQ active
  kvm: x86: Convert return type of *is_valid_rdpmc_ecx() to bool
  KVM: x86: Fix recording of guest steal time / preempted status
  selftest: KVM: Add intra host migration tests
  selftest: KVM: Add open sev dev helper
  ...
This commit is contained in:
Linus Torvalds 2021-11-13 10:01:10 -08:00
commit 4d6fe79fde
49 changed files with 1097 additions and 379 deletions

View File

@ -6911,6 +6911,20 @@ MAP_SHARED mmap will result in an -EINVAL return.
When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
perform a bulk copy of tags to/from the guest.
7.29 KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
-------------------------------------
Architectures: x86 SEV enabled
Type: vm
Parameters: args[0] is the fd of the source vm
Returns: 0 on success
This capability enables userspace to migrate the encryption context from the VM
indicated by the fd to the VM this is called on.
This is intended to support intra-host migration of VMs between userspace VMMs,
upgrading the VMM process without interrupting the guest.
8. Other capabilities.
======================

View File

@ -68,6 +68,7 @@
#define ESR_ELx_EC_MAX (0x3F)
#define ESR_ELx_EC_SHIFT (26)
#define ESR_ELx_EC_WIDTH (6)
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)

View File

@ -584,7 +584,7 @@ struct kvm_vcpu_stat {
u64 exits;
};
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);

View File

@ -1389,12 +1389,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
case KVM_ARM_PREFERRED_TARGET: {
int err;
struct kvm_vcpu_init init;
err = kvm_vcpu_preferred_target(&init);
if (err)
return err;
kvm_vcpu_preferred_target(&init);
if (copy_to_user(argp, &init, sizeof(init)))
return -EFAULT;

View File

@ -869,13 +869,10 @@ u32 __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_GENERIC_V8;
}
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
{
u32 target = kvm_target_cpu();
if (target < 0)
return -ENODEV;
memset(init, 0, sizeof(*init));
/*
@ -885,8 +882,6 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
* target type.
*/
init->target = (__u32)target;
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)

View File

@ -44,7 +44,7 @@
el1_sync: // Guest trapped into EL2
mrs x0, esr_el2
lsr x0, x0, #ESR_ELx_EC_SHIFT
ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap

View File

@ -141,7 +141,7 @@ SYM_FUNC_END(__host_hvc)
.L__vect_start\@:
stp x0, x1, [sp, #-16]!
mrs x0, esr_el2
lsr x0, x0, #ESR_ELx_EC_SHIFT
ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
b.eq __host_hvc
b __host_exit

View File

@ -178,7 +178,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
phys = kvm_pte_to_phys(pte);
if (!addr_is_memory(phys))
return 0;
return -EINVAL;
/*
* Adjust the host stage-2 mappings to match the ownership attributes
@ -207,8 +207,18 @@ static int finalize_host_mappings(void)
.cb = finalize_host_mappings_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
};
int i, ret;
return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
for (i = 0; i < hyp_memblock_nr; i++) {
struct memblock_region *reg = &hyp_memory[i];
u64 start = (u64)hyp_phys_to_virt(reg->base);
ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker);
if (ret)
return ret;
}
return 0;
}
void __noreturn __pkvm_init_finalise(void)

View File

@ -474,7 +474,7 @@ bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
/**
/*
* Handler for protected VM restricted exceptions.
*
* Inject an undefined exception into the guest and return true to indicate that

View File

@ -38,7 +38,6 @@
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
#define KVM_MAX_VCPUS 1024
#define KVM_SOFT_MAX_VCPUS 710
/*
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
@ -725,6 +724,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
u32 kvm_cpuid_base;
u64 reserved_gpa_bits;
int maxphyaddr;
@ -748,7 +748,7 @@ struct kvm_vcpu_arch {
u8 preempted;
u64 msr_val;
u64 last_steal;
struct gfn_to_pfn_cache cache;
struct gfn_to_hva_cache cache;
} st;
u64 l1_tsc_offset;
@ -1034,6 +1034,7 @@ struct kvm_x86_msr_filter {
#define APICV_INHIBIT_REASON_IRQWIN 3
#define APICV_INHIBIT_REASON_PIT_REINJ 4
#define APICV_INHIBIT_REASON_X2APIC 5
#define APICV_INHIBIT_REASON_BLOCKIRQ 6
struct kvm_arch {
unsigned long n_used_mmu_pages;
@ -1476,6 +1477,7 @@ struct kvm_x86_ops {
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*get_msr_feature)(struct kvm_msr_entry *entry);

View File

@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
long ret;
asm volatile("vmmcall"
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
: "memory");
return ret;
}
#ifdef CONFIG_KVM_GUEST
void kvmclock_init(void);
void kvmclock_disable(void);

View File

@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
bool enc);
void __init mem_encrypt_free_decrypted_mem(void);
@ -78,6 +80,8 @@ static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
static inline void __init
early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
static inline void mem_encrypt_free_decrypted_mem(void) { }

View File

@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
PVOP_VCALL1(mmu.exit_mmap, mm);
}
static inline void notify_page_enc_status_changed(unsigned long pfn,
int npages, bool enc)
{
PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
}
#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{

View File

@ -168,6 +168,7 @@ struct pv_mmu_ops {
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
#ifdef CONFIG_PARAVIRT_XXL
struct paravirt_callee_save read_cr2;

View File

@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
#endif
#define for_each_possible_hypervisor_cpuid_base(function) \
for (function = 0x40000000; function < 0x40010000; function += 0x100)
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
uint32_t base, eax, signature[3];
for (base = 0x40000000; base < 0x40010000; base += 0x100) {
for_each_possible_hypervisor_cpuid_base(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
if (!memcmp(sig, signature, 12) &&

View File

@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly;

View File

@ -8,6 +8,7 @@
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
* a particular paravirtualization, the appropriate feature bit should

View File

@ -28,6 +28,7 @@
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@ -41,6 +42,7 @@
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>
#include <asm/e820/api.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
kvm_pv_disable_apf();
if (!shutdown)
apf_task_wake_all();
@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
__send_ipi_mask(local_mask, vector);
}
static int __init setup_efi_kvm_sev_migration(void)
{
efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
efi_status_t status;
unsigned long size;
bool enabled;
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
!kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
return 0;
if (!efi_enabled(EFI_BOOT))
return 0;
if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
pr_info("%s : EFI runtime services are not enabled\n", __func__);
return 0;
}
size = sizeof(enabled);
/* Get variable contents into buffer */
status = efi.get_variable(efi_sev_live_migration_enabled,
&efi_variable_guid, NULL, &size, &enabled);
if (status == EFI_NOT_FOUND) {
pr_info("%s : EFI live migration variable not found\n", __func__);
return 0;
}
if (status != EFI_SUCCESS) {
pr_info("%s : EFI variable retrieval failed\n", __func__);
return 0;
}
if (enabled == 0) {
pr_info("%s: live migration disabled in EFI\n", __func__);
return 0;
}
pr_info("%s : live migration enabled in EFI\n", __func__);
wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
return 1;
}
late_initcall(setup_efi_kvm_sev_migration);
/*
* Set the IPI entry points
*/
@ -756,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
return 0; /* So we don't blow up on old processors */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
return 0;
}
@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
}
static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
{
kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}
static void __init kvm_init_platform(void)
{
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
unsigned long nr_pages;
int i;
pv_ops.mmu.notify_page_enc_status_changed =
kvm_sev_hc_page_enc_status;
/*
* Reset the host's shared pages list related to kernel
* specific page encryption status settings before we load a
* new kernel by kexec. Reset the page encryption status
* during early boot intead of just before kexec to avoid SMP
* races during kvm_pv_guest_cpu_reboot().
* NOTE: We cannot reset the complete shared pages list
* here as we need to retain the UEFI/OVMF firmware
* specific settings.
*/
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
if (entry->type != E820_TYPE_RAM)
continue;
nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
nr_pages,
KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}
/*
* Ensure that _bss_decrypted section is marked as decrypted in the
* shared pages list.
*/
nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
PAGE_SIZE);
early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
nr_pages, 0);
/*
* If not booted using EFI, enable Live migration support.
*/
if (!efi_enabled(EFI_BOOT))
wrmsrl(MSR_KVM_MIGRATION_CONTROL,
KVM_MIGRATION_READY);
}
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
}

View File

@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
.mmu.exit_mmap = paravirt_nop,
.mmu.notify_page_enc_status_changed = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),

View File

@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return 0;
}
static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
{
u32 function;
struct kvm_cpuid_entry2 *entry;
vcpu->arch.kvm_cpuid_base = 0;
for_each_possible_hypervisor_cpuid_base(function) {
entry = kvm_find_cpuid_entry(vcpu, function, 0);
if (entry) {
u32 signature[3];
signature[0] = entry->ebx;
signature[1] = entry->ecx;
signature[2] = entry->edx;
BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
vcpu->arch.kvm_cpuid_base = function;
break;
}
}
}
}
struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
u32 base = vcpu->arch.kvm_cpuid_base;
if (!base)
return NULL;
return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
/*
* save the feature bitmap to avoid cpuid lookup for every PV
@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
best = kvm_find_kvm_cpuid_features(vcpu);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
}
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
int r;
r = kvm_check_cpuid(e2, nent);
if (r)
return r;
kvfree(vcpu->arch.cpuid_entries);
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = nent;
kvm_update_kvm_cpuid_base(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
return 0;
}
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
e2[i].padding[2] = 0;
}
r = kvm_check_cpuid(e2, cpuid->nent);
if (r) {
r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
if (r)
kvfree(e2);
goto out_free_cpuid;
}
kvfree(vcpu->arch.cpuid_entries);
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
out_free_cpuid:
kvfree(e);
@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return PTR_ERR(e2);
}
r = kvm_check_cpuid(e2, cpuid->nent);
if (r) {
r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
if (r)
kvfree(e2);
return r;
}
kvfree(vcpu->arch.cpuid_entries);
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
return 0;
return r;
}
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case KVM_CPUID_SIGNATURE: {
static const char signature[12] = "KVMKVMKVM\0\0";
const u32 *sigptr = (const u32 *)signature;
const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];

View File

@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv_vcpu->hv_vapic = data;
if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
if (kvm_lapic_enable_pv_eoi(vcpu,
if (kvm_lapic_set_pv_eoi(vcpu,
gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
sizeof(struct hv_vp_assist_page)))
return 1;

View File

@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
unsigned long new_len;
int ret;
if (!IS_ALIGNED(addr, 4))
return 1;
if (data & KVM_MSR_ENABLED) {
if (addr == ghc->gpa && len <= ghc->len)
new_len = ghc->len;
else
new_len = len;
ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
if (ret)
return ret;
}
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
if (addr == ghc->gpa && len <= ghc->len)
new_len = ghc->len;
else
new_len = len;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
return 0;
}
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)

View File

@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);
#define VEC_POS(v) ((v) & (32 - 1))

View File

@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
new_spte |= PT_WRITABLE_MASK;
/*
* Do not fix write-permission on the large spte. Since
* we only dirty the first page into the dirty-bitmap in
* Do not fix write-permission on the large spte when
* dirty logging is enabled. Since we only dirty the
* first page into the dirty-bitmap in
* fast_pf_fix_direct_spte(), other pages are missed
* if its slot has dirty logging enabled.
*
* Instead, we let the slow page fault path create a
* normal spte to fix the access.
*
* See the comments in kvm_arch_commit_memory_region().
*/
if (sp->role.level > PG_LEVEL_4K)
if (sp->role.level > PG_LEVEL_4K &&
kvm_slot_dirty_track_enabled(fault->slot))
break;
}

View File

@ -897,7 +897,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault,
struct tdp_iter *iter)
{
struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
u64 new_spte;
int ret = RET_PF_FIXED;
bool wrprot = false;

View File

@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
}
/* check if idx is a valid index to access PMU */
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}

View File

@ -32,7 +32,7 @@ struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);

View File

@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
BIT(APICV_INHIBIT_REASON_NESTED) |
BIT(APICV_INHIBIT_REASON_IRQWIN) |
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
BIT(APICV_INHIBIT_REASON_X2APIC);
BIT(APICV_INHIBIT_REASON_X2APIC) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
return supported & BIT(bit);
}

View File

@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
idx &= ~(3u << 30);
return (idx >= pmu->nr_arch_gp_counters);
return idx < pmu->nr_arch_gp_counters;
}
/* idx is the ECX register of RDPMC instruction */

View File

@ -120,16 +120,26 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
return true;
}
static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
{
enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
return misc_cg_try_charge(type, sev->misc_cg, 1);
}
static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
{
enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
misc_cg_uncharge(type, sev->misc_cg, 1);
}
static int sev_asid_new(struct kvm_sev_info *sev)
{
int asid, min_asid, max_asid, ret;
bool retry = true;
enum misc_res_type type;
type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
ret = misc_cg_try_charge(type, sev->misc_cg, 1);
ret = sev_misc_cg_try_charge(sev);
if (ret) {
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
@ -162,7 +172,7 @@ again:
return asid;
e_uncharge:
misc_cg_uncharge(type, sev->misc_cg, 1);
sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
return ret;
@ -179,7 +189,6 @@ static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
int cpu;
enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
@ -192,8 +201,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
mutex_unlock(&sev_bitmap_lock);
type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
misc_cg_uncharge(type, sev->misc_cg, 1);
sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
}
@ -590,7 +598,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
* traditional VMSA as it has been built so far (in prep
* for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
*/
memcpy(svm->vmsa, save, sizeof(*save));
memcpy(svm->sev_es.vmsa, save, sizeof(*save));
return 0;
}
@ -612,11 +620,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
* the VMSA memory content (i.e it will write the same memory region
* with the guest's key), so invalidate it first.
*/
clflush_cache_range(svm->vmsa, PAGE_SIZE);
clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
vmsa.reserved = 0;
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
vmsa.address = __sme_pa(svm->vmsa);
vmsa.address = __sme_pa(svm->sev_es.vmsa);
vmsa.len = PAGE_SIZE;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
if (ret)
@ -1536,6 +1544,201 @@ static bool cmd_allowed_from_miror(u32 cmd_id)
return false;
}
static int sev_lock_for_migration(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
/*
* Bail if this VM is already involved in a migration to avoid deadlock
* between two VMs trying to migrate to/from each other.
*/
if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
return -EBUSY;
mutex_lock(&kvm->lock);
return 0;
}
static void sev_unlock_after_migration(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
mutex_unlock(&kvm->lock);
atomic_set_release(&sev->migration_in_progress, 0);
}
static int sev_lock_vcpus_for_migration(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i, j;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mutex_lock_killable(&vcpu->mutex))
goto out_unlock;
}
return 0;
out_unlock:
kvm_for_each_vcpu(j, vcpu, kvm) {
if (i == j)
break;
mutex_unlock(&vcpu->mutex);
}
return -EINTR;
}
static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i;
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_unlock(&vcpu->mutex);
}
}
static void sev_migrate_from(struct kvm_sev_info *dst,
struct kvm_sev_info *src)
{
dst->active = true;
dst->asid = src->asid;
dst->handle = src->handle;
dst->pages_locked = src->pages_locked;
src->asid = 0;
src->active = false;
src->handle = 0;
src->pages_locked = 0;
INIT_LIST_HEAD(&dst->regions_list);
list_replace_init(&src->regions_list, &dst->regions_list);
}
static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
{
int i;
struct kvm_vcpu *dst_vcpu, *src_vcpu;
struct vcpu_svm *dst_svm, *src_svm;
if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
return -EINVAL;
kvm_for_each_vcpu(i, src_vcpu, src) {
if (!src_vcpu->arch.guest_state_protected)
return -EINVAL;
}
kvm_for_each_vcpu(i, src_vcpu, src) {
src_svm = to_svm(src_vcpu);
dst_vcpu = kvm_get_vcpu(dst, i);
dst_svm = to_svm(dst_vcpu);
/*
* Transfer VMSA and GHCB state to the destination. Nullify and
* clear source fields as appropriate, the state now belongs to
* the destination.
*/
memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
dst_vcpu->arch.guest_state_protected = true;
memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
src_vcpu->arch.guest_state_protected = false;
}
to_kvm_svm(src)->sev_info.es_active = false;
to_kvm_svm(dst)->sev_info.es_active = true;
return 0;
}
int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
{
struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *src_sev, *cg_cleanup_sev;
struct file *source_kvm_file;
struct kvm *source_kvm;
bool charged = false;
int ret;
ret = sev_lock_for_migration(kvm);
if (ret)
return ret;
if (sev_guest(kvm)) {
ret = -EINVAL;
goto out_unlock;
}
source_kvm_file = fget(source_fd);
if (!file_is_kvm(source_kvm_file)) {
ret = -EBADF;
goto out_fput;
}
source_kvm = source_kvm_file->private_data;
ret = sev_lock_for_migration(source_kvm);
if (ret)
goto out_fput;
if (!sev_guest(source_kvm)) {
ret = -EINVAL;
goto out_source;
}
src_sev = &to_kvm_svm(source_kvm)->sev_info;
dst_sev->misc_cg = get_current_misc_cg();
cg_cleanup_sev = dst_sev;
if (dst_sev->misc_cg != src_sev->misc_cg) {
ret = sev_misc_cg_try_charge(dst_sev);
if (ret)
goto out_dst_cgroup;
charged = true;
}
ret = sev_lock_vcpus_for_migration(kvm);
if (ret)
goto out_dst_cgroup;
ret = sev_lock_vcpus_for_migration(source_kvm);
if (ret)
goto out_dst_vcpu;
if (sev_es_guest(source_kvm)) {
ret = sev_es_migrate_from(kvm, source_kvm);
if (ret)
goto out_source_vcpu;
}
sev_migrate_from(dst_sev, src_sev);
kvm_vm_dead(source_kvm);
cg_cleanup_sev = src_sev;
ret = 0;
out_source_vcpu:
sev_unlock_vcpus_for_migration(source_kvm);
out_dst_vcpu:
sev_unlock_vcpus_for_migration(kvm);
out_dst_cgroup:
/* Operates on the source on success, on the destination on failure. */
if (charged)
sev_misc_cg_uncharge(cg_cleanup_sev);
put_misc_cg(cg_cleanup_sev->misc_cg);
cg_cleanup_sev->misc_cg = NULL;
out_source:
sev_unlock_after_migration(source_kvm);
out_fput:
if (source_kvm_file)
fput(source_kvm_file);
out_unlock:
sev_unlock_after_migration(kvm);
return ret;
}
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@ -2038,16 +2241,16 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
__free_page(virt_to_page(svm->vmsa));
sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
__free_page(virt_to_page(svm->sev_es.vmsa));
if (svm->ghcb_sa_free)
kfree(svm->ghcb_sa);
if (svm->sev_es.ghcb_sa_free)
kfree(svm->sev_es.ghcb_sa);
}
static void dump_ghcb(struct vcpu_svm *svm)
{
struct ghcb *ghcb = svm->ghcb;
struct ghcb *ghcb = svm->sev_es.ghcb;
unsigned int nbits;
/* Re-use the dump_invalid_vmcb module parameter */
@ -2073,7 +2276,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
struct ghcb *ghcb = svm->ghcb;
struct ghcb *ghcb = svm->sev_es.ghcb;
/*
* The GHCB protocol so far allows for the following data
@ -2093,7 +2296,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
struct ghcb *ghcb = svm->ghcb;
struct ghcb *ghcb = svm->sev_es.ghcb;
u64 exit_code;
/*
@ -2140,7 +2343,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
struct ghcb *ghcb;
u64 exit_code = 0;
ghcb = svm->ghcb;
ghcb = svm->sev_es.ghcb;
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage)
@ -2258,33 +2461,34 @@ vmgexit_err:
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
if (!svm->ghcb)
if (!svm->sev_es.ghcb)
return;
if (svm->ghcb_sa_free) {
if (svm->sev_es.ghcb_sa_free) {
/*
* The scratch area lives outside the GHCB, so there is a
* buffer that, depending on the operation performed, may
* need to be synced, then freed.
*/
if (svm->ghcb_sa_sync) {
if (svm->sev_es.ghcb_sa_sync) {
kvm_write_guest(svm->vcpu.kvm,
ghcb_get_sw_scratch(svm->ghcb),
svm->ghcb_sa, svm->ghcb_sa_len);
svm->ghcb_sa_sync = false;
ghcb_get_sw_scratch(svm->sev_es.ghcb),
svm->sev_es.ghcb_sa,
svm->sev_es.ghcb_sa_len);
svm->sev_es.ghcb_sa_sync = false;
}
kfree(svm->ghcb_sa);
svm->ghcb_sa = NULL;
svm->ghcb_sa_free = false;
kfree(svm->sev_es.ghcb_sa);
svm->sev_es.ghcb_sa = NULL;
svm->sev_es.ghcb_sa_free = false;
}
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
sev_es_sync_to_ghcb(svm);
kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
svm->ghcb = NULL;
kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
svm->sev_es.ghcb = NULL;
}
void pre_sev_run(struct vcpu_svm *svm, int cpu)
@ -2314,7 +2518,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct ghcb *ghcb = svm->ghcb;
struct ghcb *ghcb = svm->sev_es.ghcb;
u64 ghcb_scratch_beg, ghcb_scratch_end;
u64 scratch_gpa_beg, scratch_gpa_end;
void *scratch_va;
@ -2350,7 +2554,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return false;
}
scratch_va = (void *)svm->ghcb;
scratch_va = (void *)svm->sev_es.ghcb;
scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
} else {
/*
@ -2380,12 +2584,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
* the vCPU next time (i.e. a read was requested so the data
* must be written back to the guest memory).
*/
svm->ghcb_sa_sync = sync;
svm->ghcb_sa_free = true;
svm->sev_es.ghcb_sa_sync = sync;
svm->sev_es.ghcb_sa_free = true;
}
svm->ghcb_sa = scratch_va;
svm->ghcb_sa_len = len;
svm->sev_es.ghcb_sa = scratch_va;
svm->sev_es.ghcb_sa_len = len;
return true;
}
@ -2504,15 +2708,15 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
return -EINVAL;
}
if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
/* Unable to map GHCB from guest */
vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
ghcb_gpa);
return -EINVAL;
}
svm->ghcb = svm->ghcb_map.hva;
ghcb = svm->ghcb_map.hva;
svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
ghcb = svm->sev_es.ghcb_map.hva;
trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
@ -2535,7 +2739,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_sev_es_mmio_read(vcpu,
control->exit_info_1,
control->exit_info_2,
svm->ghcb_sa);
svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_MMIO_WRITE:
if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
@ -2544,7 +2748,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_sev_es_mmio_write(vcpu,
control->exit_info_1,
control->exit_info_2,
svm->ghcb_sa);
svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
@ -2604,7 +2808,8 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
count, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
@ -2619,7 +2824,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
* VMCB page. Do not include the encryption mask on the VMSA physical
* address since hardware will access it using the guest key.
*/
svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@ -2691,8 +2896,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
struct vcpu_svm *svm = to_svm(vcpu);
/* First SIPI: Use the values as initially set by the VMM */
if (!svm->received_first_sipi) {
svm->received_first_sipi = true;
if (!svm->sev_es.received_first_sipi) {
svm->sev_es.received_first_sipi = true;
return;
}
@ -2701,8 +2906,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
* non-zero value.
*/
if (!svm->ghcb)
if (!svm->sev_es.ghcb)
return;
ghcb_set_sw_exit_info_2(svm->ghcb, 1);
ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}

View File

@ -1452,7 +1452,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
if (vmsa_page)
svm->vmsa = page_address(vmsa_page);
svm->sev_es.vmsa = page_address(vmsa_page);
svm->guest_state_loaded = false;
@ -2835,11 +2835,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
ghcb_set_sw_exit_info_1(svm->ghcb, 1);
ghcb_set_sw_exit_info_2(svm->ghcb,
ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
X86_TRAP_GP |
SVM_EVTINJ_TYPE_EXEPT |
SVM_EVTINJ_VALID);
@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;
if (type > 3) {
kvm_inject_gp(vcpu, 0);
return 1;
}
return kvm_handle_invpcid(vcpu, type, gva);
}
@ -4701,6 +4696,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_unreg_region = svm_unregister_enc_region,
.vm_copy_enc_context_from = svm_vm_copy_asid_from,
.vm_move_enc_context_from = svm_vm_migrate_from,
.can_emulate_instruction = svm_can_emulate_instruction,

View File

@ -80,6 +80,7 @@ struct kvm_sev_info {
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
atomic_t migration_in_progress;
};
struct kvm_svm {
@ -123,6 +124,20 @@ struct svm_nested_state {
bool initialized;
};
struct vcpu_sev_es_state {
/* SEV-ES support */
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
bool received_first_sipi;
/* SEV-ES scratch area support */
void *ghcb_sa;
u32 ghcb_sa_len;
bool ghcb_sa_sync;
bool ghcb_sa_free;
};
struct vcpu_svm {
struct kvm_vcpu vcpu;
/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
@ -186,17 +201,7 @@ struct vcpu_svm {
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
} shadow_msr_intercept;
/* SEV-ES support */
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
bool received_first_sipi;
/* SEV-ES scratch area support */
void *ghcb_sa;
u32 ghcb_sa_len;
bool ghcb_sa_sync;
bool ghcb_sa_free;
struct vcpu_sev_es_state sev_es;
bool guest_state_loaded;
};
@ -558,6 +563,7 @@ int svm_register_enc_region(struct kvm *kvm,
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);

View File

@ -525,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
}
/*
* Check if MSR is intercepted for L01 MSR bitmap.
* For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1
* itself utilizing x2APIC. All MSRs were previously set to be intercepted,
* only the "disable intercept" case needs to be handled.
*/
static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
unsigned long *msr_bitmap_l0,
u32 msr, int type)
{
unsigned long *msr_bitmap;
int f = sizeof(unsigned long);
if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
if (!cpu_has_vmx_msr_bitmap())
return true;
msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
if (msr <= 0x1fff) {
return !!test_bit(msr, msr_bitmap + 0x800 / f);
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
msr &= 0x1fff;
return !!test_bit(msr, msr_bitmap + 0xc00 / f);
}
return true;
}
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
*/
static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
unsigned long *msr_bitmap_nested,
u32 msr, int type)
{
int f = sizeof(unsigned long);
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
*/
if (msr <= 0x1fff) {
if (type & MSR_TYPE_R &&
!test_bit(msr, msr_bitmap_l1 + 0x000 / f))
/* read-low */
__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
if (type & MSR_TYPE_W &&
!test_bit(msr, msr_bitmap_l1 + 0x800 / f))
/* write-low */
__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
msr &= 0x1fff;
if (type & MSR_TYPE_R &&
!test_bit(msr, msr_bitmap_l1 + 0x400 / f))
/* read-high */
__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
if (type & MSR_TYPE_W &&
!test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
/* write-high */
__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
}
if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
}
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
@ -600,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
}
}
#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
static inline \
void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
unsigned long *msr_bitmap_l1, \
unsigned long *msr_bitmap_l0, u32 msr) \
{ \
if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
else \
vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
}
BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
unsigned long *msr_bitmap_l1,
unsigned long *msr_bitmap_l0,
u32 msr, int types)
{
if (types & MSR_TYPE_R)
nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
msr_bitmap_l0, msr);
if (types & MSR_TYPE_W)
nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
msr_bitmap_l0, msr);
}
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@ -607,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int msr;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
@ -625,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
/*
* To keep the control flow simple, pay eight 8-byte writes (sixteen
* 4-byte writes on 32-bit systems) up front to enable intercepts for
* the x2APIC MSR range and selectively disable them below.
* the x2APIC MSR range and selectively toggle those relevant to L2.
*/
enable_x2apic_msr_intercepts(msr_bitmap_l0);
@ -644,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
}
}
nested_vmx_disable_intercept_for_msr(
nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_TASKPRI),
MSR_TYPE_R | MSR_TYPE_W);
if (nested_cpu_has_vid(vmcs12)) {
nested_vmx_disable_intercept_for_msr(
nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_EOI),
MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_SELF_IPI),
MSR_TYPE_W);
}
}
/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
#ifdef CONFIG_X86_64
nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
MSR_FS_BASE, MSR_TYPE_RW);
nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
MSR_GS_BASE, MSR_TYPE_RW);
nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
/*
* Checking the L0->L1 bitmap is trying to verify two things:
*
* 1. L0 gave a permission to L1 to actually passthrough the MSR. This
* ensures that we do not accidentally generate an L02 MSR bitmap
* from the L12 MSR bitmap that is too permissive.
* 2. That L1 or L2s have actually used the MSR. This avoids
* unnecessarily merging of the bitmap if the MSR is unused. This
* works properly because we only update the L01 MSR bitmap lazily.
* So even if L0 should pass L1 these MSRs, the L01 bitmap is only
* updated to reflect this when L1 (or its L2s) actually write to
* the MSR.
* Always check vmcs01's bitmap to honor userspace MSR filters and any
* other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
*/
if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_SPEC_CTRL,
MSR_TYPE_R | MSR_TYPE_W);
#ifdef CONFIG_X86_64
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_FS_BASE, MSR_TYPE_RW);
if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PRED_CMD,
MSR_TYPE_W);
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_GS_BASE, MSR_TYPE_RW);
kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PRED_CMD, MSR_TYPE_W);
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
return true;
}
@ -5379,7 +5343,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
struct {
u64 eptp, gpa;
} operand;
int i, r;
int i, r, gpr_index;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_EPT) ||
@ -5392,7 +5356,8 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
type = kvm_register_read(vcpu, gpr_index);
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
@ -5459,7 +5424,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
u64 gla;
} operand;
u16 vpid02;
int r;
int r, gpr_index;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@ -5472,7 +5437,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return 1;
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
type = kvm_register_read(vcpu, gpr_index);
types = (vmx->nested.msrs.vpid_caps &
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;

View File

@ -118,16 +118,15 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
idx &= ~(3u << 30);
return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
(fixed && idx >= pmu->nr_arch_fixed_counters);
return fixed ? idx < pmu->nr_arch_fixed_counters
: idx < pmu->nr_arch_gp_counters;
}
static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,

View File

@ -769,24 +769,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
/*
* Check if MSR is intercepted for currently loaded MSR bitmap.
*/
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
{
unsigned long *msr_bitmap;
int f = sizeof(unsigned long);
if (!cpu_has_vmx_msr_bitmap())
if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
return true;
msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
if (msr <= 0x1fff) {
return !!test_bit(msr, msr_bitmap + 0x800 / f);
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
msr &= 0x1fff;
return !!test_bit(msr, msr_bitmap + 0xc00 / f);
}
return true;
return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
MSR_IA32_SPEC_CTRL);
}
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@ -3697,46 +3686,6 @@ void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
{
int f = sizeof(unsigned long);
if (msr <= 0x1fff)
__clear_bit(msr, msr_bitmap + 0x000 / f);
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
__clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
}
static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
{
int f = sizeof(unsigned long);
if (msr <= 0x1fff)
__clear_bit(msr, msr_bitmap + 0x800 / f);
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
__clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
}
static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
{
int f = sizeof(unsigned long);
if (msr <= 0x1fff)
__set_bit(msr, msr_bitmap + 0x000 / f);
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
__set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
}
static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
{
int f = sizeof(unsigned long);
if (msr <= 0x1fff)
__set_bit(msr, msr_bitmap + 0x800 / f);
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
}
void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -5494,6 +5443,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
u64 pcid;
u64 gla;
} operand;
int gpr_index;
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
@ -5501,12 +5451,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
}
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
if (type > 3) {
kvm_inject_gp(vcpu, 0);
return 1;
}
gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
type = kvm_register_read(vcpu, gpr_index);
/* According to the Intel instruction reference, the memory operand
* is read even if it isn't needed (e.g., for type==all)
@ -6749,7 +6695,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
@ -7563,7 +7509,8 @@ static void hardware_unsetup(void)
static bool vmx_check_apicv_inhibit_reasons(ulong bit)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_HYPERV);
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
return supported & BIT(bit);
}

View File

@ -400,6 +400,34 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
/*
* Note, early Intel manuals have the write-low and read-high bitmap offsets
* the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
* 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and
* 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and
* 0xc00-0xfff for writes. MSRs not covered by either of the ranges always
* VM-Exit.
*/
#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \
static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \
u32 msr) \
{ \
int f = sizeof(unsigned long); \
\
if (msr <= 0x1fff) \
return bitop##_bit(msr, bitmap + base / f); \
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \
return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
return (rtype)true; \
}
#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \
__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \
__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
static inline u8 vmx_get_rvi(void)
{
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@ -522,4 +550,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
void dump_vmcs(struct kvm_vcpu *vcpu);
static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
return (vmx_instr_info >> 28) & 0xf;
}
#endif /* __KVM_X86_VMX_H */

View File

@ -3260,8 +3260,11 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
static void record_steal_time(struct kvm_vcpu *vcpu)
{
struct kvm_host_map map;
struct kvm_steal_time *st;
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
u64 steal;
u32 version;
if (kvm_xen_msr_enabled(vcpu->kvm)) {
kvm_xen_runstate_set_running(vcpu);
@ -3271,47 +3274,86 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
/* -EAGAIN is returned in atomic context so we can just return. */
if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
&map, &vcpu->arch.st.cache, false))
if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
return;
st = map.hva +
offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
slots = kvm_memslots(vcpu->kvm);
if (unlikely(slots->generation != ghc->generation ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
/* We rely on the fact that it fits in a single page. */
BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)
return;
}
st = (struct kvm_steal_time __user *)ghc->hva;
/*
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
u8 st_preempted = xchg(&st->preempted, 0);
u8 st_preempted = 0;
int err = -EFAULT;
if (!user_access_begin(st, sizeof(*st)))
return;
asm volatile("1: xchgb %0, %2\n"
"xor %1, %1\n"
"2:\n"
_ASM_EXTABLE_UA(1b, 2b)
: "+r" (st_preempted),
"+&r" (err)
: "m" (st->preempted));
if (err)
goto out;
user_access_end();
vcpu->arch.st.preempted = 0;
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st_preempted & KVM_VCPU_FLUSH_TLB);
if (st_preempted & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
if (!user_access_begin(st, sizeof(*st)))
goto dirty;
} else {
st->preempted = 0;
if (!user_access_begin(st, sizeof(*st)))
return;
unsafe_put_user(0, &st->preempted, out);
vcpu->arch.st.preempted = 0;
}
vcpu->arch.st.preempted = 0;
unsafe_get_user(version, &st->version, out);
if (version & 1)
version += 1; /* first time write, random junk */
if (st->version & 1)
st->version += 1; /* first time write, random junk */
st->version += 1;
version += 1;
unsafe_put_user(version, &st->version, out);
smp_wmb();
st->steal += current->sched_info.run_delay -
unsafe_get_user(steal, &st->steal, out);
steal += current->sched_info.run_delay -
vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
unsafe_put_user(steal, &st->steal, out);
smp_wmb();
version += 1;
unsafe_put_user(version, &st->version, out);
st->version += 1;
kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
out:
user_access_end();
dirty:
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@ -3517,7 +3559,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
return 1;
if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
@ -4137,7 +4179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
break;
case KVM_CAP_NR_VCPUS:
r = KVM_SOFT_MAX_VCPUS;
r = num_online_cpus();
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@ -4351,8 +4393,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
struct kvm_host_map map;
struct kvm_steal_time *st;
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@ -4360,16 +4404,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (vcpu->arch.st.preempted)
return;
if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
&vcpu->arch.st.cache, true))
/* This happens on process exit */
if (unlikely(current->mm != vcpu->kvm->mm))
return;
st = map.hva +
offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
slots = kvm_memslots(vcpu->kvm);
st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
if (unlikely(slots->generation != ghc->generation ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot))
return;
kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
st = (struct kvm_steal_time __user *)ghc->hva;
BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@ -5728,6 +5779,12 @@ split_irqchip_unlock:
if (kvm_x86_ops.vm_copy_enc_context_from)
r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
return r;
case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
r = -EINVAL;
if (kvm_x86_ops.vm_move_enc_context_from)
r = kvm_x86_ops.vm_move_enc_context_from(
kvm, cap->args[0]);
return r;
case KVM_CAP_EXIT_HYPERCALL:
if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
r = -EINVAL;
@ -7328,7 +7385,9 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
return 0;
return -EINVAL;
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@ -9552,7 +9611,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
r = -EIO;
goto out;
}
@ -10564,6 +10623,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return ret;
}
static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
{
bool inhibit = false;
struct kvm_vcpu *vcpu;
int i;
down_write(&kvm->arch.apicv_update_lock);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
inhibit = true;
break;
}
}
__kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
up_write(&kvm->arch.apicv_update_lock);
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
@ -10616,6 +10693,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
static_call(kvm_x86_update_exception_bitmap)(vcpu);
kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
r = 0;
out:
@ -10859,11 +10938,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
int idx;
kvm_release_pfn(cache->pfn, cache->dirty, cache);
kvmclock_reset(vcpu);
static_call(kvm_x86_vcpu_free)(vcpu);
@ -12275,7 +12351,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return kvm_skip_emulated_instruction(vcpu);
default:
BUG(); /* We have already checked above that type <= 3 */
kvm_inject_gp(vcpu, 0);
return 1;
}
}
EXPORT_SYMBOL_GPL(kvm_handle_invpcid);

View File

@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
swiotlb_adjust_size(size);
}
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
unsigned long pfn = 0;
pgprot_t prot;
switch (level) {
case PG_LEVEL_4K:
pfn = pte_pfn(*kpte);
prot = pte_pgprot(*kpte);
break;
case PG_LEVEL_2M:
pfn = pmd_pfn(*(pmd_t *)kpte);
prot = pmd_pgprot(*(pmd_t *)kpte);
break;
case PG_LEVEL_1G:
pfn = pud_pfn(*(pud_t *)kpte);
prot = pud_pgprot(*(pud_t *)kpte);
break;
default:
WARN_ONCE(1, "Invalid level for kpte\n");
return 0;
}
if (ret_prot)
*ret_prot = prot;
return pfn;
}
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
{
#ifdef CONFIG_PARAVIRT
unsigned long sz = npages << PAGE_SHIFT;
unsigned long vaddr_end = vaddr + sz;
while (vaddr < vaddr_end) {
int psize, pmask, level;
unsigned long pfn;
pte_t *kpte;
kpte = lookup_address(vaddr, &level);
if (!kpte || pte_none(*kpte)) {
WARN_ONCE(1, "kpte lookup for vaddr\n");
return;
}
pfn = pg_level_to_pfn(level, kpte, NULL);
if (!pfn)
continue;
psize = page_level_size(level);
pmask = page_level_mask(level);
notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
vaddr = (vaddr & pmask) + psize;
}
#endif
}
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
pgprot_t old_prot, new_prot;
unsigned long pfn, pa, size;
pte_t new_pte;
switch (level) {
case PG_LEVEL_4K:
pfn = pte_pfn(*kpte);
old_prot = pte_pgprot(*kpte);
break;
case PG_LEVEL_2M:
pfn = pmd_pfn(*(pmd_t *)kpte);
old_prot = pmd_pgprot(*(pmd_t *)kpte);
break;
case PG_LEVEL_1G:
pfn = pud_pfn(*(pud_t *)kpte);
old_prot = pud_pgprot(*(pud_t *)kpte);
break;
default:
pfn = pg_level_to_pfn(level, kpte, &old_prot);
if (!pfn)
return;
}
new_prot = old_prot;
if (enc)
@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
static int __init early_set_memory_enc_dec(unsigned long vaddr,
unsigned long size, bool enc)
{
unsigned long vaddr_end, vaddr_next;
unsigned long vaddr_end, vaddr_next, start;
unsigned long psize, pmask;
int split_page_size_mask;
int level, ret;
pte_t *kpte;
start = vaddr;
vaddr_next = vaddr;
vaddr_end = vaddr + size;
@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
ret = 0;
notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
out:
__flush_tlb_all();
return ret;
@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
return early_set_memory_enc_dec(vaddr, size, true);
}
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
{
notify_range_enc_status_changed(vaddr, npages, enc);
}
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{

View File

@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
*/
cpa_flush(&cpa, 0);
/*
* Notify hypervisor that a given memory range is mapped encrypted
* or decrypted.
*/
notify_range_enc_status_changed(addr, numpages, enc);
return ret;
}

View File

@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
/* OEM GUIDs */
#define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
#define AMD_SEV_MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
typedef struct {
efi_guid_t guid;

View File

@ -150,7 +150,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
#define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@ -617,6 +617,7 @@ struct kvm {
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
bool vm_bugged;
bool vm_dead;
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
@ -650,12 +651,19 @@ struct kvm {
#define vcpu_err(vcpu, fmt, ...) \
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
static inline void kvm_vm_dead(struct kvm *kvm)
{
kvm->vm_dead = true;
kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD);
}
static inline void kvm_vm_bugged(struct kvm *kvm)
{
kvm->vm_bugged = true;
kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
kvm_vm_dead(kvm);
}
#define KVM_BUG(cond, kvm, fmt...) \
({ \
int __ret = (cond); \

View File

@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_BINARY_STATS_FD 203
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#ifdef KVM_CAP_IRQ_ROUTING

View File

@ -73,7 +73,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test

View File

@ -82,6 +82,7 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);

View File

@ -46,4 +46,6 @@ static inline bool cpu_has_svm(void)
return ecx & CPUID_SVM;
}
int open_sev_dev_path_or_exit(void);
#endif /* SELFTEST_KVM_SVM_UTILS_H */

View File

@ -31,6 +31,19 @@ static void *align(void *x, size_t size)
return (void *) (((size_t) x + mask) & ~mask);
}
int open_path_or_exit(const char *path, int flags)
{
int fd;
fd = open(path, flags);
if (fd < 0) {
print_skip("%s not available (errno: %d)", path, errno);
exit(KSFT_SKIP);
}
return fd;
}
/*
* Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
@ -42,16 +55,7 @@ static void *align(void *x, size_t size)
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
int fd;
fd = open(KVM_DEV_PATH, flags);
if (fd < 0) {
print_skip("%s not available, is KVM loaded? (errno: %d)",
KVM_DEV_PATH, errno);
exit(KSFT_SKIP);
}
return fd;
return open_path_or_exit(KVM_DEV_PATH, flags);
}
int open_kvm_dev_path_or_exit(void)

View File

@ -13,6 +13,8 @@
#include "processor.h"
#include "svm_util.h"
#define SEV_DEV_PATH "/dev/sev"
struct gpr64_regs guest_regs;
u64 rflags;
@ -172,3 +174,14 @@ void nested_svm_check_supported(void)
exit(KSFT_SKIP);
}
}
/*
* Open SEV_DEV_PATH if available, otherwise exit the entire program.
*
* Return:
* The opened file descriptor of /dev/sev.
*/
int open_sev_dev_path_or_exit(void)
{
return open_path_or_exit(SEV_DEV_PATH, 0);
}

View File

@ -0,0 +1,203 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kvm.h>
#include <linux/psp-sev.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "svm_util.h"
#include "kselftest.h"
#include "../lib/kvm_util_internal.h"
#define SEV_POLICY_ES 0b100
#define NR_MIGRATE_TEST_VCPUS 4
#define NR_MIGRATE_TEST_VMS 3
#define NR_LOCK_TESTING_THREADS 3
#define NR_LOCK_TESTING_ITERATIONS 10000
static void sev_ioctl(int vm_fd, int cmd_id, void *data)
{
struct kvm_sev_cmd cmd = {
.id = cmd_id,
.data = (uint64_t)data,
.sev_fd = open_sev_dev_path_or_exit(),
};
int ret;
ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
"%d failed: return code: %d, errno: %d, fw error: %d",
cmd_id, ret, errno, cmd.error);
}
static struct kvm_vm *sev_vm_create(bool es)
{
struct kvm_vm *vm;
struct kvm_sev_launch_start start = { 0 };
int i;
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(vm, i);
if (es)
start.policy |= SEV_POLICY_ES;
sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
if (es)
sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
return vm;
}
static struct kvm_vm *__vm_create(void)
{
struct kvm_vm *vm;
int i;
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(vm, i);
return vm;
}
static int __sev_migrate_from(int dst_fd, int src_fd)
{
struct kvm_enable_cap cap = {
.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
.args = { src_fd }
};
return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
}
static void sev_migrate_from(int dst_fd, int src_fd)
{
int ret;
ret = __sev_migrate_from(dst_fd, src_fd);
TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
}
static void test_sev_migrate_from(bool es)
{
struct kvm_vm *src_vm;
struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
int i;
src_vm = sev_vm_create(es);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
dst_vms[i] = __vm_create();
/* Initial migration from the src to the first dst. */
sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
/* Migrate the guest back to the original VM. */
sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
kvm_vm_free(src_vm);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
kvm_vm_free(dst_vms[i]);
}
struct locking_thread_input {
struct kvm_vm *vm;
int source_fds[NR_LOCK_TESTING_THREADS];
};
static void *locking_test_thread(void *arg)
{
int i, j;
struct locking_thread_input *input = (struct locking_thread_input *)arg;
for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
j = i % NR_LOCK_TESTING_THREADS;
__sev_migrate_from(input->vm->fd, input->source_fds[j]);
}
return NULL;
}
static void test_sev_migrate_locking(void)
{
struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
pthread_t pt[NR_LOCK_TESTING_THREADS];
int i;
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
input[i].vm = sev_vm_create(/* es= */ false);
input[0].source_fds[i] = input[i].vm->fd;
}
for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
memcpy(input[i].source_fds, input[0].source_fds,
sizeof(input[i].source_fds));
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_join(pt[i], NULL);
}
static void test_sev_migrate_parameters(void)
{
struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
*sev_es_vm_no_vmsa;
int ret;
sev_vm = sev_vm_create(/* es= */ false);
sev_es_vm = sev_vm_create(/* es= */ true);
vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
vm_no_sev = __vm_create();
sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
vm_vcpu_add(sev_es_vm_no_vmsa, 1);
ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Migrations require SEV enabled. ret %d, errno: %d\n", ret,
errno);
}
int main(int argc, char *argv[])
{
test_sev_migrate_from(/* es= */ false);
test_sev_migrate_from(/* es= */ true);
test_sev_migrate_locking();
test_sev_migrate_parameters();
return 0;
}

View File

@ -3747,7 +3747,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_fpu *fpu = NULL;
struct kvm_sregs *kvm_sregs = NULL;
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
@ -3957,7 +3957,7 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
void __user *argp = compat_ptr(arg);
int r;
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
switch (ioctl) {
@ -4023,7 +4023,7 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
{
struct kvm_device *dev = filp->private_data;
if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
return -EIO;
switch (ioctl) {
@ -4345,7 +4345,7 @@ static long kvm_vm_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int r;
if (kvm->mm != current->mm || kvm->vm_bugged)
if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
switch (ioctl) {
case KVM_CREATE_VCPU:
@ -4556,7 +4556,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
struct kvm *kvm = filp->private_data;
int r;
if (kvm->mm != current->mm || kvm->vm_bugged)
if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
switch (ioctl) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT