From 250552b925ce400c17d166422fde9bb215958481 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 29 Nov 2021 10:47:01 +0100 Subject: [PATCH 1/8] KVM: nVMX: Don't use Enlightened MSR Bitmap for L3 When KVM runs as a nested hypervisor on top of Hyper-V it uses Enlightened VMCS and enables Enlightened MSR Bitmap feature for its L1s and L2s (which are actually L2s and L3s from Hyper-V's perspective). When MSR bitmap is updated, KVM has to reset HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP from clean fields to make Hyper-V aware of the change. For KVM's L1s, this is done in vmx_disable_intercept_for_msr()/vmx_enable_intercept_for_msr(). MSR bitmap for L2 is build in nested_vmx_prepare_msr_bitmap() by blending MSR bitmap for L1 and L1's idea of MSR bitmap for L2. KVM, however, doesn't check if the resulting bitmap is different and never cleans HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP in eVMCS02. This is incorrect and may result in Hyper-V missing the update. The issue could've been solved by calling evmcs_touch_msr_bitmap() for eVMCS02 from nested_vmx_prepare_msr_bitmap() unconditionally but doing so would not give any performance benefits (compared to not using Enlightened MSR Bitmap at all). 3-level nesting is also not a very common setup nowadays. Don't enable 'Enlightened MSR Bitmap' feature for KVM's L2s (real L3s) for now. Signed-off-by: Vitaly Kuznetsov Message-Id: <20211129094704.326635-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9453743ce0c4..5aadad3e7367 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2646,15 +2646,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) if (!loaded_vmcs->msr_bitmap) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); - - if (IS_ENABLED(CONFIG_HYPERV) && - static_branch_unlikely(&enable_evmcs) && - (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { - struct hv_enlightened_vmcs *evmcs = - (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; - - evmcs->hv_enlightenments_control.msr_bitmap = 1; - } } memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); @@ -6842,6 +6833,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) if (err < 0) goto free_pml; + /* + * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a + * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the + * feature only for vmcs01, KVM currently isn't equipped to realize any + * performance benefits from enabling it for vmcs02. + */ + if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) && + (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { + struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs; + + evmcs->hv_enlightenments_control.msr_bitmap = 1; + } + /* The MSR bitmap starts with all ones */ bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); From ee3a4f666207b5a9d3d4bc7f45c9d59f2aeb3a0d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Fri, 3 Dec 2021 00:10:13 +0100 Subject: [PATCH 2/8] KVM: x86: selftests: svm_int_ctl_test: fix intercept calculation INTERCEPT_x are bit positions, but the code was using the raw value of INTERCEPT_VINTR (4) instead of BIT(INTERCEPT_VINTR). This resulted in masking of bit 2 - that is, SMI instead of VINTR. Signed-off-by: Maciej S. Szmigiero Message-Id: <49b9571d25588870db5380b0be1a41df4bbaaf93.1638486479.git.maciej.szmigiero@oracle.com> --- tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c index df04f56ce859..30a81038df46 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm) vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; /* No intercepts for real and virtual interrupts */ - vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR); + vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR)); /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT); From 1ebfaa11ebb5b603a3c3f54b2e84fcf1030f5a14 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 9 Dec 2021 11:29:37 +0100 Subject: [PATCH 3/8] KVM: x86: Wait for IPIs to be delivered when handling Hyper-V TLB flush hypercall Prior to commit 0baedd792713 ("KVM: x86: make Hyper-V PV TLB flush use tlb_flush_guest()"), kvm_hv_flush_tlb() was using 'KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP' when making a request to flush TLBs on other vCPUs and KVM_REQ_TLB_FLUSH is/was defined as: (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) so KVM_REQUEST_WAIT was lost. Hyper-V TLFS, however, requires that "This call guarantees that by the time control returns back to the caller, the observable effects of all flushes on the specified virtual processors have occurred." and without KVM_REQUEST_WAIT there's a small chance that the vCPU making the TLB flush will resume running before all IPIs get delivered to other vCPUs and a stale mapping can get read there. Fix the issue by adding KVM_REQUEST_WAIT flag to KVM_REQ_TLB_FLUSH_GUEST: kvm_hv_flush_tlb() is the sole caller which uses it for kvm_make_all_cpus_request()/kvm_make_vcpus_request_mask() where KVM_REQUEST_WAIT makes a difference. Cc: stable@kernel.org Fixes: 0baedd792713 ("KVM: x86: make Hyper-V PV TLB flush use tlb_flush_guest()") Signed-off-by: Vitaly Kuznetsov Message-Id: <20211209102937.584397-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 860ed500580c..2164b9f4c7b0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -97,7 +97,7 @@ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) #define KVM_REQ_TLB_FLUSH_GUEST \ - KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ From 3244867af8c065e51969f1bffe732d3ebfd9a7d2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 22:09:19 +0000 Subject: [PATCH 4/8] KVM: x86: Ignore sparse banks size for an "all CPUs", non-sparse IPI req Do not bail early if there are no bits set in the sparse banks for a non-sparse, a.k.a. "all CPUs", IPI request. Per the Hyper-V spec, it is legal to have a variable length of '0', e.g. VP_SET's BankContents in this case, if the request can be serviced without the extra info. It is possible that for a given invocation of a hypercall that does accept variable sized input headers that all the header input fits entirely within the fixed size header. In such cases the variable sized input header is zero-sized and the corresponding bits in the hypercall input should be set to zero. Bailing early results in KVM failing to send IPIs to all CPUs as expected by the guest. Fixes: 214ff83d4473 ("KVM: x86: hyperv: implement PV IPI send hypercalls") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Message-Id: <20211207220926.718794-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5e19e6e4c2ce..8d8c1cc7cb53 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1922,11 +1922,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; + if (all_cpus) + goto check_and_send_ipi; + if (!sparse_banks_len) goto ret_success; - if (!all_cpus && - kvm_read_guest(kvm, + if (kvm_read_guest(kvm, hc->ingpa + offsetof(struct hv_send_ipi_ex, vp_set.bank_contents), sparse_banks, @@ -1934,6 +1936,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool return HV_STATUS_INVALID_HYPERCALL_INPUT; } +check_and_send_ipi: if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; From c8cc43c1eae2910ac96daa4216e0fb3391ad0504 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Aug 2021 06:54:23 -0400 Subject: [PATCH 5/8] selftests: KVM: avoid failures due to reserved HyperTransport region AMD proceessors define an address range that is reserved by HyperTransport and causes a failure if used for guest physical addresses. Avoid selftests failures by reserving those guest physical addresses; the rules are: - On parts with <40 bits, its fully hidden from software. - Before Fam17h, it was always 12G just below 1T, even if there was more RAM above this location. In this case we just not use any RAM above 1T. - On Fam17h and later, it is variable based on SME, and is either just below 2^48 (no encryption) or 2^43 (encryption). Fixes: ef4c9f4f6546 ("KVM: selftests: Fix 32-bit truncation of vm_get_max_gfn()") Cc: stable@vger.kernel.org Cc: David Matlack Reported-by: Maxim Levitsky Signed-off-by: Paolo Bonzini Message-Id: <20210805105423.412878-1-pbonzini@redhat.com> Reviewed-by: Sean Christopherson Tested-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util.h | 9 +++ tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- .../selftests/kvm/lib/x86_64/processor.c | 68 +++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 6a1a37f30494..da2b702da71a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -71,6 +71,15 @@ enum vm_guest_mode { #endif +#if defined(__x86_64__) +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +#else +static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm) +{ + return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; +} +#endif + #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8f2e0bb1ef96..daf6fdb217a7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -302,7 +302,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) (1ULL << (vm->va_bits - 1)) >> vm->page_shift); /* Limit physical addresses to PA-bits. */ - vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; + vm->max_gfn = vm_compute_max_gfn(vm); /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 82c39db91369..eef7b34756d5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1431,3 +1431,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui return cpuid; } + +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +static inline unsigned x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm) +{ + const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ + unsigned long ht_gfn, max_gfn, max_pfn; + uint32_t eax, ebx, ecx, edx, max_ext_leaf; + + max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + + /* Avoid reserved HyperTransport region on AMD processors. */ + eax = ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx || + ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx || + edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + return max_gfn; + + /* On parts with <40 physical address bits, the area is fully hidden */ + if (vm->pa_bits < 40) + return max_gfn; + + /* Before family 17h, the HyperTransport area is just below 1T. */ + ht_gfn = (1 << 28) - num_ht_pages; + eax = 1; + cpuid(&eax, &ebx, &ecx, &edx); + if (x86_family(eax) < 0x17) + goto done; + + /* + * Otherwise it's at the top of the physical address space, possibly + * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use + * the old conservative value if MAXPHYADDR is not enumerated. + */ + eax = 0x80000000; + cpuid(&eax, &ebx, &ecx, &edx); + max_ext_leaf = eax; + if (max_ext_leaf < 0x80000008) + goto done; + + eax = 0x80000008; + cpuid(&eax, &ebx, &ecx, &edx); + max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; + if (max_ext_leaf >= 0x8000001f) { + eax = 0x8000001f; + cpuid(&eax, &ebx, &ecx, &edx); + max_pfn >>= (ebx >> 6) & 0x3f; + } + + ht_gfn = max_pfn - num_ht_pages; +done: + return min(max_gfn, ht_gfn - 1); +} From 777ab82d7ce0451fd47bb57e331548deba57394e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 7 Dec 2021 17:52:30 +0800 Subject: [PATCH 6/8] KVM: X86: Raise #GP when clearing CR0_PG in 64 bit mode In the SDM: If the logical processor is in 64-bit mode or if CR4.PCIDE = 1, an attempt to clear CR0.PG causes a general-protection exception (#GP). Software should transition to compatibility mode and clear CR4.PCIDE before attempting to disable paging. Signed-off-by: Lai Jiangshan Message-Id: <20211207095230.53437-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e0aa4dd53c7f..c473498c8512 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -890,7 +890,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) return 1; - if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) + if (!(cr0 & X86_CR0_PG) && + (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))) return 1; static_call(kvm_x86_set_cr0)(vcpu, cr0); From d07898eaf39909806128caccb6ebd922ee3edd69 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 25 Oct 2021 13:13:10 -0700 Subject: [PATCH 7/8] KVM: x86: Don't WARN if userspace mucks with RCX during string I/O exit Replace a WARN with a comment to call out that userspace can modify RCX during an exit to userspace to handle string I/O. KVM doesn't actually support changing the rep count during an exit, i.e. the scenario can be ignored, but the WARN needs to go as it's trivial to trigger from userspace. Cc: stable@vger.kernel.org Fixes: 3b27de271839 ("KVM: x86: split the two parts of emulator_pio_in") Signed-off-by: Sean Christopherson Message-Id: <20211025201311.1881846-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c473498c8512..0cf1082455df 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7122,7 +7122,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port, void *val, unsigned int count) { if (vcpu->arch.pio.count) { - /* Complete previous iteration. */ + /* + * Complete a previous iteration that required userspace I/O. + * Note, @count isn't guaranteed to match pio.count as userspace + * can modify ECX before rerunning the vCPU. Ignore any such + * shenanigans as KVM doesn't support modifying the rep count, + * and the emulator ensures @count doesn't overflow the buffer. + */ } else { int r = __emulator_pio_in(vcpu, size, port, count); if (!r) @@ -7131,7 +7137,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, /* Results already available, fall through. */ } - WARN_ON(count != vcpu->arch.pio.count); complete_emulator_pio_in(vcpu, val); return 1; } From 10e7a099bfd860a2b77ea8aaac661f52c16dd865 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 25 Oct 2021 13:13:11 -0700 Subject: [PATCH 8/8] selftests: KVM: Add test to verify KVM doesn't explode on "bad" I/O Add an x86 selftest to verify that KVM doesn't WARN or otherwise explode if userspace modifies RCX during a userspace exit to handle string I/O. This is a regression test for a user-triggerable WARN introduced by commit 3b27de271839 ("KVM: x86: split the two parts of emulator_pio_in"). Signed-off-by: Sean Christopherson Message-Id: <20211025201311.1881846-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/x86_64/userspace_io_test.c | 114 ++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/userspace_io_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 3763105029fb..00814c0f87a6 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -30,6 +30,7 @@ /x86_64/svm_int_ctl_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test +/x86_64/userspace_io_test /x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c4e34717826a..f307c9f61981 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c new file mode 100644 index 000000000000..e4bef2e05686 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 1 + +static void guest_ins_port80(uint8_t *buffer, unsigned int count) +{ + unsigned long end; + + if (count == 2) + end = (unsigned long)buffer + 1; + else + end = (unsigned long)buffer + 8192; + + asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); + GUEST_ASSERT_1(count == 0, count); + GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end); +} + +static void guest_code(void) +{ + uint8_t buffer[8192]; + int i; + + /* + * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to + * test that KVM doesn't explode when userspace modifies the "count" on + * a userspace I/O exit. KVM isn't required to play nice with the I/O + * itself as KVM doesn't support manipulating the count, it just needs + * to not explode or overflow a buffer. + */ + guest_ins_port80(buffer, 2); + guest_ins_port80(buffer, 3); + + /* Verify KVM fills the buffer correctly when not stuffing RCX. */ + memset(buffer, 0, sizeof(buffer)); + guest_ins_port80(buffer, 8192); + for (i = 0; i < 8192; i++) + GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_regs regs; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + run = vcpu_state(vm, VCPU_ID); + + memset(®s, 0, sizeof(regs)); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + if (get_ucall(vm, VCPU_ID, &uc)) + break; + + TEST_ASSERT(run->io.port == 0x80, + "Expected I/O at port 0x80, got port 0x%x\n", run->io.port); + + /* + * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. + * Note, this abuses KVM's batching of rep string I/O to avoid + * getting stuck in an infinite loop. That behavior isn't in + * scope from a testing perspective as it's not ABI in any way, + * i.e. it really is abusing internal KVM knowledge. + */ + vcpu_regs_get(vm, VCPU_ID, ®s); + if (regs.rcx == 2) + regs.rcx = 1; + if (regs.rcx == 3) + regs.rcx = 8192; + memset((void *)run + run->io.data_offset, 0xaa, 4096); + vcpu_regs_set(vm, VCPU_ID, ®s); + } + + switch (uc.cmd) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx", + (const char *)uc.args[0], __FILE__, uc.args[1], + uc.args[2], uc.args[3]); + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +}