KVM fixes for v4.11-rc2

ARM updates from Marc Zyngier:
  "vgic updates:
   - Honour disabling the ITS
   - Don't deadlock when deactivating own interrupts via MMIO
   - Correctly expose the lact of IRQ/FIQ bypass on GICv3
 
   I/O virtualization:
   - Make KVM_CAP_NR_MEMSLOTS big enough for large guests with
     many PCIe devices
 
   General bug fixes:
   - Gracefully handle exception generated with syndroms that
     the host doesn't understand
   - Properly invalidate TLBs on VHE systems"
 
 x86:
  - improvements in emulation of VMCLEAR, VMX MSR bitmaps, and VCPU reset
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABCAAGBQJYxENfAAoJEED/6hsPKofoEEkIAIWglnOGOHqf4pPv9OThKzKm
 5CGINdPVEkJ56QNaYrINiQRHAzIUg8dsrhsisYmEdYGv3Mxf5WO0OebfzTrniNm4
 GXIM8OuYD04MSnIomfGGBAwFZ6ptgdeD+PVkSFYHArkvWYfPm54ghjVj3AXmkicf
 tRiIsPSiL/QT0vha5LBGfwsWOYavmZRfQBNA5yYUIHgO0Mp7LI24AeZOQiSM2ngx
 Gl5xfzk0bayhZSBr+r/fvxqbEd0udiY7klGEvt3hrPT+JzzpoamEgCCZ6eLFZbGM
 eABeQUzm7StD4Ib3WHkVU81ysOWndL0TK94BBBLIn1j+ht9FLi9iGkmTYspk9po=
 =/phS
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM updates from Marc Zyngier:
   - vgic updates:
     - Honour disabling the ITS
     - Don't deadlock when deactivating own interrupts via MMIO
     - Correctly expose the lact of IRQ/FIQ bypass on GICv3

   - I/O virtualization:
     - Make KVM_CAP_NR_MEMSLOTS big enough for large guests with many
       PCIe devices

   - General bug fixes:
     - Gracefully handle exception generated with syndroms that the host
       doesn't understand
     - Properly invalidate TLBs on VHE systems

  x86:
   - improvements in emulation of VMCLEAR, VMX MSR bitmaps, and VCPU
     reset

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: nVMX: do not warn when MSR bitmap address is not backed
  KVM: arm64: Increase number of user memslots to 512
  KVM: arm/arm64: Remove KVM_PRIVATE_MEM_SLOTS definition that are unused
  KVM: arm/arm64: Enable KVM_CAP_NR_MEMSLOTS on arm/arm64
  KVM: Add documentation for KVM_CAP_NR_MEMSLOTS
  KVM: arm/arm64: VGIC: Fix command handling while ITS being disabled
  arm64: KVM: Survive unknown traps from guests
  arm: KVM: Survive unknown traps from guests
  KVM: arm/arm64: Let vcpu thread modify its own active state
  KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset
  kvm: nVMX: VMCLEAR should not cause the vCPU to shut down
  KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass
  arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs
This commit is contained in:
Linus Torvalds 2017-03-11 14:24:58 -08:00
commit 106e4da602
13 changed files with 191 additions and 101 deletions

View File

@ -951,6 +951,10 @@ This ioctl allows the user to create or modify a guest physical memory
slot. When changing an existing slot, it may be moved in the guest
physical memory space, or its flags may be modified. It may not be
resized. Slots may not overlap in guest physical address space.
Bits 0-15 of "slot" specifies the slot id and this value should be
less than the maximum number of user memory slots supported per VM.
The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
if this capability is supported by the architecture.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
specifies the address space which is being modified. They must be

View File

@ -209,6 +209,7 @@
#define HSR_EC_IABT_HYP (0x21)
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
#define HSR_EC_MAX (0x3f)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)

View File

@ -30,7 +30,6 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#define KVM_USER_MEM_SLOTS 32
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
#define KVM_HALT_POLL_NS_DEFAULT 500000

View File

@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_MSI_DEVID:
if (!kvm)
r = -EINVAL;

View File

@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
u32 hsr = kvm_vcpu_get_hsr(vcpu);
kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
hsr);
kvm_inject_undefined(vcpu);
return 1;
}
static exit_handle_fn arm_exit_handlers[] = {
[0 ... HSR_EC_MAX] = kvm_handle_unknown_ec,
[HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
!arm_exit_handlers[hsr_ec]) {
kvm_err("Unknown exception class: hsr: %#08x\n",
(unsigned int)kvm_vcpu_get_hsr(vcpu));
BUG();
}
return arm_exit_handlers[hsr_ec];
}

View File

@ -30,8 +30,7 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#define KVM_USER_MEM_SLOTS 32
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_USER_MEM_SLOTS 512
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HALT_POLL_NS_DEFAULT 500000

View File

@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
u32 hsr = kvm_vcpu_get_hsr(vcpu);
kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
hsr, esr_get_class_string(hsr));
kvm_inject_undefined(vcpu);
return 1;
}
static exit_handle_fn arm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
u32 hsr = kvm_vcpu_get_hsr(vcpu);
u8 hsr_ec = ESR_ELx_EC(hsr);
if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
!arm_exit_handlers[hsr_ec]) {
kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
hsr, esr_get_class_string(hsr));
BUG();
}
return arm_exit_handlers[hsr_ec];
}

View File

@ -18,14 +18,62 @@
#include <asm/kvm_hyp.h>
#include <asm/tlbflush.h>
static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
{
u64 val;
/*
* With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
* most TLB operations target EL2/EL0. In order to affect the
* guest TLBs (EL1/EL0), we need to change one of these two
* bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
* let's flip TGE before executing the TLB operation.
*/
write_sysreg(kvm->arch.vttbr, vttbr_el2);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
isb();
}
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
{
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
}
static hyp_alternate_select(__tlb_switch_to_guest,
__tlb_switch_to_guest_nvhe,
__tlb_switch_to_guest_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
{
/*
* We're done with the TLB operation, let's restore the host's
* view of HCR_EL2.
*/
write_sysreg(0, vttbr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
}
static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
{
write_sysreg(0, vttbr_el2);
}
static hyp_alternate_select(__tlb_switch_to_host,
__tlb_switch_to_host_nvhe,
__tlb_switch_to_host_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
__tlb_switch_to_guest()(kvm);
/*
* We could do so much better if we had the VA as well.
@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
dsb(ish);
isb();
write_sysreg(0, vttbr_el2);
__tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
__tlb_switch_to_guest()(kvm);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
write_sysreg(0, vttbr_el2);
__tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
/* Switch to requested VMID */
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
__tlb_switch_to_guest()(kvm);
__tlbi(vmalle1);
dsb(nsh);
isb();
write_sysreg(0, vttbr_el2);
__tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_flush_vm_context(void)

View File

@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 zero = 0;
gpa_t vmptr;
struct vmcs12 *vmcs12;
struct page *page;
if (!nested_vmx_check_permission(vcpu))
return 1;
@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
page = nested_get_page(vcpu, vmptr);
if (page == NULL) {
/*
* For accurate processor emulation, VMCLEAR beyond available
* physical memory should do nothing at all. However, it is
* possible that a nested vmx bug, not a guest hypervisor bug,
* resulted in this case, so let's shut down before doing any
* more damage:
*/
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return 1;
}
vmcs12 = kmap(page);
vmcs12->launch_state = 0;
kunmap(page);
nested_release_page(page);
kvm_vcpu_write_guest(vcpu,
vmptr + offsetof(struct vmcs12, launch_state),
&zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
@ -9694,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
return false;
page = nested_get_page(vcpu, vmcs12->msr_bitmap);
if (!page) {
WARN_ON(1);
if (!page)
return false;
}
msr_bitmap_l1 = (unsigned long *)kmap(page);
memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@ -11121,8 +11105,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
static void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu))
if (is_guest_mode(vcpu)) {
to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
}
free_nested(to_vmx(vcpu));
}

View File

@ -373,6 +373,8 @@
#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
#define ICC_IGRPEN1_EL1_SHIFT 0
#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
#define ICC_SRE_EL1_DIB (1U << 2)
#define ICC_SRE_EL1_DFB (1U << 1)
#define ICC_SRE_EL1_SRE (1U << 0)
/*

View File

@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
return ret;
}
static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
struct vgic_its *its,
gpa_t addr, unsigned int len)
{
u32 reg = 0;
mutex_lock(&its->cmd_lock);
if (its->creadr == its->cwriter)
reg |= GITS_CTLR_QUIESCENT;
if (its->enabled)
reg |= GITS_CTLR_ENABLE;
mutex_unlock(&its->cmd_lock);
return reg;
}
static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
gpa_t addr, unsigned int len,
unsigned long val)
{
its->enabled = !!(val & GITS_CTLR_ENABLE);
}
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
/*
* By writing to CWRITER the guest announces new commands to be processed.
* To avoid any races in the first place, we take the its_cmd lock, which
* protects our ring buffer variables, so that there is only one user
* per ITS handling commands at a given time.
*/
static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
gpa_t addr, unsigned int len,
unsigned long val)
/* Must be called with the cmd_lock held. */
static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
u32 reg;
if (!its)
/* Commands are only processed when the ITS is enabled. */
if (!its->enabled)
return;
mutex_lock(&its->cmd_lock);
reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
reg = ITS_CMD_OFFSET(reg);
if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
mutex_unlock(&its->cmd_lock);
return;
}
its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
}
/*
* By writing to CWRITER the guest announces new commands to be processed.
* To avoid any races in the first place, we take the its_cmd lock, which
* protects our ring buffer variables, so that there is only one user
* per ITS handling commands at a given time.
*/
static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
gpa_t addr, unsigned int len,
unsigned long val)
{
u64 reg;
if (!its)
return;
mutex_lock(&its->cmd_lock);
reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
reg = ITS_CMD_OFFSET(reg);
if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
mutex_unlock(&its->cmd_lock);
return;
}
its->cwriter = reg;
vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
*regptr = reg;
}
static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
struct vgic_its *its,
gpa_t addr, unsigned int len)
{
u32 reg = 0;
mutex_lock(&its->cmd_lock);
if (its->creadr == its->cwriter)
reg |= GITS_CTLR_QUIESCENT;
if (its->enabled)
reg |= GITS_CTLR_ENABLE;
mutex_unlock(&its->cmd_lock);
return reg;
}
static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
gpa_t addr, unsigned int len,
unsigned long val)
{
mutex_lock(&its->cmd_lock);
its->enabled = !!(val & GITS_CTLR_ENABLE);
/*
* Try to process any pending commands. This function bails out early
* if the ITS is disabled or no commands have been queued.
*/
vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \

View File

@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
struct kvm_vcpu *requester_vcpu;
spin_lock(&irq->irq_lock);
/*
* The vcpu parameter here can mean multiple things depending on how
* this function is called; when handling a trap from the kernel it
* depends on the GIC version, and these functions are also called as
* part of save/restore from userspace.
*
* Therefore, we have to figure out the requester in a reliable way.
*
* When accessing VGIC state from user space, the requester_vcpu is
* NULL, which is fine, because we guarantee that no VCPUs are running
* when accessing VGIC state from user space so irq->vcpu->cpu is
* always -1.
*/
requester_vcpu = kvm_arm_get_running_vcpu();
/*
* If this virtual IRQ was written into a list register, we
* have to make sure the CPU that runs the VCPU thread has
* synced back LR state to the struct vgic_irq. We can only
* know this for sure, when either this irq is not assigned to
* anyone's AP list anymore, or the VCPU thread is not
* running on any CPUs.
* synced back the LR state to the struct vgic_irq.
*
* In the opposite case, we know the VCPU thread may be on its
* way back from the guest and still has to sync back this
* IRQ, so we release and re-acquire the spin_lock to let the
* other thread sync back the IRQ.
* As long as the conditions below are true, we know the VCPU thread
* may be on its way back from the guest (we kicked the VCPU thread in
* vgic_change_active_prepare) and still has to sync back this IRQ,
* so we release and re-acquire the spin_lock to let the other thread
* sync back the IRQ.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);

View File

@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
* way, so we force SRE to 1 to demonstrate this to the guest.
* Also, we don't support any form of IRQ/FIQ bypass.
* This goes with the spec allowing the value to be RAO/WI.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
ICC_SRE_EL1_DFB |
ICC_SRE_EL1_SRE);
vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
} else {
vgic_v3->vgic_sre = 0;