Merge branch kvm-arm64/spec-ptw into kvmarm-master/next

* kvm-arm64/spec-ptw:
  : .
  : On taking an exception from EL1&0 to EL2(&0), the page table walker is
  : allowed to carry on with speculative walks started from EL1&0 while
  : running at EL2 (see R_LFHQG). Given that the PTW may be actively using
  : the EL1&0 system registers, the only safe way to deal with it is to
  : issue a DSB before changing any of them.
  :
  : We already did the right thing for SPE and TRBE, but ignored the PTW
  : for unknown reasons (probably because the architecture wasn't crystal
  : clear at the time).
  :
  : This requires a bit of surgery in the nvhe code, though most of these
  : patches are comments so that my future self can understand the purpose
  : of these barriers. The VHE code is largely unaffected, thanks to the
  : DSB in the context switch.
  : .
  KVM: arm64: vhe: Drop extra isb() on guest exit
  KVM: arm64: vhe: Synchronise with page table walker on MMU update
  KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc()
  KVM: arm64: nvhe: Synchronise with page table walker on TLBI
  KVM: arm64: nvhe: Synchronise with page table walker on vcpu run

Signed-off-by: Marc Zyngier <maz@kernel.org>
commit 36fe1b29b3
Author: Marc Zyngier <maz@kernel.org>
Date:   2023-04-21 09:44:58 +01:00

 6 files changed, 69 insertions(+), 15 deletions(-)
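The pattern the series enforces, in miniature: complete any in-flight EL1&0 walks before the registers the walker may still be consuming are changed behind its back. A minimal sketch (the helper below is hypothetical, not part of the series):

static inline void safe_el10_sysreg_update(u64 new_ttbr0)
{
	/* Complete in-flight EL1&0 walks started before the trap (R_LFHQG) */
	dsb(nsh);
	/* Now safe to retarget the walker */
	write_sysreg(new_ttbr0, ttbr0_el1);
	/* Synchronise the new context before any further translation */
	isb();
}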

arch/arm64/kvm/hyp/nvhe/debug-sr.c

@@ -37,7 +37,6 @@ static void __debug_save_spe(u64 *pmscr_el1)
 	/* Now drain all buffered data to memory */
 	psb_csync();
-	dsb(nsh);
 }
 
 static void __debug_restore_spe(u64 pmscr_el1)
@@ -69,7 +68,6 @@ static void __debug_save_trace(u64 *trfcr_el1)
 	isb();
 	/* Drain the trace buffer to memory */
 	tsb_csync();
-	dsb(nsh);
 }
 
 static void __debug_restore_trace(u64 trfcr_el1)

arch/arm64/kvm/hyp/nvhe/mem_protect.c

@@ -297,6 +297,13 @@ int __pkvm_prot_finalize(void)
 	params->vttbr = kvm_get_vttbr(mmu);
 	params->vtcr = host_mmu.arch.vtcr;
 	params->hcr_el2 |= HCR_VM;
+
+	/*
+	 * The CMO below not only cleans the updated params to the
+	 * PoC, but also provides the DSB that ensures ongoing
+	 * page-table walks that have started before we trapped to EL2
+	 * have completed.
+	 */
 	kvm_flush_dcache_to_poc(params, sizeof(*params));
 
 	write_sysreg(params->hcr_el2, hcr_el2);
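For context: kvm_flush_dcache_to_poc() maps to dcache_clean_inval_poc(), which is implemented in assembly and finishes with a DSB; that trailing barrier is the side effect the comment relies on. A rough C equivalent, assuming a fixed 64-byte cache line (the real code derives the line size from CTR_EL0):

static inline void flush_dcache_to_poc_sketch(void *va, size_t size)
{
	unsigned long cur = (unsigned long)va & ~63UL;
	unsigned long end = (unsigned long)va + size;

	/* Clean+invalidate each line to the Point of Coherency */
	for (; cur < end; cur += 64)
		asm volatile("dc civac, %0" : : "r" (cur) : "memory");

	/* The trailing DSB doubles as the page-table-walker barrier */
	asm volatile("dsb sy" : : : "memory");
}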

arch/arm64/kvm/hyp/nvhe/switch.c

@@ -272,6 +272,17 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	__debug_save_host_buffers_nvhe(vcpu);
 
+	/*
+	 * We're about to restore some new MMU state. Make sure
+	 * ongoing page-table walks that have started before we
+	 * trapped to EL2 have completed. This also synchronises the
+	 * above disabling of SPE and TRBE.
+	 *
+	 * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
+	 * rule R_LFHQG and subsequent information statements.
+	 */
+	dsb(nsh);
+
 	__kvm_adjust_pc(vcpu);
 
 	/*
@@ -306,6 +317,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	__timer_disable_traps(vcpu);
 	__hyp_vgic_save_state(vcpu);
 
+	/*
+	 * Same thing as before the guest run: we're about to switch
+	 * the MMU context, so let's make sure we don't have any
+	 * ongoing EL1&0 translations.
+	 */
+	dsb(nsh);
+
 	__deactivate_traps(vcpu);
 	__load_host_stage2();
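For reference, dsb() here is the usual arm64 wrapper that emits a bare DSB instruction with the requested domain (shape as in arch/arm64/include/asm/barrier.h):

#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")

The nsh (Non-shareable) domain is sufficient in both spots above: the speculative walks being drained were started by this PE's own walker, and nothing needs to be made visible to other CPUs at these points.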

arch/arm64/kvm/hyp/nvhe/tlb.c

@@ -15,8 +15,31 @@ struct tlb_inv_context {
 };
 
 static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
-				  struct tlb_inv_context *cxt)
+				  struct tlb_inv_context *cxt,
+				  bool nsh)
 {
+	/*
+	 * We have two requirements:
+	 *
+	 * - ensure that the page table updates are visible to all
+	 *   CPUs, for which a dsb(DOMAIN-st) is what we need, DOMAIN
+	 *   being either ish or nsh, depending on the invalidation
+	 *   type.
+	 *
+	 * - complete any speculative page table walk started before
+	 *   we trapped to EL2 so that we can mess with the MM
+	 *   registers out of context, for which dsb(nsh) is enough
+	 *
+	 * The composition of these two barriers is a dsb(DOMAIN), and
+	 * the 'nsh' parameter tracks the distinction between
+	 * Inner-Shareable and Non-Shareable, as specified by the
+	 * callers.
+	 */
+	if (nsh)
+		dsb(nsh);
+	else
+		dsb(ish);
+
 	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
 		u64 val;
@@ -60,10 +83,8 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
 {
 	struct tlb_inv_context cxt;
 
-	dsb(ishst);
-
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest(mmu, &cxt);
+	__tlb_switch_to_guest(mmu, &cxt, false);
 
 	/*
 	 * We could do so much better if we had the VA as well.
@@ -113,10 +134,8 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	struct tlb_inv_context cxt;
 
-	dsb(ishst);
-
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest(mmu, &cxt);
+	__tlb_switch_to_guest(mmu, &cxt, false);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
@@ -130,7 +149,7 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 	struct tlb_inv_context cxt;
 
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest(mmu, &cxt);
+	__tlb_switch_to_guest(mmu, &cxt, true);
 
 	__tlbi(vmalle1);
 	asm volatile("ic iallu");
@@ -142,7 +161,8 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 void __kvm_flush_vm_context(void)
 {
-	dsb(ishst);
+	/* Same remark as in __tlb_switch_to_guest() */
+	dsb(ish);
 	__tlbi(alle1is);
 
 	/*
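The composition argument in __tlb_switch_to_guest() can be spelled out: the old dsb(ishst) only ordered the page-table stores for the Inner-Shareable domain and did not wait for the local walker's reads, while draining the walker needs a full dsb(nsh). The weakest single barrier covering both is dsb(ish) for broadcast invalidations and dsb(nsh) for local ones, which is what the new code picks (illustrative helper, not from the patch):

static inline void tlbi_entry_barrier(bool local)
{
	if (local)
		dsb(nsh);	/* nshst (publish PTs) + nsh (drain walks) */
	else
		dsb(ish);	/* ishst (publish PTs) + nsh (drain walks) */
}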

arch/arm64/kvm/hyp/vhe/switch.c

@@ -227,11 +227,10 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	/*
 	 * When we exit from the guest we change a number of CPU configuration
-	 * parameters, such as traps. Make sure these changes take effect
-	 * before running the host or additional guests.
+	 * parameters, such as traps. We rely on the isb() in kvm_call_hyp*()
+	 * to make sure these changes take effect before running the host or
+	 * additional guests.
 	 */
-	isb();
-
 	return ret;
 }
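The isb() being relied on lives in the VHE leg of the kvm_call_hyp*() wrappers, which call the hyp function directly and then synchronise the context. Roughly (a sketch of the shape in asm/kvm_host.h; details vary by kernel version):

#define kvm_call_hyp(f, ...)					\
	do {							\
		if (has_vhe()) {				\
			f(__VA_ARGS__);				\
			isb();					\
		} else {					\
			kvm_call_hyp_nvhe(f, ##__VA_ARGS__);	\
		}						\
	} while (0)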

arch/arm64/kvm/hyp/vhe/sysreg-sr.c

@@ -13,6 +13,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
 
 /*
  * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
@@ -69,6 +70,17 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
 	host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
 	__sysreg_save_user_state(host_ctxt);
 
+	/*
+	 * When running a normal EL1 guest, we only load a new vcpu
+	 * after a context switch, which involves a DSB, so all
+	 * speculative EL1&0 walks will have already completed.
+	 * If running NV, the vcpu may transition between vEL1 and
+	 * vEL2 without a context switch, so make sure we complete
+	 * those walks before loading a new context.
+	 */
+	if (vcpu_has_nv(vcpu))
+		dsb(nsh);
+
 	/*
 	 * Load guest EL1 and user state
 	 *
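Restated as a hypothetical helper, the guard added above says that the walker only needs draining when the vcpu can flip between vEL1 and vEL2 without a host context switch (and its DSB) in between, which is the NV case:

static inline void complete_el10_walks_for_nv(struct kvm_vcpu *vcpu)
{
	/* vEL1 <-> vEL2 transitions skip the host's context-switch DSB */
	if (vcpu_has_nv(vcpu))
		dsb(nsh);
}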