KVM: PPC: Book3S HV P9: implement hash guest support

Implement hash guest support. Guest entry/exit has to restore and
save/clear the SLB, plus several other bits to accommodate hash guests
in the P9 path. Support for a hash guest on a radix host is removed from
the P7/8 path.

The HPT hcalls and faults are not handled in real mode, which is a
performance regression. A worst-case fork/exit microbenchmark takes 3x
longer after this patch. kbuild benchmark performance is in the noise,
but the slowdown is likely to be noticed somewhere.

For now, accept this penalty for the benefit of simplifying the P7/8
paths and unifying P9 hash with the new code, because hash is a less
important configuration than radix on processors that support it. Hash
will benefit from future optimisations to this path, including possibly
a faster path to handle such hcalls and interrupts without doing a full
exit.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-31-npiggin@gmail.com
This commit is contained in:
Nicholas Piggin 2021-05-28 19:07:50 +10:00 committed by Michael Ellerman
parent ac3c8b41c2
commit 079a09a500
6 changed files with 102 additions and 39 deletions

View file

@ -147,7 +147,7 @@
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
#define KVM_GUEST_MODE_HOST_HV 4
#define KVM_GUEST_MODE_HV_FAST 5 /* ISA >= v3.0 host+guest radix */
#define KVM_GUEST_MODE_HV_FAST 5 /* ISA >= v3.0 host radix */
#define KVM_INST_FETCH_FAILED -1

View file

@ -184,7 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* void kvmppc_p9_enter_guest(struct vcpu *vcpu);
*
* Enter the guest on an ISAv3.0 or later system where we have exactly
* one vcpu per vcore, and both the host and guest are radix.
* one vcpu per vcore, and the host is radix.
*/
.balign IFETCH_ALIGN_BYTES
_GLOBAL(kvmppc_p9_enter_guest)

View file

@ -3875,7 +3875,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
}
kvmppc_xive_pull_vcpu(vcpu);
vcpu->arch.slb_max = 0;
if (kvm_is_radix(vcpu->kvm))
vcpu->arch.slb_max = 0;
}
dec = mfspr(SPRN_DEC);
@ -4110,7 +4111,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
* kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
@ -4289,8 +4289,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
if (!kvm->arch.mmu_ready)
kvmhv_setup_mmu(vcpu);
if (!kvm->arch.mmu_ready) {
r = kvmhv_setup_mmu(vcpu);
if (r) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
run->fail_entry.hardware_entry_failure_reason = 0;
vcpu->arch.ret = r;
return r;
}
}
if (need_resched())
cond_resched();
@ -4303,7 +4310,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
if (kvm_is_radix(kvm))
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
@ -4503,7 +4511,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
if (kvm_is_radix(kvm))
if (radix_enabled())
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else

View file

@ -4,6 +4,7 @@
#include <asm/asm-prototypes.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
@ -55,6 +56,12 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
#define accumulate_time(vcpu, next) do {} while (0)
#endif
/*
 * Read back one SLB entry.
 *
 * Executes slbmfev and slbmfee with @idx as the input operand and stores
 * the two results through @slbev and @slbee respectively.
 */
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
{
asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
}
static inline void mtslb(u64 slbee, u64 slbev)
{
asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
@ -65,6 +72,12 @@ static inline void clear_slb_entry(unsigned int idx)
mtslb(idx, 0);
}
/*
 * Clear SLB entry 0 with clear_slb_entry(), then execute PPC_SLBIA(6).
 * NOTE(review): presumably PPC_SLBIA(6) is an slbia with IH=6 that drops the
 * remaining entries and partition-scoped lookaside state; confirm against
 * the macro definition and the ISA.
 */
static inline void slb_clear_invalidate_partition(void)
{
clear_slb_entry(0);
asm volatile(PPC_SLBIA(6));
}
/*
* Malicious or buggy radix guests may have inserted SLB entries
* (only 0..3 because radix always runs with UPRT=1), so these must
@ -81,7 +94,6 @@ static void radix_clear_slb(void)
static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
struct kvm_nested_guest *nested = vcpu->arch.nested;
u32 lpid;
@ -99,9 +111,23 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6
isync();
mtspr(SPRN_PID, vcpu->arch.pid);
isync();
}
/* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
/*
 * Install the MMU context of a hash (HPT) guest.
 *
 * Writes the guest LPID (from kvm->arch.lpid), the supplied @lpcr and the
 * vcpu's PID into their SPRs, then reloads the first vcpu->arch.slb_max
 * saved SLB entries with mtslb() and finishes with an isync().
 */
static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
u32 lpid;
int i;
lpid = kvm->arch.lpid;
mtspr(SPRN_LPID, lpid);
mtspr(SPRN_LPCR, lpcr);
mtspr(SPRN_PID, vcpu->arch.pid);
/* Restore the SLB entries previously saved in vcpu->arch.slb[] */
for (i = 0; i < vcpu->arch.slb_max; i++)
mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
isync();
}
static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
@ -115,9 +141,36 @@ static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
isync();
}
/*
 * Capture and wipe the guest's SLB state on guest exit.
 *
 * For a radix guest this only calls radix_clear_slb(). For a hash guest,
 * every SLB slot below vcpu->arch.slb_nr is read back; the valid ones are
 * packed into vcpu->arch.slb[] (with the slot index OR-ed into the ESID
 * word), slb_max is set to the number saved, and the SLB is then cleared.
 */
static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
	int slot;
	int saved = 0;

	if (kvm_is_radix(kvm)) {
		radix_clear_slb();
		return;
	}

	/*
	 * This must run before switching to host (radix host can't
	 * access all SLBs).
	 */
	for (slot = 0; slot < vcpu->arch.slb_nr; slot++) {
		u64 esid_word, vsid_word;

		mfslb(slot, &esid_word, &vsid_word);

		/* Skip slots that do not hold a valid translation */
		if (!(esid_word & SLB_ESID_V))
			continue;

		vcpu->arch.slb[saved].orige = esid_word | slot;
		vcpu->arch.slb[saved].origv = vsid_word;
		saved++;
	}

	vcpu->arch.slb_max = saved;
	slb_clear_invalidate_partition();
}
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
s64 hdec;
u64 tb, purr, spurr;
@ -218,10 +271,21 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_AMOR, ~0UL);
if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
if (kvm_is_radix(kvm)) {
if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(0, 1); /* clear RI */
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
} else {
__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
}
/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
/*
* P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
@ -229,9 +293,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(0, 1); /* clear RI */
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
@ -239,10 +300,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
accumulate_time(vcpu, &vcpu->arch.guest_time);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
kvmppc_p9_enter_guest(vcpu);
// Radix host and guest means host never runs with guest MMU state
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
accumulate_time(vcpu, &vcpu->arch.rm_intr);
@ -343,8 +401,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
#endif
}
radix_clear_slb();
accumulate_time(vcpu, &vcpu->arch.rm_exit);
/* Advance host PURR/SPURR by the amount used by guest */
@ -378,11 +434,14 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_DAWRX1, host_dawrx1);
}
/*
* Since this is radix, do a eieio; tlbsync; ptesync sequence in
* case we interrupted the guest between a tlbie and a ptesync.
*/
asm volatile("eieio; tlbsync; ptesync");
if (kvm_is_radix(kvm)) {
/*
* Since this is radix, do a eieio; tlbsync; ptesync sequence
* in case we interrupted the guest between a tlbie and a
* ptesync.
*/
asm volatile("eieio; tlbsync; ptesync");
}
/*
* cp_abort is required if the processor supports local copy-paste
@ -408,7 +467,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_HDEC, 0x7fffffff);
save_clear_guest_mmu(kvm, vcpu);
switch_mmu_to_host_radix(kvm, host_pidr);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
/*
* If we are in real mode, only switch MMU on after the MMU is

View file

@ -57,6 +57,10 @@ static int global_invalidates(struct kvm *kvm)
else
global = 1;
/* LPID has been switched to host if in virt mode so can't do local */
if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
global = 1;
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();

View file

@ -888,14 +888,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
/* For hash guest, clear out and reload the SLB */
BEGIN_MMU_FTR_SECTION
/* Radix host won't have populated the SLB, so no need to clear */
/* Clear out and reload the SLB */
li r6, 0
slbmte r6, r6
PPC_SLBIA(6)
ptesync
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4)
@ -1373,9 +1370,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
stw r5,VCPU_SLB_MAX(r9)
/* load host SLB entries */
BEGIN_MMU_FTR_SECTION
b guest_bypass
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@ -3131,10 +3125,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
PPC_SLBIA(6)
ptesync
BEGIN_MMU_FTR_SECTION
b 4f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
/* load host SLB entries */
ld r8, PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@ -3148,7 +3138,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3: addi r8, r8, 16
.endr
4: lwz r7, KVM_HOST_LPID(r10)
lwz r7, KVM_HOST_LPID(r10)
mtspr SPRN_LPID, r7
mtspr SPRN_PID, r0
ld r8, KVM_HOST_LPCR(r10)