Merge branch 'topic/ppc-kvm' into next

Merge some powerpc KVM patches we are keeping in a topic branch just in
case anyone else needs to merge them.
This commit is contained in:
Michael Ellerman 2021-04-18 23:55:12 +10:00
commit a38cb41719
7 changed files with 109 additions and 39 deletions

View file

@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
struct kvm_memory_slot *memslot,
unsigned long *map);
extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm,
unsigned long lpcr);
extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
unsigned long mask);
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);

View file

@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn,
unsigned long va);
unsigned long pte_index, unsigned long avpn);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,

View file

@ -2503,8 +2503,6 @@ EXC_VIRT_NONE(0x5100, 0x100)
INT_DEFINE_BEGIN(cbe_system_error)
IVEC=0x1200
IHSRR=1
IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(cbe_system_error)
EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
@ -2524,11 +2522,16 @@ EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
#endif
/**
* Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
* This has been removed from the ISA before 2.01, which is the earliest
* 64-bit BookS ISA supported, however the G5 / 970 implements this
* interrupt with a non-architected feature available through the support
* processor interface.
*/
INT_DEFINE_BEGIN(instruction_breakpoint)
IVEC=0x1300
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_SKIP=1
IKVM_REAL=1
#endif
INT_DEFINE_END(instruction_breakpoint)
@ -2674,8 +2677,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
INT_DEFINE_BEGIN(cbe_maintenance)
IVEC=0x1600
IHSRR=1
IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(cbe_maintenance)
EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
@ -2727,8 +2728,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
INT_DEFINE_BEGIN(cbe_thermal)
IVEC=0x1800
IHSRR=1
IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(cbe_thermal)
EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)

View file

@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
vcpu->arch.dawrx1 = value2;
return H_SUCCESS;
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
/* KVM does not support mflags=2 (AIL=2) */
/*
* KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
* Keep this in synch with kvmppc_filter_guest_lpcr_hv.
*/
if (mflags != 0 && mflags != 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
return 0;
}
/*
* Enforce limits on guest LPCR values based on hardware availability,
* guest configuration, and possibly hypervisor support and security
* concerns.
*/
unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
{
/* LPCR_TC only applies to HPT guests */
if (kvm_is_radix(kvm))
lpcr &= ~LPCR_TC;
/* On POWER8 and above, userspace can modify AIL */
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
lpcr &= ~LPCR_AIL;
if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
/*
* On POWER9, allow userspace to enable large decrementer for the
* guest, whether or not the host has it enabled.
*/
if (!cpu_has_feature(CPU_FTR_ARCH_300))
lpcr &= ~LPCR_LD;
return lpcr;
}
static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
{
if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
}
}
static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
bool preserve_top32)
{
@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
u64 mask;
spin_lock(&vc->lock);
/*
* Userspace can only modify
* DPFD (default prefetch depth), ILE (interrupt little-endian),
* TC (translation control), AIL (alternate interrupt location),
* LD (large decrementer).
* These are subject to restrictions from kvmppc_filter_lcpr_hv().
*/
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
/* Broken 32-bit version of LPCR must not clear top bits */
if (preserve_top32)
mask &= 0xFFFFFFFF;
new_lpcr = kvmppc_filter_lpcr_hv(kvm,
(vc->lpcr & ~mask) | (new_lpcr & mask));
/*
* If ILE (interrupt little-endian) has changed, update the
* MSR_LE bit in the intr_msr for each vcpu in this vcore.
@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
}
}
/*
* Userspace can only modify DPFD (default prefetch depth),
* ILE (interrupt little-endian) and TC (translation control).
* On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
*/
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mask |= LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
* guest, whether or not the host has it enabled.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
mask |= LPCR_LD;
vc->lpcr = new_lpcr;
/* Broken 32-bit version of LPCR must not clear top bits */
if (preserve_top32)
mask &= 0xFFFFFFFF;
vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
spin_unlock(&vc->lock);
}
@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
/* Save guest CTRL register, set runlatch to 1 */
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_DSCR, host_dscr);
mtspr(SPRN_TIDR, host_tidr);
mtspr(SPRN_IAMR, host_iamr);
mtspr(SPRN_PSPB, 0);
if (host_amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_amr);
@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
struct kvmppc_vcore *vc = kvm->arch.vcores[i];
if (!vc)
continue;
spin_lock(&vc->lock);
vc->lpcr = (vc->lpcr & ~mask) | lpcr;
verify_lpcr(kvm, vc->lpcr);
spin_unlock(&vc->lock);
if (++cores_done >= kvm->arch.online_vcores)
break;
@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvmppc_setup_partition_table(kvm);
}
verify_lpcr(kvm, lpcr);
kvm->arch.lpcr = lpcr;
/* Initialization for future HPT resizes */
@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = {
H_READ,
H_PROTECT,
H_BULK_REMOVE,
#ifdef CONFIG_SPAPR_TCE_IOMMU
H_GET_TCE,
H_PUT_TCE,
#endif
H_SET_DABR,
H_SET_XDABR,
H_CEDE,

View file

@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
/* Guest must always run with ME enabled, HV disabled. */
msr = (msr | MSR_ME) & ~MSR_HV;
/*
* Check for illegal transactional state bit combination
* and if we find it, force the TS field to a safe state.

View file

@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
}
}
/*
* This can result in some L0 HV register state being leaked to an L1
* hypervisor when the hv_guest_state is copied back to the guest after
* being modified here.
*
* There is no known problem with such a leak, and in many cases these
* register settings could be derived by the guest by observing behaviour
* and timing, interrupts, etc., but it is an issue to consider.
*/
static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
u64 mask;
/*
* Don't let L1 change LPCR bits for the L2 except these:
*/
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
LPCR_LPES | LPCR_MER;
/*
* Additional filtering is required depending on hardware
* and configuration.
*/
hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
(vc->lpcr & ~mask) | (hr->lpcr & mask));
/*
* Don't let L1 enable features for L2 which we've disabled for L1,
* but preserve the interrupt cause field.
@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
u64 hv_ptr, regs_ptr;
u64 hdec_exp;
s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
u64 mask;
unsigned long lpcr;
if (vcpu->kvm->arch.l1_ptcr == 0)
return H_NOT_AVAILABLE;
@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
vcpu->arch.nested = l2;
vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
vcpu->arch.regs = l2_regs;
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
LPCR_LPES | LPCR_MER;
lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
/* Guest must always run with ME enabled, HV disabled. */
vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
sanitise_hv_regs(vcpu, &l2_hv);
restore_hv_regs(vcpu, &l2_hv);
@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
r = RESUME_HOST;
break;
}
r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
} while (is_kvmppc_resume_guest(r));
/* save L2 state for return */

View file

@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn,
unsigned long va)
unsigned long pte_index, unsigned long avpn)
{
struct kvm *kvm = vcpu->kvm;
__be64 *hpte;