Merge tag 'kvm-s390-next-6.6-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

- PV crypto passthrough enablement (Tony, Steffen, Viktor, Janosch)
  Allows a PV guest to use crypto cards. Card access is governed by
  the firmware, and once a crypto queue is "bound" to a PV VM, every
  other entity (PV or not) loses access until the queue is unbound
  again. Enablement is done via flags when creating the PV VM.

- Guest debug fixes (Ilya)
Paolo Bonzini, 2023-08-31 13:21:27 -04:00
commit 69fd3876a4
14 changed files with 486 additions and 95 deletions
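For context, the new guest UV-feature bits are configured from userspace through the KVM_S390_VM_CPU_MODEL attribute group before the first vCPU is created. The sketch below is a minimal, hypothetical example (the enable_pv_ap_passthrough() helper and its error handling are assumptions, not part of this series); it presumes the updated s390 uapi headers so that struct kvm_s390_vm_cpu_uv_feat and the *_UV_FEAT_GUEST attributes are available:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/*
 * Hypothetical helper: request that the Ultravisor expose the AP
 * instructions (bit 4) and AP interruption virtualization (bit 5) to a
 * protected guest.  It must be called on the VM fd before the first
 * vCPU is created; kvm_s390_set_uv_feat() returns -EBUSY otherwise.
 * KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST can be queried first to learn
 * which bits the host firmware actually offers.
 */
static int enable_pv_ap_passthrough(int vm_fd)
{
	struct kvm_s390_vm_cpu_uv_feat feat = { .feat = 0 };
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_MODEL,
		.attr = KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST,
		.addr = (uint64_t)&feat,
	};

	feat.ap = 1;		/* allow AP instructions in the PV guest */
	feat.ap_intr = 1;	/* allow AP interrupt virtualization */
	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}

When the VM is later switched to protected mode, kvm_s390_pv_init_vm() copies these bits into the ap_allow_instr/ap_instr_intr flags of the create-configuration UVCB, as shown in the pv.c hunk further down.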

@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
__u64 *fac_list;
u64 cpuid;
unsigned short ibc;
/* subset of available UV-features for pv-guests enabled by user space */
struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
};
typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
@ -1028,6 +1030,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
extern char sie_exit;
bool kvm_s390_pv_is_protected(struct kvm *kvm);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);

@ -99,6 +99,8 @@ enum uv_cmds_inst {
enum uv_feat_ind {
BIT_UV_FEAT_MISC = 0,
BIT_UV_FEAT_AIV = 1,
BIT_UV_FEAT_AP = 4,
BIT_UV_FEAT_AP_INTR = 5,
};
struct uv_cb_header {
@ -159,7 +161,15 @@ struct uv_cb_cgc {
u64 guest_handle;
u64 conf_base_stor_origin;
u64 conf_virt_stor_origin;
u64 reserved30;
u8 reserved30[6];
union {
struct {
u16 : 14;
u16 ap_instr_intr : 1;
u16 ap_allow_instr : 1;
};
u16 raw;
} flags;
u64 guest_stor_origin;
u64 guest_stor_len;
u64 guest_sca;
@ -397,6 +407,13 @@ struct uv_info {
extern struct uv_info uv_info;
static inline bool uv_has_feature(u8 feature_bit)
{
if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
return false;
return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
@ -463,6 +480,7 @@ static inline int is_prot_virt_host(void)
return prot_virt_host;
}
int uv_pin_shared(unsigned long paddr);
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_owned_page(unsigned long paddr);
@ -475,6 +493,11 @@ void setup_uv(void);
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
static inline int uv_pin_shared(unsigned long paddr)
{
return 0;
}
static inline int uv_destroy_owned_page(unsigned long paddr)
{
return 0;

@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 reserved[1728];
};
#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6
#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7
#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64
struct kvm_s390_vm_cpu_uv_feat {
union {
struct {
__u64 : 4;
__u64 ap : 1; /* bit 4 */
__u64 ap_intr : 1; /* bit 5 */
__u64 : 58;
};
__u64 feat;
};
};
/* kvm attributes for crypto */
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1

@ -88,7 +88,7 @@ fail:
* Requests the Ultravisor to pin the page in the shared state. This will
* cause an intercept when the guest attempts to unshare the pinned page.
*/
static int uv_pin_shared(unsigned long paddr)
int uv_pin_shared(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(uv_pin_shared);
/*
* Requests the Ultravisor to destroy a guest page and make it
@ -257,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
* shared page from a different protected VM will automatically also
* transfer its ownership.
*/
if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
if (uv_has_feature(BIT_UV_FEAT_MISC))
return false;
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
return false;

@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
{
if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
return false;
if (guestdbg_sstep_enabled(vcpu) &&
vcpu->arch.sie_block->iprcc != PGM_PER) {
/*
* __vcpu_run() will exit after delivering the concurrently
* indicated condition.
*/
return false;
}
return true;
}
static int handle_prog(struct kvm_vcpu *vcpu)
{
psw_t psw;
@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EOPNOTSUPP;
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
if (should_handle_per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
return rc;
@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
return handle_instruction(vcpu);
}
static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
{
/* Process PER, also if the instruction is processed in user space. */
if (!(vcpu->arch.sie_block->icptstatus & 0x02))
return false;
if (rc != 0 && rc != -EOPNOTSUPP)
return false;
if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
/* __vcpu_run() will exit after delivering the interrupt. */
return false;
return true;
}
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
rc = handle_partial_execution(vcpu);
break;
case ICPT_KSS:
rc = kvm_s390_skey_check_enable(vcpu);
break;
/* Instruction will be redriven, skip the PER check. */
return kvm_s390_skey_check_enable(vcpu);
case ICPT_MCHKREQ:
case ICPT_INT_ENABLE:
/*
@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
/* process PER, also if the instruction is processed in user space */
if (vcpu->arch.sie_block->icptstatus & 0x02 &&
(!rc || rc == -EOPNOTSUPP))
if (should_handle_per_ifetch(vcpu, rc))
per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
return per_rc ? per_rc : rc;
}

@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc = 0;
bool delivered = false;
unsigned long irq_type;
unsigned long irqs;
@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
clear_bit(irq_type, &li->pending_irqs);
}
delivered |= !rc;
}
/*
* We delivered at least one interrupt and modified the PC. Force a
* singlestep event now.
*/
if (delivered && guestdbg_sstep_enabled(vcpu)) {
struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
debug_exit->type = KVM_SINGLESTEP;
vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
}
set_intercept_indicators(vcpu);

@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
return 0;
}
#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
( \
((struct kvm_s390_vm_cpu_uv_feat){ \
.ap = 1, \
.ap_intr = 1, \
}) \
.feat \
)
static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
unsigned long data, filter;
filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
if (get_user(data, &ptr->feat))
return -EFAULT;
if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
return -EINVAL;
mutex_lock(&kvm->lock);
if (kvm->created_vcpus) {
mutex_unlock(&kvm->lock);
return -EBUSY;
}
kvm->arch.model.uv_feat_guest.feat = data;
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
return 0;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
ret = kvm_s390_set_processor_subfunc(kvm, attr);
break;
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
ret = kvm_s390_set_uv_feat(kvm, attr);
break;
}
return ret;
}
@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
return 0;
}
static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
if (put_user(feat, &dst->feat))
return -EFAULT;
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
return 0;
}
static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
unsigned long feat;
BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
if (put_user(feat, &dst->feat))
return -EFAULT;
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
ret = kvm_s390_get_machine_subfunc(kvm, attr);
break;
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
ret = kvm_s390_get_processor_uv_feat(kvm, attr);
break;
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
ret = kvm_s390_get_machine_uv_feat(kvm, attr);
break;
}
return ret;
}
@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MACHINE_FEAT:
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
ret = 0;
break;
default:
@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
struct kvm_vcpu *vcpu;
/* Disable the GISA if the ultravisor does not support AIV. */
if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
if (!uv_has_feature(BIT_UV_FEAT_AIV))
kvm_s390_gisa_disable(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
kvm->arch.model.uv_feat_guest.feat = 0;
kvm_s390_crypto_init(kvm);
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc)
if (rc || guestdbg_exit_pending(vcpu))
return rc;
}
@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
do {
rc = vcpu_pre_run(vcpu);
if (rc)
if (rc || guestdbg_exit_pending(vcpu))
break;
kvm_vcpu_srcu_read_unlock(vcpu);
@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
int rc;
switch (ioctl) {
case KVM_S390_IRQ: {
@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
return -EFAULT;
return kvm_s390_inject_vcpu(vcpu, &s390irq);
rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
break;
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
return -EFAULT;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
return kvm_s390_inject_vcpu(vcpu, &s390irq);
rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
break;
}
default:
rc = -ENOIOCTLCMD;
break;
}
return -ENOIOCTLCMD;
/*
* To simplify single stepping of userspace-emulated instructions,
* KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
* should_handle_per_ifetch()). However, if userspace emulation injects
* an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
* after (and not before) the interrupt delivery.
*/
if (!rc)
vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
return rc;
}
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,

@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
return vcpu->arch.pv.handle;
}
static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
lockdep_assert_held(&kvm->lock);
return !!kvm_s390_pv_get_handle(kvm);
}
static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
lockdep_assert_held(&vcpu->mutex);
return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);

@ -18,6 +18,20 @@
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
lockdep_assert_held(&kvm->lock);
return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
lockdep_assert_held(&vcpu->mutex);
return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
/**
* struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
* be destroyed
@ -271,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
uvcb.header.rc, uvcb.header.rrc);
WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
WARN_ONCE(cc && uvcb.header.rc != 0x104,
"protvirt destroy vm fast failed handle %llx rc %x rrc %x",
kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
/* Intended memory leak on "impossible" error */
if (!cc)
@ -561,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
uvcb.conf_base_stor_origin =
virt_to_phys((void *)kvm->arch.pv.stor_base);
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
cc = uv_call_sched(0, (u64)&uvcb);
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
/* Outputs */
kvm->arch.pv.handle = uvcb.guest_handle;

@ -827,7 +827,7 @@ void do_secure_storage_access(struct pt_regs *regs)
* reliable without the misc UV feature so we need to check
* for that as well.
*/
if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
if (uv_has_feature(BIT_UV_FEAT_MISC) &&
!test_bit_inv(61, &regs->int_parm_long)) {
/*
* When this happens, userspace did something that it

@ -30,13 +30,12 @@
#define AP_QUEUE_UNASSIGNED "unassigned"
#define AP_QUEUE_IN_USE "in use"
#define MAX_RESET_CHECK_WAIT 200 /* Sleep max 200ms for reset check */
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
/**
* get_update_locks_for_kvm: Acquire the locks required to dynamically update a
@ -360,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
return 0;
}
static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
{
int ret;
/*
* The nib has to be located in shared storage since guest and
* host access it. vfio_pin_pages() will do a pin shared and
* if that fails (possibly because it's not a shared page) it
* calls export. We try to do a second pin shared here so that
* the UV gives us an error code if we try to pin a non-shared
* page.
*
* If the page is already pinned shared the UV will return a success.
*/
ret = uv_pin_shared(addr);
if (ret) {
/* vfio_pin_pages() likely exported the page so let's re-import */
gmap_convert_to_secure(gmap, addr);
}
return ret;
}
/**
* vfio_ap_irq_enable - Enable Interruption for a APQN
*
@ -423,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
aqic_gisa.gisc = isc;
/* NIB in non-shared storage is a rc 6 for PV guests */
if (kvm_s390_pv_cpu_is_protected(vcpu) &&
ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
status.response_code = AP_RESPONSE_INVALID_ADDRESS;
return status;
}
nisc = kvm_s390_gisc_register(kvm, isc);
if (nisc < 0) {
VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
@ -675,7 +704,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
*/
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q || q->reset_rc) {
if (!q || q->reset_status.response_code) {
clear_bit_inv(apid,
matrix_mdev->shadow_apcb.apm);
break;
@ -1608,19 +1637,21 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
{
switch (status->response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
if (status->queue_empty && !status->irq_enabled)
return 0;
return -EBUSY;
case AP_RESPONSE_DECONFIGURED:
/*
* If the AP queue is deconfigured, any subsequent AP command
* targeting the queue will fail with the same response code. On the
* other hand, when an AP adapter is deconfigured, the associated
* queues are reset, so let's return a value indicating the reset
* for which we're waiting completed successfully.
*/
return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
return -EBUSY;
case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
case AP_RESPONSE_ASSOC_FAILED:
/*
* These asynchronous response codes indicate a PQAP(AAPQ)
* instruction to associate a secret with the guest failed. All
* subsequent AP instructions will end with the asynchronous
* response code until the AP queue is reset; so, let's return
* a value indicating a reset needs to be performed again.
*/
return -EAGAIN;
default:
WARN(true,
"failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",
@ -1630,91 +1661,105 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
}
}
static int apq_reset_check(struct vfio_ap_queue *q)
{
int ret;
int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL;
struct ap_queue_status status;
#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
for (; iters > 0; iters--) {
static void apq_reset_check(struct work_struct *reset_work)
{
int ret = -EBUSY, elapsed = 0;
struct ap_queue_status status;
struct vfio_ap_queue *q;
q = container_of(reset_work, struct vfio_ap_queue, reset_work);
memcpy(&status, &q->reset_status, sizeof(status));
while (true) {
msleep(AP_RESET_INTERVAL);
elapsed += AP_RESET_INTERVAL;
status = ap_tapq(q->apqn, NULL);
ret = apq_status_check(q->apqn, &status);
if (ret != -EBUSY)
return ret;
if (ret == -EIO)
return;
if (ret == -EBUSY) {
pr_notice_ratelimited(WAIT_MSG, elapsed,
AP_QID_CARD(q->apqn),
AP_QID_QUEUE(q->apqn),
status.response_code,
status.queue_empty,
status.irq_enabled);
} else {
if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
q->reset_status.response_code == AP_RESPONSE_BUSY ||
q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
ret == -EAGAIN) {
status = ap_zapq(q->apqn, 0);
memcpy(&q->reset_status, &status, sizeof(status));
continue;
}
/*
* When an AP adapter is deconfigured, the
* associated queues are reset, so let's set the
* status response code to 0 so the queue may be
* passed through (i.e., not filtered)
*/
if (status.response_code == AP_RESPONSE_DECONFIGURED)
q->reset_status.response_code = 0;
if (q->saved_isc != VFIO_AP_ISC_INVALID)
vfio_ap_free_aqic_resources(q);
break;
}
}
WARN_ONCE(iters <= 0,
"timeout verifying reset of queue %02x.%04x (%u, %u, %u)",
AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
status.queue_empty, status.irq_enabled, status.response_code);
return ret;
}
static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
{
struct ap_queue_status status;
int ret;
if (!q)
return 0;
retry_zapq:
return;
status = ap_zapq(q->apqn, 0);
q->reset_rc = status.response_code;
memcpy(&q->reset_status, &status, sizeof(status));
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
ret = 0;
/* if the reset has not completed, wait for it to take effect */
if (!status.queue_empty || status.irq_enabled)
ret = apq_reset_check(q);
break;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
/*
* There is a reset issued by another process in progress. Let's wait
* for that to complete. Since we have no idea whether it was a RAPQ or
* ZAPQ, then if it completes successfully, let's issue the ZAPQ.
* Let's verify whether the ZAPQ completed successfully on a work queue.
*/
ret = apq_reset_check(q);
if (ret)
break;
goto retry_zapq;
queue_work(system_long_wq, &q->reset_work);
break;
case AP_RESPONSE_DECONFIGURED:
/*
* When an AP adapter is deconfigured, the associated
* queues are reset, so let's return a value indicating the reset
* completed successfully.
* queues are reset, so let's set the status response code to 0
* so the queue may be passed through (i.e., not filtered).
*/
ret = 0;
q->reset_status.response_code = 0;
vfio_ap_free_aqic_resources(q);
break;
default:
WARN(true,
"PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
status.response_code);
return -EIO;
}
vfio_ap_free_aqic_resources(q);
return ret;
}
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
{
int ret, loop_cursor, rc = 0;
int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
vfio_ap_mdev_reset_queue(q);
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
ret = vfio_ap_mdev_reset_queue(q);
/*
* Regardless whether a queue turns out to be busy, or
* is not operational, we need to continue resetting
* the remaining queues.
*/
if (ret)
rc = ret;
flush_work(&q->reset_work);
if (q->reset_status.response_code)
ret = -EIO;
}
return rc;
return ret;
}
static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
@ -2038,6 +2083,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
q->apqn = to_ap_queue(&apdev->device)->qid;
q->saved_isc = VFIO_AP_ISC_INVALID;
memset(&q->reset_status, 0, sizeof(q->reset_status));
INIT_WORK(&q->reset_work, apq_reset_check);
matrix_mdev = get_update_locks_by_apqn(q->apqn);
if (matrix_mdev) {
@ -2087,6 +2134,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
}
vfio_ap_mdev_reset_queue(q);
flush_work(&q->reset_work);
dev_set_drvdata(&apdev->device, NULL);
kfree(q);
release_update_locks_for_mdev(matrix_mdev);

@ -133,7 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
* @reset_rc: the status response code from the last reset of the queue
* @reset_status: the status from the last reset of the queue
* @reset_work: work to wait for queue reset to complete
*/
struct vfio_ap_queue {
struct ap_matrix_mdev *matrix_mdev;
@ -142,7 +143,8 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
unsigned int reset_rc;
struct ap_queue_status reset_status;
struct work_struct reset_work;
};
int vfio_ap_mdev_register(void);

@ -169,6 +169,7 @@ TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += s390x/cmma_test
TEST_GEN_PROGS_s390x += s390x/debug_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += guest_print_test

@ -0,0 +1,160 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Test KVM debugging features. */
#include "kvm_util.h"
#include "test_util.h"
#include <linux/kvm.h>
#define __LC_SVC_NEW_PSW 0x1c0
#define __LC_PGM_NEW_PSW 0x1d0
#define ICPT_INSTRUCTION 0x04
#define IPA0_DIAG 0x8300
#define PGM_SPECIFICATION 0x06
/* Common code for testing single-stepping interruptions. */
extern char int_handler[];
asm("int_handler:\n"
"j .\n");
static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
size_t new_psw_off, uint64_t *new_psw)
{
struct kvm_guest_debug debug = {};
struct kvm_regs regs;
struct kvm_vm *vm;
char *lowcore;
vm = vm_create_with_one_vcpu(vcpu, guest_code);
lowcore = addr_gpa2hva(vm, 0);
new_psw[0] = (*vcpu)->run->psw_mask;
new_psw[1] = (uint64_t)int_handler;
memcpy(lowcore + new_psw_off, new_psw, 16);
vcpu_regs_get(*vcpu, &regs);
regs.gprs[2] = -1;
vcpu_regs_set(*vcpu, &regs);
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
vcpu_guest_debug_set(*vcpu, &debug);
vcpu_run(*vcpu);
return vm;
}
static void test_step_int(void *guest_code, size_t new_psw_off)
{
struct kvm_vcpu *vcpu;
uint64_t new_psw[2];
struct kvm_vm *vm;
vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
kvm_vm_free(vm);
}
/* Test single-stepping "boring" program interruptions. */
extern char test_step_pgm_guest_code[];
asm("test_step_pgm_guest_code:\n"
".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
"j .\n");
static void test_step_pgm(void)
{
test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
}
/*
* Test single-stepping program interruptions caused by DIAG.
* Userspace emulation must not interfere with single-stepping.
*/
extern char test_step_pgm_diag_guest_code[];
asm("test_step_pgm_diag_guest_code:\n"
"diag %r0,%r0,0\n"
"j .\n");
static void test_step_pgm_diag(void)
{
struct kvm_s390_irq irq = {
.type = KVM_S390_PROGRAM_INT,
.u.pgm.code = PGM_SPECIFICATION,
};
struct kvm_vcpu *vcpu;
uint64_t new_psw[2];
struct kvm_vm *vm;
vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
__LC_PGM_NEW_PSW, new_psw);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
kvm_vm_free(vm);
}
/*
* Test single-stepping program interruptions caused by ISKE.
* CPUSTAT_KSS handling must not interfere with single-stepping.
*/
extern char test_step_pgm_iske_guest_code[];
asm("test_step_pgm_iske_guest_code:\n"
"iske %r2,%r2\n"
"j .\n");
static void test_step_pgm_iske(void)
{
test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
}
/*
* Test single-stepping program interruptions caused by LCTL.
* KVM emulation must not interfere with single-stepping.
*/
extern char test_step_pgm_lctl_guest_code[];
asm("test_step_pgm_lctl_guest_code:\n"
"lctl %c0,%c0,1\n"
"j .\n");
static void test_step_pgm_lctl(void)
{
test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
}
/* Test single-stepping supervisor-call interruptions. */
extern char test_step_svc_guest_code[];
asm("test_step_svc_guest_code:\n"
"svc 0\n"
"j .\n");
static void test_step_svc(void)
{
test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
}
/* Run all tests above. */
static struct testdef {
const char *name;
void (*test)(void);
} testlist[] = {
{ "single-step pgm", test_step_pgm },
{ "single-step pgm caused by diag", test_step_pgm_diag },
{ "single-step pgm caused by iske", test_step_pgm_iske },
{ "single-step pgm caused by lctl", test_step_pgm_lctl },
{ "single-step svc", test_step_svc },
};
int main(int argc, char *argv[])
{
int idx;
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(testlist));
for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
testlist[idx].test();
ksft_test_result_pass("%s\n", testlist[idx].name);
}
ksft_finished();
}