vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

Rather than relying on a notifier for associating the KVM with
the group, let's assume that the association has already been
made prior to device_open.  The first time a device is opened,
associate the group KVM with the device.

This fixes a user-triggerable oops in GVT.

Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Zhi Wang <zhi.a.wang@intel.com>
Link: https://lore.kernel.org/r/20220519183311.582380-2-mjrosato@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Matthew Rosato 2022-05-19 14:33:11 -04:00 committed by Alex Williamson
parent c490513c81
commit 421cfe6596
7 changed files with 60 additions and 162 deletions

View file

@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;
static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
{
struct kvm *kvm = vgpu->kvm;
struct kvm *kvm = vgpu->vfio_device.kvm;
int idx;
bool ret;
@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
if (!vgpu->attached)
return -EINVAL;
pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
if (is_error_noslot_pfn(pfn))
return -EINVAL;
return PageTransHuge(pfn_to_page(pfn));

View file

@ -227,9 +227,6 @@ struct intel_vgpu {
struct mutex cache_lock;
struct notifier_block iommu_notifier;
struct notifier_block group_notifier;
struct kvm *kvm;
struct work_struct release_work;
atomic_t released;
struct kvm_page_track_notifier_node track_node;

View file

@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
}
}
static void intel_vgpu_release_work(struct work_struct *work);
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size)
{
@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
/*
 * VFIO group notifier callback for a vGPU.
 *
 * On VFIO_GROUP_NOTIFY_SET_KVM, stores @data as the vGPU's kvm pointer and,
 * when @data is NULL, schedules the vGPU's release_work. Any other @action
 * is ignored. Always returns NOTIFY_OK.
 */
static int intel_vgpu_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
/* Recover the vGPU that embeds this notifier block. */
struct intel_vgpu *vgpu =
container_of(nb, struct intel_vgpu, group_notifier);
/* the only action we care about */
if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
vgpu->kvm = data;
/* NOTE(review): NULL presumably means the KVM was detached - confirm. */
if (!data)
schedule_work(&vgpu->release_work);
}
return NOTIFY_OK;
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
{
struct intel_vgpu *itr;
@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
if (!itr->attached)
continue;
if (vgpu->kvm == itr->kvm) {
if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
ret = true;
goto out;
}
@ -806,7 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
int ret;
vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
@ -817,38 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
goto out;
}
events = VFIO_GROUP_NOTIFY_SET_KVM;
ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
&vgpu->group_notifier);
if (ret != 0) {
gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
ret);
ret = -EEXIST;
if (vgpu->attached)
goto undo_iommu;
ret = -ESRCH;
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
goto undo_iommu;
}
ret = -EEXIST;
if (vgpu->attached)
goto undo_register;
ret = -ESRCH;
if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
goto undo_register;
}
kvm_get_kvm(vgpu->vfio_device.kvm);
ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
goto undo_register;
goto undo_iommu;
vgpu->attached = true;
kvm_get_kvm(vgpu->kvm);
kvmgt_protect_table_init(vgpu);
gvt_cache_init(vgpu);
vgpu->track_node.track_write = kvmgt_page_track_write;
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
&vgpu->nr_cache_entries);
@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;
undo_register:
vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
&vgpu->group_notifier);
undo_iommu:
vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
&vgpu->iommu_notifier);
@ -880,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
}
}
static void __intel_vgpu_release(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
int ret;
@ -898,35 +869,19 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
drm_WARN(&i915->drm, ret,
"vfio_unregister_notifier for iommu failed: %d\n", ret);
ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
&vgpu->group_notifier);
drm_WARN(&i915->drm, ret,
"vfio_unregister_notifier for group failed: %d\n", ret);
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
kvm_put_kvm(vgpu->kvm);
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
kvmgt_protect_table_destroy(vgpu);
gvt_cache_destroy(vgpu);
intel_vgpu_release_msi_eventfd_ctx(vgpu);
vgpu->kvm = NULL;
vgpu->attached = false;
}
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
__intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
}
static void intel_vgpu_release_work(struct work_struct *work)
{
struct intel_vgpu *vgpu =
container_of(work, struct intel_vgpu, release_work);
__intel_vgpu_release(vgpu);
if (vgpu->vfio_device.kvm)
kvm_put_kvm(vgpu->vfio_device.kvm);
}
static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@ -1675,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
return PTR_ERR(vgpu);
}
INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
&intel_vgpu_dev_ops);
@ -1713,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
struct kvm *kvm = info->kvm;
struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;
@ -1743,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
struct kvm *kvm = info->kvm;
struct kvm *kvm = info->vfio_device.kvm;
struct kvm_memory_slot *slot;
int idx;

View file

@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
}
}
/*
 * VFIO group notifier callback for an AP matrix mdev.
 *
 * Ignores every action except VFIO_GROUP_NOTIFY_SET_KVM. For that action a
 * NULL @data calls vfio_ap_mdev_unset_kvm(); a non-NULL @data is passed to
 * vfio_ap_mdev_set_kvm().
 *
 * Returns NOTIFY_DONE if vfio_ap_mdev_set_kvm() returns non-zero, otherwise
 * NOTIFY_OK.
 */
static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
int notify_rc = NOTIFY_OK;
struct ap_matrix_mdev *matrix_mdev;
if (action != VFIO_GROUP_NOTIFY_SET_KVM)
return NOTIFY_OK;
/* Recover the matrix mdev that embeds this notifier block. */
matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
if (!data)
vfio_ap_mdev_unset_kvm(matrix_mdev);
else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
notify_rc = NOTIFY_DONE;
return notify_rc;
}
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
{
struct device *dev;
@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
unsigned long events;
int ret;
matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
events = VFIO_GROUP_NOTIFY_SET_KVM;
if (!vdev->kvm)
return -EINVAL;
ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
&matrix_mdev->group_notifier);
ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
if (ret)
return ret;
@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
&matrix_mdev->iommu_notifier);
if (ret)
goto out_unregister_group;
goto err_kvm;
return 0;
out_unregister_group:
vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
err_kvm:
vfio_ap_mdev_unset_kvm(matrix_mdev);
return ret;
}
@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
&matrix_mdev->iommu_notifier);
vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}

View file

@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
* @group_notifier: notifier block used for specifying callback function for
* handling the VFIO_GROUP_NOTIFY_SET_KVM event
* @iommu_notifier: notifier block used for specifying callback function for
* handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP event
* @kvm: the struct holding guest's state
@ -94,7 +92,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
struct notifier_block group_notifier;
struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;

View file

@ -1088,10 +1088,21 @@ static struct file *vfio_device_open(struct vfio_device *device)
mutex_lock(&device->dev_set->lock);
device->open_count++;
if (device->open_count == 1 && device->ops->open_device) {
ret = device->ops->open_device(device);
if (ret)
goto err_undo_count;
if (device->open_count == 1) {
/*
* Here we pass the KVM pointer with the group under the read
* lock. If the device driver will use it, it must obtain a
* reference and release it during close_device.
*/
down_read(&device->group->group_rwsem);
device->kvm = device->group->kvm;
if (device->ops->open_device) {
ret = device->ops->open_device(device);
if (ret)
goto err_undo_count;
}
up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);
@ -1124,10 +1135,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
err_undo_count:
device->open_count--;
if (device->open_count == 0 && device->kvm)
device->kvm = NULL;
up_read(&device->group->group_rwsem);
mutex_unlock(&device->dev_set->lock);
module_put(device->dev->driver->owner);
err_unassign_container:
@ -1320,9 +1335,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);
module_put(device->dev->driver->owner);
@ -1731,8 +1750,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
* @file: VFIO group file
* @kvm: KVM to link
*
* The kvm pointer will be forwarded to all the vfio_device's attached to the
* VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
* When a VFIO device is first opened the KVM will be available in
* device->kvm if one was associated with the group.
*/
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
@ -1743,8 +1762,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
down_write(&group->group_rwsem);
group->kvm = kvm;
blocking_notifier_call_chain(&group->notifier,
VFIO_GROUP_NOTIFY_SET_KVM, kvm);
up_write(&group->group_rwsem);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
@ -2011,7 +2028,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
struct vfio_iommu_driver *driver;
int ret;
down_read(&group->group_rwsem);
lockdep_assert_held_read(&group->group_rwsem);
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->register_notifier))
@ -2019,7 +2037,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
events, nb);
else
ret = -ENOTTY;
up_read(&group->group_rwsem);
return ret;
}
@ -2031,7 +2048,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
struct vfio_iommu_driver *driver;
int ret;
down_read(&group->group_rwsem);
lockdep_assert_held_read(&group->group_rwsem);
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->unregister_notifier))
@ -2039,47 +2057,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
nb);
else
ret = -ENOTTY;
up_read(&group->group_rwsem);
return ret;
}
/*
 * Register @nb on the group's notifier chain.
 *
 * @events is in/out: the VFIO_GROUP_NOTIFY_SET_KVM bit is cleared from it,
 * and if any other bit remains the call fails with -EINVAL before anything
 * is registered. If SET_KVM was requested and the group already has a kvm,
 * the SET_KVM event is replayed once on the chain under the read side of
 * group_rwsem.
 *
 * Returns 0 on success, -EINVAL for unknown events, or the error from
 * blocking_notifier_chain_register().
 */
static int vfio_register_group_notifier(struct vfio_group *group,
unsigned long *events,
struct notifier_block *nb)
{
int ret;
bool set_kvm = false;
if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
set_kvm = true;
/* clear known events */
*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
/* refuse to continue if still events remaining */
if (*events)
return -EINVAL;
ret = blocking_notifier_chain_register(&group->notifier, nb);
if (ret)
return ret;
/*
* The attaching of kvm and vfio_group might already happen, so
* here we replay once upon registration.
*/
if (set_kvm) {
down_read(&group->group_rwsem);
/* The replay goes to the whole chain, not just the new @nb. */
if (group->kvm)
blocking_notifier_call_chain(&group->notifier,
VFIO_GROUP_NOTIFY_SET_KVM,
group->kvm);
up_read(&group->group_rwsem);
}
return 0;
}
int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type, unsigned long *events,
struct notifier_block *nb)
@ -2095,9 +2076,6 @@ int vfio_register_notifier(struct vfio_device *device,
case VFIO_IOMMU_NOTIFY:
ret = vfio_register_iommu_notifier(group, events, nb);
break;
case VFIO_GROUP_NOTIFY:
ret = vfio_register_group_notifier(group, events, nb);
break;
default:
ret = -EINVAL;
}
@ -2119,9 +2097,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
case VFIO_IOMMU_NOTIFY:
ret = vfio_unregister_iommu_notifier(group, nb);
break;
case VFIO_GROUP_NOTIFY:
ret = blocking_notifier_chain_unregister(&group->notifier, nb);
break;
default:
ret = -EINVAL;
}

View file

@ -36,6 +36,8 @@ struct vfio_device {
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
unsigned int migration_flags;
/* Driver must reference the kvm during open_device or never touch it */
struct kvm *kvm;
/* Members below here are private, not for driver use */
refcount_t refcount;
@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
/* each type has independent events */
enum vfio_notify_type {
VFIO_IOMMU_NOTIFY = 0,
VFIO_GROUP_NOTIFY = 1,
};
/* events for VFIO_IOMMU_NOTIFY */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
/* events for VFIO_GROUP_NOTIFY */
#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
extern int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type,
unsigned long *required_events,