VFIO updates for v5.13-rc1

Merge tag 'vfio-v5.13-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Embed struct vfio_device into vfio driver structures (Jason
   Gunthorpe)

 - Make vfio_mdev type safe (Jason Gunthorpe)

 - Remove vfio-pci NVLink2 extensions for POWER9 (Christoph Hellwig)

 - Update vfio-pci IGD extensions for OpRegion 2.1+ (Fred Gao)

 - Various spelling/blank line fixes (Zhen Lei, Zhou Wang, Bhaskar
   Chowdhury)

 - Simplify unpin_pages error handling (Shenming Lu)

 - Fix i915 mdev Kconfig dependency (Arnd Bergmann)

 - Remove unused structure member (Keqian Zhu)

* tag 'vfio-v5.13-rc1' of git://github.com/awilliam/linux-vfio: (43 commits)
  vfio/gvt: fix DRM_I915_GVT dependency on VFIO_MDEV
  vfio/iommu_type1: Remove unused pinned_page_dirty_scope in vfio_iommu
  vfio/mdev: Correct the function signatures for the mdev_type_attributes
  vfio/mdev: Remove kobj from mdev_parent_ops->create()
  vfio/gvt: Use mdev_get_type_group_id()
  vfio/gvt: Make DRM_I915_GVT depend on VFIO_MDEV
  vfio/mbochs: Use mdev_get_type_group_id()
  vfio/mdpy: Use mdev_get_type_group_id()
  vfio/mtty: Use mdev_get_type_group_id()
  vfio/mdev: Add mdev/mtype_get_type_group_id()
  vfio/mdev: Remove duplicate storage of parent in mdev_device
  vfio/mdev: Add missing error handling to dev_set_name()
  vfio/mdev: Reorganize mdev_device_create()
  vfio/mdev: Add missing reference counting to mdev_type
  vfio/mdev: Expose mdev_get/put_parent to mdev_private.h
  vfio/mdev: Use struct mdev_type in struct mdev_device
  vfio/mdev: Simplify driver registration
  vfio/mdev: Add missing typesafety around mdev_device
  vfio/mdev: Do not allow a mdev_type to have a NULL parent pointer
  vfio/mdev: Fix missing static's on MDEV_TYPE_ATTR's
  ...
Merged by Linus Torvalds on 2021-04-28 17:19:47 -07:00 (commit 238da4d004).
35 changed files with 797 additions and 1365 deletions

Documentation/driver-api/vfio-mediated-device.rst

@@ -98,15 +98,13 @@ structure to represent a mediated device's driver::
/*
* struct mdev_driver [2] - Mediated device's driver
* @name: driver name
* @probe: called when new device created
* @remove: called when device removed
* @driver: device driver structure
*/
struct mdev_driver {
const char *name;
int (*probe) (struct device *dev);
void (*remove) (struct device *dev);
int (*probe) (struct mdev_device *dev);
void (*remove) (struct mdev_device *dev);
struct device_driver driver;
};
@@ -115,8 +113,7 @@ to register and unregister itself with the core driver:
* Register::
extern int mdev_register_driver(struct mdev_driver *drv,
struct module *owner);
extern int mdev_register_driver(struct mdev_driver *drv);
* Unregister::
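
Taken together, registration for a vendor driver under the new interface
looks roughly like the sketch below. The my_mdev_* names are hypothetical;
the pattern mirrors the vfio_mdev driver conversion later in this commit::

    /* Sketch only: probe/remove now take a typed mdev_device, and the
     * module owner comes from the embedded device_driver. */
    static int my_mdev_probe(struct mdev_device *mdev)
    {
            /* no casting from a raw struct device any more */
            return 0;
    }

    static void my_mdev_remove(struct mdev_device *mdev)
    {
    }

    static struct mdev_driver my_mdev_driver = {
            .driver = {
                    .name = "my_mdev",
                    .owner = THIS_MODULE,
            },
            .probe = my_mdev_probe,
            .remove = my_mdev_remove,
    };

    static int __init my_mdev_init(void)
    {
            /* single-argument form; the core sets driver.bus itself */
            return mdev_register_driver(&my_mdev_driver);
    }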

Documentation/driver-api/vfio.rst

@@ -249,35 +249,41 @@ VFIO bus driver API
VFIO bus drivers, such as vfio-pci make use of only a few interfaces
into VFIO core. When devices are bound and unbound to the driver,
the driver should call vfio_add_group_dev() and vfio_del_group_dev()
respectively::
the driver should call vfio_register_group_dev() and
vfio_unregister_group_dev() respectively::
extern int vfio_add_group_dev(struct device *dev,
const struct vfio_device_ops *ops,
void *device_data);
void vfio_init_group_dev(struct vfio_device *device,
struct device *dev,
const struct vfio_device_ops *ops);
int vfio_register_group_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
extern void *vfio_del_group_dev(struct device *dev);
vfio_add_group_dev() indicates to the core to begin tracking the
iommu_group of the specified dev and register the dev as owned by
a VFIO bus driver. The driver provides an ops structure for callbacks
The driver should embed the vfio_device in its own structure and call
vfio_init_group_dev() to pre-configure it before going to registration.
vfio_register_group_dev() indicates to the core to begin tracking the
iommu_group of the specified dev and register the dev as owned by a VFIO bus
driver. Once vfio_register_group_dev() returns it is possible for userspace to
start accessing the driver, thus the driver should ensure it is completely
ready before calling it. The driver provides an ops structure for callbacks
similar to a file operations structure::
struct vfio_device_ops {
int (*open)(void *device_data);
void (*release)(void *device_data);
ssize_t (*read)(void *device_data, char __user *buf,
int (*open)(struct vfio_device *vdev);
void (*release)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t (*write)(void *device_data, const char __user *buf,
ssize_t (*write)(struct vfio_device *vdev,
const char __user *buf,
size_t size, loff_t *ppos);
long (*ioctl)(void *device_data, unsigned int cmd,
long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
int (*mmap)(void *device_data, struct vm_area_struct *vma);
int (*mmap)(struct vfio_device *vdev,
struct vm_area_struct *vma);
};
Each function is passed the device_data that was originally registered
in the vfio_add_group_dev() call above. This allows the bus driver
an easy place to store its opaque, private data. The open/release
Each function is passed the vdev that was originally registered
in the vfio_register_group_dev() call above. This allows the bus driver
to obtain its private data using container_of(). The open/release
callbacks are issued when a new file descriptor is created for a
device (via VFIO_GROUP_GET_DEVICE_FD). The ioctl interface provides
a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap
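
Put together, a bus driver following this scheme looks roughly like the
sketch below (struct my_device and the my_* callbacks are hypothetical,
shown only to illustrate the embedding and the container_of() recovery)::

    struct my_device {
            struct vfio_device vdev;    /* embedded, not a pointer */
            void __iomem *regs;         /* driver-private state */
    };

    static int my_open(struct vfio_device *core_vdev)
    {
            struct my_device *mydev =
                    container_of(core_vdev, struct my_device, vdev);

            /* ... set up mydev->regs ... */
            return 0;
    }

    static const struct vfio_device_ops my_vfio_ops = {
            .open = my_open,
    };

    static int my_probe(struct device *dev)
    {
            struct my_device *mydev;
            int ret;

            mydev = kzalloc(sizeof(*mydev), GFP_KERNEL);
            if (!mydev)
                    return -ENOMEM;

            vfio_init_group_dev(&mydev->vdev, dev, &my_vfio_ops);
            /* everything must be ready here: userspace can reach the
             * device as soon as registration succeeds */
            ret = vfio_register_group_dev(&mydev->vdev);
            if (ret)
                    kfree(mydev);
            return ret;
    }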

drivers/gpu/drm/i915/Kconfig

@@ -101,6 +101,7 @@ config DRM_I915_GVT
bool "Enable Intel GVT-g graphics virtualization host support"
depends on DRM_I915
depends on 64BIT
depends on VFIO_MDEV=y || VFIO_MDEV=DRM_I915
default n
help
Choose this option if you want to enable Intel GVT-g graphics

drivers/gpu/drm/i915/gvt/gvt.c

@@ -46,32 +46,23 @@ static const char * const supported_hypervisors[] = {
[INTEL_GVT_HYPERVISOR_KVM] = "KVM",
};
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
const char *name)
static struct intel_vgpu_type *
intel_gvt_find_vgpu_type(struct intel_gvt *gvt, unsigned int type_group_id)
{
const char *driver_name =
dev_driver_string(gvt->gt->i915->drm.dev);
int i;
name += strlen(driver_name) + 1;
for (i = 0; i < gvt->num_types; i++) {
struct intel_vgpu_type *t = &gvt->types[i];
if (!strncmp(t->name, name, sizeof(t->name)))
return t;
}
return NULL;
if (WARN_ON(type_group_id >= gvt->num_types))
return NULL;
return &gvt->types[type_group_id];
}
static ssize_t available_instances_show(struct kobject *kobj,
struct device *dev, char *buf)
static ssize_t available_instances_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr,
char *buf)
{
struct intel_vgpu_type *type;
unsigned int num = 0;
void *gvt = kdev_to_i915(dev)->gvt;
void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype));
if (!type)
num = 0;
else
@@ -80,19 +71,19 @@ static ssize_t available_instances_show(struct kobject *kobj,
return sprintf(buf, "%u\n", num);
}
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t device_api_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}
static ssize_t description_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t description_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
struct intel_vgpu_type *type;
void *gvt = kdev_to_i915(dev)->gvt;
void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype));
if (!type)
return 0;

drivers/gpu/drm/i915/gvt/gvt.h

@@ -574,8 +574,8 @@ struct intel_gvt_ops {
void (*vgpu_reset)(struct intel_vgpu *);
void (*vgpu_activate)(struct intel_vgpu *);
void (*vgpu_deactivate)(struct intel_vgpu *);
struct intel_vgpu_type *(*gvt_find_vgpu_type)(struct intel_gvt *gvt,
const char *name);
struct intel_vgpu_type *(*gvt_find_vgpu_type)(
struct intel_gvt *gvt, unsigned int type_group_id);
bool (*get_gvt_attrs)(struct attribute_group ***intel_vgpu_type_groups);
int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *);
int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);

drivers/gpu/drm/i915/gvt/kvmgt.c

@@ -689,7 +689,7 @@ static void kvmgt_put_vfio_device(void *vgpu)
vfio_device_put(vdev->vfio_device);
}
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
static int intel_vgpu_create(struct mdev_device *mdev)
{
struct intel_vgpu *vgpu = NULL;
struct intel_vgpu_type *type;
@@ -700,10 +700,9 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
pdev = mdev_parent_dev(mdev);
gvt = kdev_to_i915(pdev)->gvt;
type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj));
type = intel_gvt_ops->gvt_find_vgpu_type(gvt,
mdev_get_type_group_id(mdev));
if (!type) {
gvt_vgpu_err("failed to find type %s to create\n",
kobject_name(kobj));
ret = -EINVAL;
goto out;
}

drivers/s390/cio/vfio_ccw_ops.c

@@ -71,23 +71,26 @@ static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
return NOTIFY_DONE;
}
static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t name_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
}
static MDEV_TYPE_ATTR_RO(name);
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t device_api_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
}
static MDEV_TYPE_ATTR_RO(device_api);
static ssize_t available_instances_show(struct kobject *kobj,
struct device *dev, char *buf)
static ssize_t available_instances_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr,
char *buf)
{
struct vfio_ccw_private *private = dev_get_drvdata(dev);
struct vfio_ccw_private *private =
dev_get_drvdata(mtype_get_parent_dev(mtype));
return sprintf(buf, "%d\n", atomic_read(&private->avail));
}
@@ -110,7 +113,7 @@ static struct attribute_group *mdev_type_groups[] = {
NULL,
};
static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
static int vfio_ccw_mdev_create(struct mdev_device *mdev)
{
struct vfio_ccw_private *private =
dev_get_drvdata(mdev_parent_dev(mdev));

drivers/s390/crypto/vfio_ap_ops.c

@@ -335,7 +335,7 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
matrix->adm_max = info->apxa ? info->Nd : 15;
}
static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
static int vfio_ap_mdev_create(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev;
@@ -386,15 +386,17 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
return 0;
}
static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t name_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
}
static MDEV_TYPE_ATTR_RO(name);
static ssize_t available_instances_show(struct kobject *kobj,
struct device *dev, char *buf)
static ssize_t available_instances_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n",
atomic_read(&matrix_dev->available_instances));
@@ -402,8 +404,8 @@ static ssize_t available_instances_show(struct kobject *kobj,
static MDEV_TYPE_ATTR_RO(available_instances);
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t device_api_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
}

drivers/vfio/fsl-mc/vfio_fsl_mc.c

@@ -75,7 +75,8 @@ static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
goto unlock;
}
cont_vdev = vfio_device_data(device);
cont_vdev =
container_of(device, struct vfio_fsl_mc_device, vdev);
if (!cont_vdev || !cont_vdev->reflck) {
vfio_device_put(device);
ret = -ENODEV;
@@ -135,9 +136,10 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
kfree(vdev->regions);
}
static int vfio_fsl_mc_open(void *device_data)
static int vfio_fsl_mc_open(struct vfio_device *core_vdev)
{
struct vfio_fsl_mc_device *vdev = device_data;
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
int ret;
if (!try_module_get(THIS_MODULE))
@@ -161,9 +163,10 @@ static int vfio_fsl_mc_open(void *device_data)
return ret;
}
static void vfio_fsl_mc_release(void *device_data)
static void vfio_fsl_mc_release(struct vfio_device *core_vdev)
{
struct vfio_fsl_mc_device *vdev = device_data;
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
int ret;
mutex_lock(&vdev->reflck->lock);
@@ -197,11 +200,12 @@ static void vfio_fsl_mc_release(void *device_data)
module_put(THIS_MODULE);
}
static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
unsigned long arg)
static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
unsigned long minsz;
struct vfio_fsl_mc_device *vdev = device_data;
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
switch (cmd) {
@@ -327,10 +331,11 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
}
}
static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
static ssize_t vfio_fsl_mc_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
struct vfio_fsl_mc_device *vdev = device_data;
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
@@ -404,10 +409,12 @@ static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
return 0;
}
static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
size_t count, loff_t *ppos)
static ssize_t vfio_fsl_mc_write(struct vfio_device *core_vdev,
const char __user *buf, size_t count,
loff_t *ppos)
{
struct vfio_fsl_mc_device *vdev = device_data;
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
@@ -468,9 +475,11 @@ static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
size, vma->vm_page_prot);
}
static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev,
struct vm_area_struct *vma)
{
struct vfio_fsl_mc_device *vdev = device_data;
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
unsigned int index;
@@ -568,25 +577,41 @@ static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev)
dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret);
goto out_nc_unreg;
}
ret = dprc_scan_container(mc_dev, false);
if (ret) {
dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
goto out_dprc_cleanup;
}
return 0;
out_dprc_cleanup:
dprc_remove_devices(mc_dev, NULL, 0);
dprc_cleanup(mc_dev);
out_nc_unreg:
bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
vdev->nb.notifier_call = NULL;
return ret;
}
static int vfio_fsl_mc_scan_container(struct fsl_mc_device *mc_dev)
{
int ret;
/* non dprc devices do not scan for other devices */
if (!is_fsl_mc_bus_dprc(mc_dev))
return 0;
ret = dprc_scan_container(mc_dev, false);
if (ret) {
dev_err(&mc_dev->dev,
"VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
dprc_remove_devices(mc_dev, NULL, 0);
return ret;
}
return 0;
}
static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev)
{
struct fsl_mc_device *mc_dev = vdev->mc_dev;
if (!is_fsl_mc_bus_dprc(mc_dev))
return;
dprc_cleanup(mc_dev);
bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
}
static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
{
struct iommu_group *group;
@@ -600,36 +625,50 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
return -EINVAL;
}
vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev) {
ret = -ENOMEM;
goto out_group_put;
}
vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops);
vdev->mc_dev = mc_dev;
ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
if (ret) {
dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
goto out_group_put;
}
mutex_init(&vdev->igate);
ret = vfio_fsl_mc_reflck_attach(vdev);
if (ret)
goto out_group_dev;
goto out_kfree;
ret = vfio_fsl_mc_init_device(vdev);
if (ret)
goto out_reflck;
mutex_init(&vdev->igate);
ret = vfio_register_group_dev(&vdev->vdev);
if (ret) {
dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
goto out_device;
}
/*
* This triggers recursion into vfio_fsl_mc_probe() on another device
* and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the
vfio_register_group_dev() above. It has no impact on this vdev, so it is
* safe to be after the vfio device is made live.
*/
ret = vfio_fsl_mc_scan_container(mc_dev);
if (ret)
goto out_group_dev;
dev_set_drvdata(dev, vdev);
return 0;
out_group_dev:
vfio_unregister_group_dev(&vdev->vdev);
out_device:
vfio_fsl_uninit_device(vdev);
out_reflck:
vfio_fsl_mc_reflck_put(vdev->reflck);
out_group_dev:
vfio_del_group_dev(dev);
out_kfree:
kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, dev);
return ret;
@@ -637,25 +676,17 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
{
struct vfio_fsl_mc_device *vdev;
struct device *dev = &mc_dev->dev;
struct vfio_fsl_mc_device *vdev = dev_get_drvdata(dev);
vdev = vfio_del_group_dev(dev);
if (!vdev)
return -EINVAL;
vfio_unregister_group_dev(&vdev->vdev);
mutex_destroy(&vdev->igate);
dprc_remove_devices(mc_dev, NULL, 0);
vfio_fsl_uninit_device(vdev);
vfio_fsl_mc_reflck_put(vdev->reflck);
if (is_fsl_mc_bus_dprc(mc_dev)) {
dprc_remove_devices(mc_dev, NULL, 0);
dprc_cleanup(mc_dev);
}
if (vdev->nb.notifier_call)
bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
kfree(vdev);
vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
return 0;

drivers/vfio/fsl-mc/vfio_fsl_mc_private.h

@@ -36,6 +36,7 @@ struct vfio_fsl_mc_region {
};
struct vfio_fsl_mc_device {
struct vfio_device vdev;
struct fsl_mc_device *mc_dev;
struct notifier_block nb;
int refcnt;

drivers/vfio/mdev/mdev_core.c

@@ -29,39 +29,39 @@ static DEFINE_MUTEX(mdev_list_lock);
struct device *mdev_parent_dev(struct mdev_device *mdev)
{
return mdev->parent->dev;
return mdev->type->parent->dev;
}
EXPORT_SYMBOL(mdev_parent_dev);
void *mdev_get_drvdata(struct mdev_device *mdev)
/*
* Return the index in supported_type_groups that this mdev_device was created
* from.
*/
unsigned int mdev_get_type_group_id(struct mdev_device *mdev)
{
return mdev->driver_data;
return mdev->type->type_group_id;
}
EXPORT_SYMBOL(mdev_get_drvdata);
EXPORT_SYMBOL(mdev_get_type_group_id);
void mdev_set_drvdata(struct mdev_device *mdev, void *data)
/*
* Used in mdev_type_attribute sysfs functions to return the index in the
* supported_type_groups that the sysfs is called from.
*/
unsigned int mtype_get_type_group_id(struct mdev_type *mtype)
{
mdev->driver_data = data;
return mtype->type_group_id;
}
EXPORT_SYMBOL(mdev_set_drvdata);
EXPORT_SYMBOL(mtype_get_type_group_id);
struct device *mdev_dev(struct mdev_device *mdev)
/*
* Used in mdev_type_attribute sysfs functions to return the parent struct
* device
*/
struct device *mtype_get_parent_dev(struct mdev_type *mtype)
{
return &mdev->dev;
return mtype->parent->dev;
}
EXPORT_SYMBOL(mdev_dev);
struct mdev_device *mdev_from_dev(struct device *dev)
{
return dev_is_mdev(dev) ? to_mdev_device(dev) : NULL;
}
EXPORT_SYMBOL(mdev_from_dev);
const guid_t *mdev_uuid(struct mdev_device *mdev)
{
return &mdev->uuid;
}
EXPORT_SYMBOL(mdev_uuid);
EXPORT_SYMBOL(mtype_get_parent_dev);
/* Should be called holding parent_list_lock */
static struct mdev_parent *__find_parent_device(struct device *dev)
@@ -75,7 +75,7 @@ static struct mdev_parent *__find_parent_device(struct device *dev)
return NULL;
}
static void mdev_release_parent(struct kref *kref)
void mdev_release_parent(struct kref *kref)
{
struct mdev_parent *parent = container_of(kref, struct mdev_parent,
ref);
@@ -85,31 +85,14 @@ static void mdev_release_parent(struct kref *kref)
put_device(dev);
}
static struct mdev_parent *mdev_get_parent(struct mdev_parent *parent)
{
if (parent)
kref_get(&parent->ref);
return parent;
}
static void mdev_put_parent(struct mdev_parent *parent)
{
if (parent)
kref_put(&parent->ref, mdev_release_parent);
}
/* Caller must hold parent unreg_sem read or write lock */
static void mdev_device_remove_common(struct mdev_device *mdev)
{
struct mdev_parent *parent;
struct mdev_type *type;
struct mdev_parent *parent = mdev->type->parent;
int ret;
type = to_mdev_type(mdev->type_kobj);
mdev_remove_sysfs_files(&mdev->dev, type);
mdev_remove_sysfs_files(mdev);
device_del(&mdev->dev);
parent = mdev->parent;
lockdep_assert_held(&parent->unreg_sem);
ret = parent->ops->remove(mdev);
if (ret)
@@ -117,17 +100,14 @@ static void mdev_device_remove_common(struct mdev_device *mdev)
/* Balances with device_initialize() */
put_device(&mdev->dev);
mdev_put_parent(parent);
}
static int mdev_device_remove_cb(struct device *dev, void *data)
{
if (dev_is_mdev(dev)) {
struct mdev_device *mdev;
struct mdev_device *mdev = mdev_from_dev(dev);
mdev = to_mdev_device(dev);
if (mdev)
mdev_device_remove_common(mdev);
}
return 0;
}
@@ -256,8 +236,13 @@ void mdev_unregister_device(struct device *dev)
}
EXPORT_SYMBOL(mdev_unregister_device);
static void mdev_device_free(struct mdev_device *mdev)
static void mdev_device_release(struct device *dev)
{
struct mdev_device *mdev = to_mdev_device(dev);
/* Pairs with the get in mdev_device_create() */
kobject_put(&mdev->type->kobj);
mutex_lock(&mdev_list_lock);
list_del(&mdev->next);
mutex_unlock(&mdev_list_lock);
@@ -266,24 +251,11 @@ static void mdev_device_free(struct mdev_device *mdev)
kfree(mdev);
}
static void mdev_device_release(struct device *dev)
{
struct mdev_device *mdev = to_mdev_device(dev);
mdev_device_free(mdev);
}
int mdev_device_create(struct kobject *kobj,
struct device *dev, const guid_t *uuid)
int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
{
int ret;
struct mdev_device *mdev, *tmp;
struct mdev_parent *parent;
struct mdev_type *type = to_mdev_type(kobj);
parent = mdev_get_parent(type->parent);
if (!parent)
return -EINVAL;
struct mdev_parent *parent = type->parent;
mutex_lock(&mdev_list_lock);
@@ -291,50 +263,50 @@ int mdev_device_create(struct kobject *kobj,
list_for_each_entry(tmp, &mdev_list, next) {
if (guid_equal(&tmp->uuid, uuid)) {
mutex_unlock(&mdev_list_lock);
ret = -EEXIST;
goto mdev_fail;
return -EEXIST;
}
}
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev) {
mutex_unlock(&mdev_list_lock);
ret = -ENOMEM;
goto mdev_fail;
return -ENOMEM;
}
device_initialize(&mdev->dev);
mdev->dev.parent = parent->dev;
mdev->dev.bus = &mdev_bus_type;
mdev->dev.release = mdev_device_release;
mdev->dev.groups = parent->ops->mdev_attr_groups;
mdev->type = type;
/* Pairs with the put in mdev_device_release() */
kobject_get(&type->kobj);
guid_copy(&mdev->uuid, uuid);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
mdev->parent = parent;
ret = dev_set_name(&mdev->dev, "%pUl", uuid);
if (ret)
goto out_put_device;
/* Check if parent unregistration has started */
if (!down_read_trylock(&parent->unreg_sem)) {
mdev_device_free(mdev);
ret = -ENODEV;
goto mdev_fail;
goto out_put_device;
}
device_initialize(&mdev->dev);
mdev->dev.parent = dev;
mdev->dev.bus = &mdev_bus_type;
mdev->dev.release = mdev_device_release;
dev_set_name(&mdev->dev, "%pUl", uuid);
mdev->dev.groups = parent->ops->mdev_attr_groups;
mdev->type_kobj = kobj;
ret = parent->ops->create(kobj, mdev);
ret = parent->ops->create(mdev);
if (ret)
goto ops_create_fail;
goto out_unlock;
ret = device_add(&mdev->dev);
if (ret)
goto add_fail;
goto out_remove;
ret = mdev_create_sysfs_files(&mdev->dev, type);
ret = mdev_create_sysfs_files(mdev);
if (ret)
goto sysfs_fail;
goto out_del;
mdev->active = true;
dev_dbg(&mdev->dev, "MDEV: created\n");
@@ -342,24 +314,21 @@ int mdev_device_create(struct kobject *kobj,
return 0;
sysfs_fail:
out_del:
device_del(&mdev->dev);
add_fail:
out_remove:
parent->ops->remove(mdev);
ops_create_fail:
out_unlock:
up_read(&parent->unreg_sem);
out_put_device:
put_device(&mdev->dev);
mdev_fail:
mdev_put_parent(parent);
return ret;
}
int mdev_device_remove(struct device *dev)
int mdev_device_remove(struct mdev_device *mdev)
{
struct mdev_device *mdev, *tmp;
struct mdev_parent *parent;
mdev = to_mdev_device(dev);
struct mdev_device *tmp;
struct mdev_parent *parent = mdev->type->parent;
mutex_lock(&mdev_list_lock);
list_for_each_entry(tmp, &mdev_list, next) {
@@ -380,7 +349,6 @@ int mdev_device_remove(struct device *dev)
mdev->active = false;
mutex_unlock(&mdev_list_lock);
parent = mdev->parent;
/* Check if parent unregistration has started */
if (!down_read_trylock(&parent->unreg_sem))
return -ENODEV;
@@ -390,24 +358,6 @@ int mdev_device_remove(struct device *dev)
return 0;
}
int mdev_set_iommu_device(struct device *dev, struct device *iommu_device)
{
struct mdev_device *mdev = to_mdev_device(dev);
mdev->iommu_device = iommu_device;
return 0;
}
EXPORT_SYMBOL(mdev_set_iommu_device);
struct device *mdev_get_iommu_device(struct device *dev)
{
struct mdev_device *mdev = to_mdev_device(dev);
return mdev->iommu_device;
}
EXPORT_SYMBOL(mdev_get_iommu_device);
static int __init mdev_init(void)
{
return mdev_bus_register();

drivers/vfio/mdev/mdev_driver.c

@@ -39,7 +39,8 @@ static void mdev_detach_iommu(struct mdev_device *mdev)
static int mdev_probe(struct device *dev)
{
struct mdev_driver *drv = to_mdev_driver(dev->driver);
struct mdev_driver *drv =
container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
int ret;
@@ -47,8 +48,8 @@ static int mdev_probe(struct device *dev)
if (ret)
return ret;
if (drv && drv->probe) {
ret = drv->probe(dev);
if (drv->probe) {
ret = drv->probe(mdev);
if (ret)
mdev_detach_iommu(mdev);
}
@@ -58,11 +59,12 @@ static int mdev_probe(struct device *dev)
static int mdev_remove(struct device *dev)
{
struct mdev_driver *drv = to_mdev_driver(dev->driver);
struct mdev_driver *drv =
container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
if (drv && drv->remove)
drv->remove(dev);
if (drv->remove)
drv->remove(mdev);
mdev_detach_iommu(mdev);
@@ -79,16 +81,13 @@ EXPORT_SYMBOL_GPL(mdev_bus_type);
/**
* mdev_register_driver - register a new MDEV driver
* @drv: the driver to register
* @owner: module owner of driver to be registered
*
* Returns a negative value on error, otherwise 0.
**/
int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
int mdev_register_driver(struct mdev_driver *drv)
{
/* initialize common driver fields */
drv->driver.name = drv->name;
drv->driver.bus = &mdev_bus_type;
drv->driver.owner = owner;
/* register with core */
return driver_register(&drv->driver);

drivers/vfio/mdev/mdev_private.h

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Mediated device interal definitions
* Mediated device internal definitions
*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* Author: Neo Jia <cjia@nvidia.com>
@@ -24,26 +24,12 @@ struct mdev_parent {
struct rw_semaphore unreg_sem;
};
struct mdev_device {
struct device dev;
struct mdev_parent *parent;
guid_t uuid;
void *driver_data;
struct list_head next;
struct kobject *type_kobj;
struct device *iommu_device;
bool active;
};
#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)
#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
struct mdev_type {
struct kobject kobj;
struct kobject *devices_kobj;
struct mdev_parent *parent;
struct list_head next;
struct attribute_group *group;
unsigned int type_group_id;
};
#define to_mdev_type_attr(_attr) \
@@ -54,11 +40,22 @@ struct mdev_type {
int parent_create_sysfs_files(struct mdev_parent *parent);
void parent_remove_sysfs_files(struct mdev_parent *parent);
int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
int mdev_create_sysfs_files(struct mdev_device *mdev);
void mdev_remove_sysfs_files(struct mdev_device *mdev);
int mdev_device_create(struct kobject *kobj,
struct device *dev, const guid_t *uuid);
int mdev_device_remove(struct device *dev);
int mdev_device_create(struct mdev_type *kobj, const guid_t *uuid);
int mdev_device_remove(struct mdev_device *dev);
void mdev_release_parent(struct kref *kref);
static inline void mdev_get_parent(struct mdev_parent *parent)
{
kref_get(&parent->ref);
}
static inline void mdev_put_parent(struct mdev_parent *parent)
{
kref_put(&parent->ref, mdev_release_parent);
}
#endif /* MDEV_PRIVATE_H */

drivers/vfio/mdev/mdev_sysfs.c

@@ -26,7 +26,7 @@ static ssize_t mdev_type_attr_show(struct kobject *kobj,
ssize_t ret = -EIO;
if (attr->show)
ret = attr->show(kobj, type->parent->dev, buf);
ret = attr->show(type, attr, buf);
return ret;
}
@@ -39,7 +39,7 @@ static ssize_t mdev_type_attr_store(struct kobject *kobj,
ssize_t ret = -EIO;
if (attr->store)
ret = attr->store(&type->kobj, type->parent->dev, buf, count);
ret = attr->store(type, attr, buf, count);
return ret;
}
@@ -48,8 +48,9 @@ static const struct sysfs_ops mdev_type_sysfs_ops = {
.store = mdev_type_attr_store,
};
static ssize_t create_store(struct kobject *kobj, struct device *dev,
const char *buf, size_t count)
static ssize_t create_store(struct mdev_type *mtype,
struct mdev_type_attribute *attr, const char *buf,
size_t count)
{
char *str;
guid_t uuid;
@@ -67,7 +68,7 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev,
if (ret)
return ret;
ret = mdev_device_create(kobj, dev, &uuid);
ret = mdev_device_create(mtype, &uuid);
if (ret)
return ret;
@@ -81,6 +82,8 @@ static void mdev_type_release(struct kobject *kobj)
struct mdev_type *type = to_mdev_type(kobj);
pr_debug("Releasing group %s\n", kobj->name);
/* Pairs with the get in add_mdev_supported_type() */
mdev_put_parent(type->parent);
kfree(type);
}
@@ -90,9 +93,11 @@ static struct kobj_type mdev_type_ktype = {
};
static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
struct attribute_group *group)
unsigned int type_group_id)
{
struct mdev_type *type;
struct attribute_group *group =
parent->ops->supported_type_groups[type_group_id];
int ret;
if (!group->name) {
@@ -105,6 +110,10 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
return ERR_PTR(-ENOMEM);
type->kobj.kset = parent->mdev_types_kset;
type->parent = parent;
/* Pairs with the put in mdev_type_release() */
mdev_get_parent(parent);
type->type_group_id = type_group_id;
ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
"%s-%s", dev_driver_string(parent->dev),
@@ -130,9 +139,6 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
ret = -ENOMEM;
goto attrs_failed;
}
type->group = group;
type->parent = parent;
return type;
attrs_failed:
@@ -147,8 +153,11 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
static void remove_mdev_supported_type(struct mdev_type *type)
{
struct attribute_group *group =
type->parent->ops->supported_type_groups[type->type_group_id];
sysfs_remove_files(&type->kobj,
(const struct attribute **)type->group->attrs);
(const struct attribute **)group->attrs);
kobject_put(type->devices_kobj);
sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
kobject_del(&type->kobj);
@@ -162,8 +171,7 @@ static int add_mdev_supported_type_groups(struct mdev_parent *parent)
for (i = 0; parent->ops->supported_type_groups[i]; i++) {
struct mdev_type *type;
type = add_mdev_supported_type(parent,
parent->ops->supported_type_groups[i]);
type = add_mdev_supported_type(parent, i);
if (IS_ERR(type)) {
struct mdev_type *ltype, *tmp;
@@ -225,6 +233,7 @@ int parent_create_sysfs_files(struct mdev_parent *parent)
static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct mdev_device *mdev = to_mdev_device(dev);
unsigned long val;
if (kstrtoul(buf, 0, &val) < 0)
@@ -233,7 +242,7 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
if (val && device_remove_file_self(dev, attr)) {
int ret;
ret = mdev_device_remove(dev);
ret = mdev_device_remove(mdev);
if (ret)
return ret;
}
@@ -248,34 +257,38 @@ static const struct attribute *mdev_device_attrs[] = {
NULL,
};
int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
int mdev_create_sysfs_files(struct mdev_device *mdev)
{
struct mdev_type *type = mdev->type;
struct kobject *kobj = &mdev->dev.kobj;
int ret;
ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
ret = sysfs_create_link(type->devices_kobj, kobj, dev_name(&mdev->dev));
if (ret)
return ret;
ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
ret = sysfs_create_link(kobj, &type->kobj, "mdev_type");
if (ret)
goto type_link_failed;
ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
ret = sysfs_create_files(kobj, mdev_device_attrs);
if (ret)
goto create_files_failed;
return ret;
create_files_failed:
sysfs_remove_link(&dev->kobj, "mdev_type");
sysfs_remove_link(kobj, "mdev_type");
type_link_failed:
sysfs_remove_link(type->devices_kobj, dev_name(dev));
sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev));
return ret;
}
void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type)
void mdev_remove_sysfs_files(struct mdev_device *mdev)
{
sysfs_remove_files(&dev->kobj, mdev_device_attrs);
sysfs_remove_link(&dev->kobj, "mdev_type");
sysfs_remove_link(type->devices_kobj, dev_name(dev));
struct kobject *kobj = &mdev->dev.kobj;
sysfs_remove_files(kobj, mdev_device_attrs);
sysfs_remove_link(kobj, "mdev_type");
sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev));
}

drivers/vfio/mdev/vfio_mdev.c

@@ -21,10 +21,11 @@
#define DRIVER_AUTHOR "NVIDIA Corporation"
#define DRIVER_DESC "VFIO based driver for Mediated device"
static int vfio_mdev_open(void *device_data)
static int vfio_mdev_open(struct vfio_device *core_vdev)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
int ret;
if (unlikely(!parent->ops->open))
@@ -40,10 +41,10 @@ static int vfio_mdev_open(void *device_data)
return ret;
}
static void vfio_mdev_release(void *device_data)
static void vfio_mdev_release(struct vfio_device *core_vdev)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
if (likely(parent->ops->release))
parent->ops->release(mdev);
@@ -51,11 +52,11 @@ static void vfio_mdev_release(void *device_data)
module_put(THIS_MODULE);
}
static long vfio_mdev_unlocked_ioctl(void *device_data,
static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->ioctl))
return -EINVAL;
@@ -63,11 +64,11 @@ static long vfio_mdev_unlocked_ioctl(void *device_data,
return parent->ops->ioctl(mdev, cmd, arg);
}
static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
static ssize_t vfio_mdev_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->read))
return -EINVAL;
@@ -75,11 +76,12 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
return parent->ops->read(mdev, buf, count, ppos);
}
static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
size_t count, loff_t *ppos)
static ssize_t vfio_mdev_write(struct vfio_device *core_vdev,
const char __user *buf, size_t count,
loff_t *ppos)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->write))
return -EINVAL;
@@ -87,10 +89,11 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
return parent->ops->write(mdev, buf, count, ppos);
}
static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
static int vfio_mdev_mmap(struct vfio_device *core_vdev,
struct vm_area_struct *vma)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->mmap))
return -EINVAL;
@@ -98,10 +101,10 @@ static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
return parent->ops->mmap(mdev, vma);
}
static void vfio_mdev_request(void *device_data, unsigned int count)
static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count)
{
struct mdev_device *mdev = device_data;
struct mdev_parent *parent = mdev->parent;
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->type->parent;
if (parent->ops->request)
parent->ops->request(mdev, count);
@@ -121,27 +124,46 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = {
.request = vfio_mdev_request,
};
static int vfio_mdev_probe(struct device *dev)
static int vfio_mdev_probe(struct mdev_device *mdev)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct vfio_device *vdev;
int ret;
return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev);
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev)
return -ENOMEM;
vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
ret = vfio_register_group_dev(vdev);
if (ret) {
kfree(vdev);
return ret;
}
dev_set_drvdata(&mdev->dev, vdev);
return 0;
}
static void vfio_mdev_remove(struct device *dev)
static void vfio_mdev_remove(struct mdev_device *mdev)
{
vfio_del_group_dev(dev);
struct vfio_device *vdev = dev_get_drvdata(&mdev->dev);
vfio_unregister_group_dev(vdev);
kfree(vdev);
}
static struct mdev_driver vfio_mdev_driver = {
.name = "vfio_mdev",
.driver = {
.name = "vfio_mdev",
.owner = THIS_MODULE,
.mod_name = KBUILD_MODNAME,
},
.probe = vfio_mdev_probe,
.remove = vfio_mdev_remove,
};
static int __init vfio_mdev_init(void)
{
return mdev_register_driver(&vfio_mdev_driver, THIS_MODULE);
return mdev_register_driver(&vfio_mdev_driver);
}
static void __exit vfio_mdev_exit(void)

drivers/vfio/pci/Kconfig

@@ -39,9 +39,3 @@ config VFIO_PCI_IGD
and LPC bridge config space.
To enable Intel IGD assignment through vfio-pci, say Y.
config VFIO_PCI_NVLINK2
def_bool y
depends on VFIO_PCI && PPC_POWERNV && SPAPR_TCE_IOMMU
help
VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs

drivers/vfio/pci/Makefile

@@ -2,7 +2,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o

drivers/vfio/pci/vfio_pci.c

@@ -378,7 +378,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
if (vfio_pci_is_vga(pdev) &&
pdev->vendor == PCI_VENDOR_ID_INTEL &&
IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
@@ -389,24 +388,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
if (ret && ret != -ENODEV) {
pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
goto disable_exit;
}
}
if (pdev->vendor == PCI_VENDOR_ID_IBM &&
IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
ret = vfio_pci_ibm_npu2_init(vdev);
if (ret && ret != -ENODEV) {
pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
goto disable_exit;
}
}
vfio_pci_probe_mmaps(vdev);
return 0;
@@ -517,30 +498,29 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
static struct pci_driver vfio_pci_driver;
static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
struct vfio_device **pf_dev)
static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev)
{
struct pci_dev *physfn = pci_physfn(vdev->pdev);
struct vfio_device *pf_dev;
if (!vdev->pdev->is_virtfn)
return NULL;
*pf_dev = vfio_device_get_from_dev(&physfn->dev);
if (!*pf_dev)
pf_dev = vfio_device_get_from_dev(&physfn->dev);
if (!pf_dev)
return NULL;
if (pci_dev_driver(physfn) != &vfio_pci_driver) {
vfio_device_put(*pf_dev);
vfio_device_put(pf_dev);
return NULL;
}
return vfio_device_data(*pf_dev);
return container_of(pf_dev, struct vfio_pci_device, vdev);
}
static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
{
struct vfio_device *pf_dev;
struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
if (!pf_vdev)
return;
@@ -550,12 +530,13 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
WARN_ON(pf_vdev->vf_token->users < 0);
mutex_unlock(&pf_vdev->vf_token->lock);
vfio_device_put(pf_dev);
vfio_device_put(&pf_vdev->vdev);
}
static void vfio_pci_release(void *device_data)
static void vfio_pci_release(struct vfio_device *core_vdev)
{
struct vfio_pci_device *vdev = device_data;
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
mutex_lock(&vdev->reflck->lock);
@@ -581,9 +562,10 @@ static void vfio_pci_release(void *device_data)
module_put(THIS_MODULE);
}
static int vfio_pci_open(void *device_data)
static int vfio_pci_open(struct vfio_device *core_vdev)
{
struct vfio_pci_device *vdev = device_data;
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -792,15 +774,16 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
}
struct vfio_devices {
struct vfio_device **devices;
struct vfio_pci_device **devices;
int cur_index;
int max_index;
};
static long vfio_pci_ioctl(void *device_data,
static long vfio_pci_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
struct vfio_pci_device *vdev = device_data;
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
unsigned long minsz;
if (cmd == VFIO_DEVICE_GET_INFO) {
@@ -1280,9 +1263,7 @@ static long vfio_pci_ioctl(void *device_data,
goto hot_reset_release;
for (; mem_idx < devs.cur_index; mem_idx++) {
struct vfio_pci_device *tmp;
tmp = vfio_device_data(devs.devices[mem_idx]);
struct vfio_pci_device *tmp = devs.devices[mem_idx];
ret = down_write_trylock(&tmp->memory_lock);
if (!ret) {
@@ -1297,17 +1278,13 @@ static long vfio_pci_ioctl(void *device_data,
hot_reset_release:
for (i = 0; i < devs.cur_index; i++) {
struct vfio_device *device;
struct vfio_pci_device *tmp;
device = devs.devices[i];
tmp = vfio_device_data(device);
struct vfio_pci_device *tmp = devs.devices[i];
if (i < mem_idx)
up_write(&tmp->memory_lock);
else
mutex_unlock(&tmp->vma_lock);
vfio_device_put(device);
vfio_device_put(&tmp->vdev);
}
kfree(devs.devices);
@@ -1402,11 +1379,10 @@ static long vfio_pci_ioctl(void *device_data,
return -ENOTTY;
}
static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
struct vfio_pci_device *vdev = device_data;
if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
return -EINVAL;
@@ -1434,22 +1410,28 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
return -EINVAL;
}
static ssize_t vfio_pci_read(void *device_data, char __user *buf,
static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
if (!count)
return 0;
return vfio_pci_rw(device_data, buf, count, ppos, false);
return vfio_pci_rw(vdev, buf, count, ppos, false);
}
static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
if (!count)
return 0;
return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
}
/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
@@ -1646,9 +1628,10 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = {
.fault = vfio_pci_mmap_fault,
};
static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
struct vfio_pci_device *vdev = device_data;
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
struct pci_dev *pdev = vdev->pdev;
unsigned int index;
u64 phys_len, req_len, pgoff, req_start;
@@ -1716,9 +1699,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
return 0;
}
static void vfio_pci_request(void *device_data, unsigned int count)
static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count)
{
struct vfio_pci_device *vdev = device_data;
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
struct pci_dev *pdev = vdev->pdev;
mutex_lock(&vdev->igate);
@@ -1769,8 +1753,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
return 0; /* No VF token provided or required */
if (vdev->pdev->is_virtfn) {
struct vfio_device *pf_dev;
struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
bool match;
if (!pf_vdev) {
@@ -1783,7 +1766,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
}
if (!vf_token) {
vfio_device_put(pf_dev);
vfio_device_put(&pf_vdev->vdev);
pci_info_ratelimited(vdev->pdev,
"VF token required to access device\n");
return -EACCES;
@@ -1793,7 +1776,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
mutex_unlock(&pf_vdev->vf_token->lock);
vfio_device_put(pf_dev);
vfio_device_put(&pf_vdev->vdev);
if (!match) {
pci_info_ratelimited(vdev->pdev,
@@ -1832,9 +1815,10 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
#define VF_TOKEN_ARG "vf_token="
static int vfio_pci_match(void *device_data, char *buf)
static int vfio_pci_match(struct vfio_device *core_vdev, char *buf)
{
struct vfio_pci_device *vdev = device_data;
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
bool vf_token = false;
uuid_t uuid;
int ret;
@@ -1924,6 +1908,68 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
return 0;
}
static int vfio_pci_vf_init(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
int ret;
if (!pdev->is_physfn)
return 0;
vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
if (!vdev->vf_token)
return -ENOMEM;
mutex_init(&vdev->vf_token->lock);
uuid_gen(&vdev->vf_token->uuid);
vdev->nb.notifier_call = vfio_pci_bus_notifier;
ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
if (ret) {
kfree(vdev->vf_token);
return ret;
}
return 0;
}
static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)
{
if (!vdev->vf_token)
return;
bus_unregister_notifier(&pci_bus_type, &vdev->nb);
WARN_ON(vdev->vf_token->users);
mutex_destroy(&vdev->vf_token->lock);
kfree(vdev->vf_token);
}
static int vfio_pci_vga_init(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
int ret;
if (!vfio_pci_is_vga(pdev))
return 0;
ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
if (ret)
return ret;
vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));
return 0;
}
static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
if (!vfio_pci_is_vga(pdev))
return;
vga_client_register(pdev, NULL, NULL, NULL);
vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
VGA_RSRC_LEGACY_IO |
VGA_RSRC_LEGACY_MEM);
}
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_device *vdev;
@@ -1959,6 +2005,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_group_put;
}
vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops);
vdev->pdev = pdev;
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
@@ -1970,35 +2017,15 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&vdev->vma_list);
init_rwsem(&vdev->memory_lock);
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret)
goto out_free;
ret = vfio_pci_reflck_attach(vdev);
if (ret)
goto out_del_group_dev;
if (pdev->is_physfn) {
vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
if (!vdev->vf_token) {
ret = -ENOMEM;
goto out_reflck;
}
mutex_init(&vdev->vf_token->lock);
uuid_gen(&vdev->vf_token->uuid);
vdev->nb.notifier_call = vfio_pci_bus_notifier;
ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
if (ret)
goto out_vf_token;
}
if (vfio_pci_is_vga(pdev)) {
vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
vga_set_legacy_decoding(pdev,
vfio_pci_set_vga_decode(vdev, false));
}
goto out_free;
ret = vfio_pci_vf_init(vdev);
if (ret)
goto out_reflck;
ret = vfio_pci_vga_init(vdev);
if (ret)
goto out_vf;
vfio_pci_probe_power_state(vdev);
@@ -2016,15 +2043,21 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vfio_pci_set_power_state(vdev, PCI_D3hot);
}
return ret;
ret = vfio_register_group_dev(&vdev->vdev);
if (ret)
goto out_power;
dev_set_drvdata(&pdev->dev, vdev);
return 0;
out_vf_token:
kfree(vdev->vf_token);
out_power:
if (!disable_idle_d3)
vfio_pci_set_power_state(vdev, PCI_D0);
out_vf:
vfio_pci_vf_uninit(vdev);
out_reflck:
vfio_pci_reflck_put(vdev->reflck);
out_del_group_dev:
vfio_del_group_dev(&pdev->dev);
out_free:
kfree(vdev->pm_save);
kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, &pdev->dev);
@@ -2033,41 +2066,25 @@ static void vfio_pci_remove(struct pci_dev *pdev)
static void vfio_pci_remove(struct pci_dev *pdev)
{
struct vfio_pci_device *vdev;
struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);
pci_disable_sriov(pdev);
vdev = vfio_del_group_dev(&pdev->dev);
if (!vdev)
return;
if (vdev->vf_token) {
WARN_ON(vdev->vf_token->users);
mutex_destroy(&vdev->vf_token->lock);
kfree(vdev->vf_token);
}
if (vdev->nb.notifier_call)
bus_unregister_notifier(&pci_bus_type, &vdev->nb);
vfio_unregister_group_dev(&vdev->vdev);
vfio_pci_vf_uninit(vdev);
vfio_pci_reflck_put(vdev->reflck);
vfio_pci_vga_uninit(vdev);
vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
kfree(vdev->region);
mutex_destroy(&vdev->ioeventfds_lock);
if (!disable_idle_d3)
vfio_pci_set_power_state(vdev, PCI_D0);
mutex_destroy(&vdev->ioeventfds_lock);
kfree(vdev->region);
kfree(vdev->pm_save);
kfree(vdev);
if (vfio_pci_is_vga(pdev)) {
vga_client_register(pdev, NULL, NULL, NULL);
vga_set_legacy_decoding(pdev,
VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
}
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -2080,11 +2097,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
if (device == NULL)
return PCI_ERS_RESULT_DISCONNECT;
vdev = vfio_device_data(device);
if (vdev == NULL) {
vfio_device_put(device);
return PCI_ERS_RESULT_DISCONNECT;
}
vdev = container_of(device, struct vfio_pci_device, vdev);
mutex_lock(&vdev->igate);
@@ -2100,7 +2113,6 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
struct vfio_pci_device *vdev;
struct vfio_device *device;
int ret = 0;
@@ -2113,12 +2125,6 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
if (!device)
return -ENODEV;
vdev = vfio_device_data(device);
if (!vdev) {
vfio_device_put(device);
return -ENODEV;
}
if (nr_virtfn == 0)
pci_disable_sriov(pdev);
else
@@ -2178,7 +2184,7 @@ static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
return 0;
}
vdev = vfio_device_data(device);
vdev = container_of(device, struct vfio_pci_device, vdev);
if (vdev->reflck) {
vfio_pci_reflck_get(vdev->reflck);
@@ -2240,7 +2246,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return -EBUSY;
}
vdev = vfio_device_data(device);
vdev = container_of(device, struct vfio_pci_device, vdev);
/* Fault if the device is not unused */
if (vdev->refcnt) {
@@ -2248,7 +2254,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return -EBUSY;
}
devs->devices[devs->cur_index++] = device;
devs->devices[devs->cur_index++] = vdev;
return 0;
}
@@ -2270,7 +2276,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
return -EBUSY;
}
vdev = vfio_device_data(device);
vdev = container_of(device, struct vfio_pci_device, vdev);
/*
* Locking multiple devices is prone to deadlock, runaway and
@@ -2281,7 +2287,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
return -EBUSY;
}
devs->devices[devs->cur_index++] = device;
devs->devices[devs->cur_index++] = vdev;
return 0;
}
@@ -2329,7 +2335,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
/* Does at least one need a reset? */
for (i = 0; i < devs.cur_index; i++) {
tmp = vfio_device_data(devs.devices[i]);
tmp = devs.devices[i];
if (tmp->needs_reset) {
ret = pci_reset_bus(vdev->pdev);
break;
@@ -2338,7 +2344,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
put_devs:
for (i = 0; i < devs.cur_index; i++) {
tmp = vfio_device_data(devs.devices[i]);
tmp = devs.devices[i];
/*
* If reset was successful, affected devices no longer need
@@ -2354,7 +2360,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
vfio_pci_set_power_state(tmp, PCI_D3hot);
}
vfio_device_put(devs.devices[i]);
vfio_device_put(&tmp->vdev);
}
kfree(devs.devices);
@@ -2411,7 +2417,7 @@ static int __init vfio_pci_init(void)
{
int ret;
/* Allocate shared config space permision data used by all devices */
/* Allocate shared config space permission data used by all devices */
ret = vfio_pci_init_perm_bits();
if (ret)
return ret;

drivers/vfio/pci/vfio_pci_config.c

@@ -101,7 +101,7 @@ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = {
/*
* Read/Write Permission Bits - one bit for each bit in capability
* Any field can be read if it exists, but what is read depends on
* whether the field is 'virtualized', or just pass thru to the
* whether the field is 'virtualized', or just pass through to the
* hardware. Any virtualized field is also virtualized for writes.
* Writes are only permitted if they have a 1 bit here.
*/

View file

@ -21,6 +21,10 @@
#define OPREGION_SIZE (8 * 1024)
#define OPREGION_PCI_ADDR 0xfc
#define OPREGION_RVDA 0x3ba
#define OPREGION_RVDS 0x3c2
#define OPREGION_VERSION 0x16
static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
@ -58,6 +62,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
u32 addr, size;
void *base;
int ret;
u16 version;
ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
if (ret)
@ -83,6 +88,54 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
size *= 1024; /* In KB */
/*
* Support opregion v2.1+
* When the VBT data exceeds 6KB and cannot fit within mailbox #4, the
* Extended VBT region, placed next to the opregion, is used to hold the
* VBT data. The RVDA (Relative Address of VBT Data from Opregion Base)
* and RVDS (Raw VBT Data Size) opregion structure members hold the
* offset from the region base and the size of the VBT data. RVDA/RVDS
* are not defined before opregion 2.0.
*
* opregion 2.1+: RVDA is an unsigned, relative offset from the opregion
* base, and should point to the end of the opregion; otherwise, exposing
* everything between the OpRegion and the VBT to userspace for read
* access is not safe.
* RVDS is defined as the size in bytes.
*
* opregion 2.0: rvda is the physical VBT address.
* Since rvda is an HPA it cannot be used directly in the guest, and it
* is not practically available to the end user, so it is not supported.
*/
version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION));
if (version >= 0x0200) {
u64 rvda;
u32 rvds;
rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA));
rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));
if (rvda && rvds) {
/* no support for opregion v2.0 with physical VBT address */
if (version == 0x0200) {
memunmap(base);
pci_err(vdev->pdev,
"IGD assignment does not support opregion v2.0 with an extended VBT region\n");
return -EINVAL;
}
if (rvda != size) {
memunmap(base);
pci_err(vdev->pdev,
"Extended VBT does not follow opregion on version 0x%04x\n",
version);
return -EINVAL;
}
/* region size for opregion v2.0+: opregion and VBT size. */
size += rvds;
}
}
if (size != OPREGION_SIZE) {
memunmap(base);
base = memremap(addr, size, MEMREMAP_WB);
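
To restate the version handling above in isolation: for opregion 2.1+, RVDA must equal the 8KB opregion size (the extended VBT has to start right at the end of the opregion), and the exposed region then grows by RVDS bytes; v2.0, where rvda is a host physical address, is rejected. A standalone sketch of that decision, under those assumptions (igd_region_size is a made-up helper, plain C):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define OPREGION_SIZE (8 * 1024)

    /* Return false for layouts the driver refuses to expose. */
    static bool igd_region_size(uint16_t version, uint64_t rvda,
                                uint32_t rvds, size_t *sizep)
    {
            size_t size = OPREGION_SIZE;

            if (version >= 0x0200 && rvda && rvds) {
                    if (version == 0x0200)  /* rvda is a physical address */
                            return false;
                    if (rvda != size)       /* VBT must follow the opregion */
                            return false;
                    size += rvds;           /* expose opregion + VBT */
            }
            *sizep = size;
            return true;
    }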

View file

@ -1,490 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* VFIO PCI NVIDIA Witherspoon GPU support a.k.a. NVLink2.
*
* Copyright (C) 2018 IBM Corp. All rights reserved.
* Author: Alexey Kardashevskiy <aik@ozlabs.ru>
*
* Register an on-GPU RAM region for cacheable access.
*
* Derived from original vfio_pci_igd.c:
* Copyright (C) 2016 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*/
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/sched/mm.h>
#include <linux/mmu_context.h>
#include <asm/kvm_ppc.h>
#include "vfio_pci_private.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap);
EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_npu2_mmap);
struct vfio_pci_nvgpu_data {
unsigned long gpu_hpa; /* GPU RAM physical address */
unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
unsigned long useraddr; /* GPU RAM userspace address */
unsigned long size; /* Size of the GPU RAM window (usually 128GB) */
struct mm_struct *mm;
struct mm_iommu_table_group_mem_t *mem; /* Pre-registered RAM descr. */
struct pci_dev *gpdev;
struct notifier_block group_notifier;
};
static size_t vfio_pci_nvgpu_rw(struct vfio_pci_device *vdev,
char __user *buf, size_t count, loff_t *ppos, bool iswrite)
{
unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
struct vfio_pci_nvgpu_data *data = vdev->region[i].data;
loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
loff_t posaligned = pos & PAGE_MASK, posoff = pos & ~PAGE_MASK;
size_t sizealigned;
void __iomem *ptr;
if (pos >= vdev->region[i].size)
return -EINVAL;
count = min(count, (size_t)(vdev->region[i].size - pos));
/*
* We map only a bit of GPU RAM for a short time instead of mapping it
* for the guest lifetime as:
*
* 1) we do not know the GPU RAM size, only the aperture, which is 4-8
* times bigger than the actual RAM size (16/32GB RAM vs. 128GB aperture);
* 2) mapping GPU RAM allows the CPU to prefetch, and if this happens
* before the NVLink bridge is reset (which fences GPU RAM),
* hardware management interrupts (HMI) might occur and
* freeze the NVLink bridge.
*
* This is not a fast path anyway.
*/
sizealigned = ALIGN(posoff + count, PAGE_SIZE);
ptr = ioremap_cache(data->gpu_hpa + posaligned, sizealigned);
if (!ptr)
return -EFAULT;
if (iswrite) {
if (copy_from_user(ptr + posoff, buf, count))
count = -EFAULT;
else
*ppos += count;
} else {
if (copy_to_user(buf, ptr + posoff, count))
count = -EFAULT;
else
*ppos += count;
}
iounmap(ptr);
return count;
}
static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev,
struct vfio_pci_region *region)
{
struct vfio_pci_nvgpu_data *data = region->data;
long ret;
/* If there were any mappings at all... */
if (data->mm) {
if (data->mem) {
ret = mm_iommu_put(data->mm, data->mem);
WARN_ON(ret);
}
mmdrop(data->mm);
}
vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
&data->group_notifier);
pnv_npu2_unmap_lpar_dev(data->gpdev);
kfree(data);
}
static vm_fault_t vfio_pci_nvgpu_mmap_fault(struct vm_fault *vmf)
{
vm_fault_t ret;
struct vm_area_struct *vma = vmf->vma;
struct vfio_pci_region *region = vma->vm_private_data;
struct vfio_pci_nvgpu_data *data = region->data;
unsigned long vmf_off = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
unsigned long nv2pg = data->gpu_hpa >> PAGE_SHIFT;
unsigned long vm_pgoff = vma->vm_pgoff &
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
unsigned long pfn = nv2pg + vm_pgoff + vmf_off;
ret = vmf_insert_pfn(vma, vmf->address, pfn);
trace_vfio_pci_nvgpu_mmap_fault(data->gpdev, pfn << PAGE_SHIFT,
vmf->address, ret);
return ret;
}
static const struct vm_operations_struct vfio_pci_nvgpu_mmap_vmops = {
.fault = vfio_pci_nvgpu_mmap_fault,
};
static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region, struct vm_area_struct *vma)
{
int ret;
struct vfio_pci_nvgpu_data *data = region->data;
if (data->useraddr)
return -EPERM;
if (vma->vm_end - vma->vm_start > data->size)
return -EINVAL;
vma->vm_private_data = region;
vma->vm_flags |= VM_PFNMAP;
vma->vm_ops = &vfio_pci_nvgpu_mmap_vmops;
/*
* We call mm_iommu_newdev() here, once, as the region is not
* registered yet and therefore the right initialization happens now.
* Other places use mm_iommu_find(), which returns the registered
* @mem and does not go through gup().
*/
data->useraddr = vma->vm_start;
data->mm = current->mm;
mmgrab(data->mm);
ret = (int) mm_iommu_newdev(data->mm, data->useraddr,
vma_pages(vma), data->gpu_hpa, &data->mem);
trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr,
vma->vm_end - vma->vm_start, ret);
return ret;
}
static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region, struct vfio_info_cap *caps)
{
struct vfio_pci_nvgpu_data *data = region->data;
struct vfio_region_info_cap_nvlink2_ssatgt cap = {
.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
.header.version = 1,
.tgt = data->gpu_tgt
};
return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
}
static const struct vfio_pci_regops vfio_pci_nvgpu_regops = {
.rw = vfio_pci_nvgpu_rw,
.release = vfio_pci_nvgpu_release,
.mmap = vfio_pci_nvgpu_mmap,
.add_capability = vfio_pci_nvgpu_add_capability,
};
static int vfio_pci_nvgpu_group_notifier(struct notifier_block *nb,
unsigned long action, void *opaque)
{
struct kvm *kvm = opaque;
struct vfio_pci_nvgpu_data *data = container_of(nb,
struct vfio_pci_nvgpu_data,
group_notifier);
if (action == VFIO_GROUP_NOTIFY_SET_KVM && kvm &&
pnv_npu2_map_lpar_dev(data->gpdev,
kvm->arch.lpid, MSR_DR | MSR_PR))
return NOTIFY_BAD;
return NOTIFY_OK;
}
int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
{
int ret;
u64 reg[2];
u64 tgt = 0;
struct device_node *npu_node, *mem_node;
struct pci_dev *npu_dev;
struct vfio_pci_nvgpu_data *data;
uint32_t mem_phandle = 0;
unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
/*
* PCI config space does not tell us about NVLink presence, but the
* platform does, so use that.
*/
npu_dev = pnv_pci_get_npu_dev(vdev->pdev, 0);
if (!npu_dev)
return -ENODEV;
npu_node = pci_device_to_OF_node(npu_dev);
if (!npu_node)
return -EINVAL;
if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
return -ENODEV;
mem_node = of_find_node_by_phandle(mem_phandle);
if (!mem_node)
return -EINVAL;
if (of_property_read_variable_u64_array(mem_node, "reg", reg,
ARRAY_SIZE(reg), ARRAY_SIZE(reg)) !=
ARRAY_SIZE(reg))
return -EINVAL;
if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
return -EFAULT;
}
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->gpu_hpa = reg[0];
data->gpu_tgt = tgt;
data->size = reg[1];
dev_dbg(&vdev->pdev->dev, "%lx..%lx\n", data->gpu_hpa,
data->gpu_hpa + data->size - 1);
data->gpdev = vdev->pdev;
data->group_notifier.notifier_call = vfio_pci_nvgpu_group_notifier;
ret = vfio_register_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
&events, &data->group_notifier);
if (ret)
goto free_exit;
/*
* We have just set KVM; we do not need the listener anymore.
* Also, keeping it registered means that if more than one GPU is
* assigned, we would get several similar notifiers notifying about
* the same device again, which does not help with anything.
*/
vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
&data->group_notifier);
ret = vfio_pci_register_dev_region(vdev,
PCI_VENDOR_ID_NVIDIA | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
&vfio_pci_nvgpu_regops,
data->size,
VFIO_REGION_INFO_FLAG_READ |
VFIO_REGION_INFO_FLAG_WRITE |
VFIO_REGION_INFO_FLAG_MMAP,
data);
if (ret)
goto free_exit;
return 0;
free_exit:
kfree(data);
return ret;
}
/*
* IBM NPU2 bridge
*/
struct vfio_pci_npu2_data {
void *base; /* ATSD register virtual address, for emulated access */
unsigned long mmio_atsd; /* ATSD physical address */
unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
unsigned int link_speed; /* The link speed from DT's ibm,nvlink-speed */
};
static size_t vfio_pci_npu2_rw(struct vfio_pci_device *vdev,
char __user *buf, size_t count, loff_t *ppos, bool iswrite)
{
unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
struct vfio_pci_npu2_data *data = vdev->region[i].data;
loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
if (pos >= vdev->region[i].size)
return -EINVAL;
count = min(count, (size_t)(vdev->region[i].size - pos));
if (iswrite) {
if (copy_from_user(data->base + pos, buf, count))
return -EFAULT;
} else {
if (copy_to_user(buf, data->base + pos, count))
return -EFAULT;
}
*ppos += count;
return count;
}
static int vfio_pci_npu2_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region, struct vm_area_struct *vma)
{
int ret;
struct vfio_pci_npu2_data *data = region->data;
unsigned long req_len = vma->vm_end - vma->vm_start;
if (req_len != PAGE_SIZE)
return -EINVAL;
vma->vm_flags |= VM_PFNMAP;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ret = remap_pfn_range(vma, vma->vm_start, data->mmio_atsd >> PAGE_SHIFT,
req_len, vma->vm_page_prot);
trace_vfio_pci_npu2_mmap(vdev->pdev, data->mmio_atsd, vma->vm_start,
vma->vm_end - vma->vm_start, ret);
return ret;
}
static void vfio_pci_npu2_release(struct vfio_pci_device *vdev,
struct vfio_pci_region *region)
{
struct vfio_pci_npu2_data *data = region->data;
memunmap(data->base);
kfree(data);
}
static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region, struct vfio_info_cap *caps)
{
struct vfio_pci_npu2_data *data = region->data;
struct vfio_region_info_cap_nvlink2_ssatgt captgt = {
.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
.header.version = 1,
.tgt = data->gpu_tgt
};
struct vfio_region_info_cap_nvlink2_lnkspd capspd = {
.header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD,
.header.version = 1,
.link_speed = data->link_speed
};
int ret;
ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt));
if (ret)
return ret;
return vfio_info_add_capability(caps, &capspd.header, sizeof(capspd));
}
static const struct vfio_pci_regops vfio_pci_npu2_regops = {
.rw = vfio_pci_npu2_rw,
.mmap = vfio_pci_npu2_mmap,
.release = vfio_pci_npu2_release,
.add_capability = vfio_pci_npu2_add_capability,
};
int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
{
int ret;
struct vfio_pci_npu2_data *data;
struct device_node *nvlink_dn;
u32 nvlink_index = 0, mem_phandle = 0;
struct pci_dev *npdev = vdev->pdev;
struct device_node *npu_node = pci_device_to_OF_node(npdev);
struct pci_controller *hose = pci_bus_to_host(npdev->bus);
u64 mmio_atsd = 0;
u64 tgt = 0;
u32 link_speed = 0xff;
/*
* PCI config space does not tell us about NVLink presence, but the
* platform does, so use that.
*/
if (!pnv_pci_get_gpu_dev(vdev->pdev))
return -ENODEV;
if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
return -ENODEV;
/*
* NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
* so we can allocate one register per link, using nvlink index as
* a key.
* There is always at least one ATSD register so as long as at least
* NVLink bridge #0 is passed to the guest, ATSD will be available.
*/
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return -ENODEV;
if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
&mmio_atsd)) {
if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
&mmio_atsd)) {
dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
mmio_atsd = 0;
} else {
dev_warn(&vdev->pdev->dev,
"Using fallback ibm,mmio-atsd[0] for ATSD.\n");
}
}
if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
return -EFAULT;
}
if (of_property_read_u32(npu_node, "ibm,nvlink-speed", &link_speed)) {
dev_warn(&vdev->pdev->dev, "No ibm,nvlink-speed found\n");
return -EFAULT;
}
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->mmio_atsd = mmio_atsd;
data->gpu_tgt = tgt;
data->link_speed = link_speed;
if (data->mmio_atsd) {
data->base = memremap(data->mmio_atsd, SZ_64K, MEMREMAP_WT);
if (!data->base) {
ret = -ENOMEM;
goto free_exit;
}
}
/*
* We want to expose the capability even if this specific NVLink
* did not get its own ATSD register because capabilities
* belong to VFIO regions and normally there will be an ATSD register
* assigned to the NVLink bridge.
*/
ret = vfio_pci_register_dev_region(vdev,
PCI_VENDOR_ID_IBM |
VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
&vfio_pci_npu2_regops,
data->mmio_atsd ? PAGE_SIZE : 0,
VFIO_REGION_INFO_FLAG_READ |
VFIO_REGION_INFO_FLAG_WRITE |
VFIO_REGION_INFO_FLAG_MMAP,
data);
if (ret)
goto free_exit;
return 0;
free_exit:
if (data->base)
memunmap(data->base);
kfree(data);
return ret;
}

View file

@ -100,6 +100,7 @@ struct vfio_pci_mmap_vma {
};
struct vfio_pci_device {
struct vfio_device vdev;
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS];
bool bar_mmap_supported[PCI_STD_NUM_BARS];
@ -199,20 +200,6 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
#ifdef CONFIG_VFIO_PCI_NVLINK2
extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
#else
static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
{
return -ENODEV;
}
static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
{
return -ENODEV;
}
#endif
#ifdef CONFIG_S390
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,

View file

@ -26,7 +26,7 @@
#define XGMAC_DMA_CONTROL 0x00000f18 /* Ctrl (Operational Mode) */
#define XGMAC_DMA_INTR_ENA 0x00000f1c /* Interrupt Enable */
/* DMA Control registe defines */
/* DMA Control register defines */
#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */
#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */

View file

@ -66,16 +66,18 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
if (ret) {
kfree(vdev->name);
kfree(vdev);
return ret;
}
return ret;
dev_set_drvdata(&adev->dev, vdev);
return 0;
}
static void vfio_amba_remove(struct amba_device *adev)
{
struct vfio_platform_device *vdev =
vfio_platform_remove_common(&adev->dev);
struct vfio_platform_device *vdev = dev_get_drvdata(&adev->dev);
vfio_platform_remove_common(vdev);
kfree(vdev->name);
kfree(vdev);
}

View file

@ -54,23 +54,21 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->reset_required = reset_required;
ret = vfio_platform_probe_common(vdev, &pdev->dev);
if (ret)
if (ret) {
kfree(vdev);
return ret;
return ret;
}
dev_set_drvdata(&pdev->dev, vdev);
return 0;
}
static int vfio_platform_remove(struct platform_device *pdev)
{
struct vfio_platform_device *vdev;
struct vfio_platform_device *vdev = dev_get_drvdata(&pdev->dev);
vdev = vfio_platform_remove_common(&pdev->dev);
if (vdev) {
kfree(vdev);
return 0;
}
return -EINVAL;
vfio_platform_remove_common(vdev);
kfree(vdev);
return 0;
}
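
Both the amba and platform variants converge on the same drvdata handoff: probe publishes the structure with dev_set_drvdata() only after the common init succeeds, and remove fetches it back rather than having the common helper return it. In generic form (sketch_probe/sketch_remove are illustrative names, not this driver's code):

    static int sketch_probe(struct device *dev)
    {
            struct vfio_platform_device *vdev;
            int ret;

            vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
            if (!vdev)
                    return -ENOMEM;

            ret = vfio_platform_probe_common(vdev, dev);
            if (ret) {
                    kfree(vdev);            /* never published, safe to free */
                    return ret;
            }
            dev_set_drvdata(dev, vdev);     /* remove() relies on this */
            return 0;
    }

    static void sketch_remove(struct device *dev)
    {
            struct vfio_platform_device *vdev = dev_get_drvdata(dev);

            vfio_platform_remove_common(vdev);
            kfree(vdev);
    }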
static struct platform_driver vfio_platform_driver = {

View file

@ -218,9 +218,10 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
return -EINVAL;
}
static void vfio_platform_release(void *device_data)
static void vfio_platform_release(struct vfio_device *core_vdev)
{
struct vfio_platform_device *vdev = device_data;
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
mutex_lock(&driver_lock);
@ -244,9 +245,10 @@ static void vfio_platform_release(void *device_data)
module_put(vdev->parent_module);
}
static int vfio_platform_open(void *device_data)
static int vfio_platform_open(struct vfio_device *core_vdev)
{
struct vfio_platform_device *vdev = device_data;
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
int ret;
if (!try_module_get(vdev->parent_module))
@ -293,10 +295,12 @@ static int vfio_platform_open(void *device_data)
return ret;
}
static long vfio_platform_ioctl(void *device_data,
static long vfio_platform_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
struct vfio_platform_device *vdev = device_data;
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned long minsz;
if (cmd == VFIO_DEVICE_GET_INFO) {
@ -455,10 +459,11 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg,
return -EFAULT;
}
static ssize_t vfio_platform_read(void *device_data, char __user *buf,
size_t count, loff_t *ppos)
static ssize_t vfio_platform_read(struct vfio_device *core_vdev,
char __user *buf, size_t count, loff_t *ppos)
{
struct vfio_platform_device *vdev = device_data;
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;
@ -531,10 +536,11 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg,
return -EFAULT;
}
static ssize_t vfio_platform_write(void *device_data, const char __user *buf,
static ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
struct vfio_platform_device *vdev = device_data;
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;
@ -573,9 +579,10 @@ static int vfio_platform_mmap_mmio(struct vfio_platform_region region,
req_len, vma->vm_page_prot);
}
static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma)
static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
struct vfio_platform_device *vdev = device_data;
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index;
index = vma->vm_pgoff >> (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT);
@ -659,8 +666,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct iommu_group *group;
int ret;
if (!vdev)
return -EINVAL;
vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops);
ret = vfio_platform_acpi_probe(vdev, dev);
if (ret)
@ -685,13 +691,13 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
goto put_reset;
}
ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
ret = vfio_register_group_dev(&vdev->vdev);
if (ret)
goto put_iommu;
mutex_init(&vdev->igate);
pm_runtime_enable(vdev->device);
pm_runtime_enable(dev);
return 0;
put_iommu:
@ -702,19 +708,13 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
}
EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
void vfio_platform_remove_common(struct vfio_platform_device *vdev)
{
struct vfio_platform_device *vdev;
vfio_unregister_group_dev(&vdev->vdev);
vdev = vfio_del_group_dev(dev);
if (vdev) {
pm_runtime_disable(vdev->device);
vfio_platform_put_reset(vdev);
vfio_iommu_group_put(dev->iommu_group, dev);
}
return vdev;
pm_runtime_disable(vdev->device);
vfio_platform_put_reset(vdev);
vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev);
}
EXPORT_SYMBOL_GPL(vfio_platform_remove_common);

View file

@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/vfio.h>
#define VFIO_PLATFORM_OFFSET_SHIFT 40
#define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1)
@ -42,6 +43,7 @@ struct vfio_platform_region {
};
struct vfio_platform_device {
struct vfio_device vdev;
struct vfio_platform_region *regions;
u32 num_regions;
struct vfio_platform_irq *irqs;
@ -80,8 +82,7 @@ struct vfio_platform_reset_node {
extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct device *dev);
extern struct vfio_platform_device *vfio_platform_remove_common
(struct device *dev);
void vfio_platform_remove_common(struct vfio_platform_device *vdev);
extern int vfio_platform_irq_init(struct vfio_platform_device *vdev);
extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev);

View file

@ -46,7 +46,6 @@ static struct vfio {
struct mutex group_lock;
struct cdev group_cdev;
dev_t group_devt;
wait_queue_head_t release_q;
} vfio;
struct vfio_iommu_driver {
@ -90,15 +89,6 @@ struct vfio_group {
struct blocking_notifier_head notifier;
};
struct vfio_device {
struct kref kref;
struct device *dev;
const struct vfio_device_ops *ops;
struct vfio_group *group;
struct list_head group_next;
void *device_data;
};
#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
@ -109,8 +99,8 @@ MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. Thi
/*
* vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
* and remove functions, any use cases other than acquiring the first
* reference for the purpose of calling vfio_add_group_dev() or removing
* that symmetric reference after vfio_del_group_dev() should use the raw
* reference for the purpose of calling vfio_register_group_dev() or removing
* that symmetric reference after vfio_unregister_group_dev() should use the raw
* iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
* removes the device from the dummy group and cannot be nested.
*/
@ -532,67 +522,17 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
/**
* Device objects - create, release, get, put, search
*/
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
struct device *dev,
const struct vfio_device_ops *ops,
void *device_data)
{
struct vfio_device *device;
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (!device)
return ERR_PTR(-ENOMEM);
kref_init(&device->kref);
device->dev = dev;
device->group = group;
device->ops = ops;
device->device_data = device_data;
dev_set_drvdata(dev, device);
/* No need to get group_lock, caller has group reference */
vfio_group_get(group);
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
group->dev_counter++;
mutex_unlock(&group->device_lock);
return device;
}
static void vfio_device_release(struct kref *kref)
{
struct vfio_device *device = container_of(kref,
struct vfio_device, kref);
struct vfio_group *group = device->group;
list_del(&device->group_next);
group->dev_counter--;
mutex_unlock(&group->device_lock);
dev_set_drvdata(device->dev, NULL);
kfree(device);
/* vfio_del_group_dev may be waiting for this device */
wake_up(&vfio.release_q);
}
/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
struct vfio_group *group = device->group;
kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
vfio_group_put(group);
if (refcount_dec_and_test(&device->refcount))
complete(&device->comp);
}
EXPORT_SYMBOL_GPL(vfio_device_put);
static void vfio_device_get(struct vfio_device *device)
static bool vfio_device_try_get(struct vfio_device *device)
{
vfio_group_get(device->group);
kref_get(&device->kref);
return refcount_inc_not_zero(&device->refcount);
}
static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
@ -602,8 +542,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_for_each_entry(device, &group->device_list, group_next) {
if (device->dev == dev) {
vfio_device_get(device);
if (device->dev == dev && vfio_device_try_get(device)) {
mutex_unlock(&group->device_lock);
return device;
}
@ -801,14 +740,22 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
/**
* VFIO driver API
*/
int vfio_add_group_dev(struct device *dev,
const struct vfio_device_ops *ops, void *device_data)
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
const struct vfio_device_ops *ops)
{
init_completion(&device->comp);
device->dev = dev;
device->ops = ops;
}
EXPORT_SYMBOL_GPL(vfio_init_group_dev);
int vfio_register_group_dev(struct vfio_device *device)
{
struct vfio_device *existing_device;
struct iommu_group *iommu_group;
struct vfio_group *group;
struct vfio_device *device;
iommu_group = iommu_group_get(dev);
iommu_group = iommu_group_get(device->dev);
if (!iommu_group)
return -EINVAL;
@ -827,31 +774,29 @@ int vfio_add_group_dev(struct device *dev,
iommu_group_put(iommu_group);
}
device = vfio_group_get_device(group, dev);
if (device) {
dev_WARN(dev, "Device already exists on group %d\n",
existing_device = vfio_group_get_device(group, device->dev);
if (existing_device) {
dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(iommu_group));
vfio_device_put(device);
vfio_device_put(existing_device);
vfio_group_put(group);
return -EBUSY;
}
device = vfio_group_create_device(group, dev, ops, device_data);
if (IS_ERR(device)) {
vfio_group_put(group);
return PTR_ERR(device);
}
/* Our reference on group is moved to the device */
device->group = group;
/*
* Drop all but the vfio_device reference. The vfio_device holds
* a reference to the vfio_group, which holds a reference to the
* iommu_group.
*/
vfio_group_put(group);
/* Refcounting can't start until the driver calls register */
refcount_set(&device->refcount, 1);
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
group->dev_counter++;
mutex_unlock(&group->device_lock);
return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
/**
* Get a reference to the vfio_device for a device. Even if the
@ -886,7 +831,7 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
int ret;
if (it->ops->match) {
ret = it->ops->match(it->device_data, buf);
ret = it->ops->match(it, buf);
if (ret < 0) {
device = ERR_PTR(ret);
break;
@ -895,9 +840,8 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
ret = !strcmp(dev_name(it->dev), buf);
}
if (ret) {
if (ret && vfio_device_try_get(it)) {
device = it;
vfio_device_get(device);
break;
}
}
@ -906,33 +850,16 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
return device;
}
/*
* Caller must hold a reference to the vfio_device
*/
void *vfio_device_data(struct vfio_device *device)
{
return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);
/*
* Decrement the device reference count and wait for the device to be
* removed. Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
void vfio_unregister_group_dev(struct vfio_device *device)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct vfio_device *device = dev_get_drvdata(dev);
struct vfio_group *group = device->group;
void *device_data = device->device_data;
struct vfio_unbound_dev *unbound;
unsigned int i = 0;
bool interrupted = false;
/*
* The group exists so long as we have a device reference. Get
* a group reference and use it to scan for the device going away.
*/
vfio_group_get(group);
long rc;
/*
* When the device is removed from the group, the group suddenly
@ -945,7 +872,7 @@ void *vfio_del_group_dev(struct device *dev)
*/
unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
if (unbound) {
unbound->dev = dev;
unbound->dev = device->dev;
mutex_lock(&group->unbound_lock);
list_add(&unbound->unbound_next, &group->unbound_list);
mutex_unlock(&group->unbound_lock);
@ -953,44 +880,33 @@ void *vfio_del_group_dev(struct device *dev)
WARN_ON(!unbound);
vfio_device_put(device);
/*
* If the device is still present in the group after the above
* 'put', then it is in use and we need to request it from the
* bus driver. The driver may in turn need to request the
* device from the user. We send the request on an arbitrary
* interval with counter to allow the driver to take escalating
* measures to release the device if it has the ability to do so.
*/
add_wait_queue(&vfio.release_q, &wait);
do {
device = vfio_group_get_device(group, dev);
if (!device)
break;
rc = try_wait_for_completion(&device->comp);
while (rc <= 0) {
if (device->ops->request)
device->ops->request(device_data, i++);
vfio_device_put(device);
device->ops->request(device, i++);
if (interrupted) {
wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
rc = wait_for_completion_timeout(&device->comp,
HZ * 10);
} else {
wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
if (signal_pending(current)) {
rc = wait_for_completion_interruptible_timeout(
&device->comp, HZ * 10);
if (rc < 0) {
interrupted = true;
dev_warn(dev,
dev_warn(device->dev,
"Device is currently in use, task"
" \"%s\" (%d) "
"blocked until device is released",
current->comm, task_pid_nr(current));
}
}
}
} while (1);
mutex_lock(&group->device_lock);
list_del(&device->group_next);
group->dev_counter--;
mutex_unlock(&group->device_lock);
remove_wait_queue(&vfio.release_q, &wait);
/*
* In order to support multiple devices per group, devices can be
* plucked from the group while other devices in the group are still
@ -1008,11 +924,10 @@ void *vfio_del_group_dev(struct device *dev)
if (list_empty(&group->device_list))
wait_event(group->container_q, !group->container);
/* Matches the get in vfio_register_group_dev() */
vfio_group_put(group);
return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
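
The rework above amounts to swapping the old kref plus global release_q wait queue for a refcount_t paired with a completion: registration seeds the count at one, lookups take references only via refcount_inc_not_zero() so a dying device cannot be revived, and unregistration drops the initial reference and sleeps on the completion until the last holder puts its reference. A generic sketch of the idiom (hypothetical obj_* names, not the kernel's API):

    #include <linux/completion.h>
    #include <linux/refcount.h>

    struct obj {
            refcount_t refcount;
            struct completion comp;
    };

    static void obj_register(struct obj *o)
    {
            init_completion(&o->comp);
            refcount_set(&o->refcount, 1);          /* the initial reference */
    }

    static bool obj_try_get(struct obj *o)
    {
            /* Fails once the count has hit zero, i.e. during teardown. */
            return refcount_inc_not_zero(&o->refcount);
    }

    static void obj_put(struct obj *o)
    {
            if (refcount_dec_and_test(&o->refcount))
                    complete(&o->comp);             /* wake the unregisterer */
    }

    static void obj_unregister(struct obj *o)
    {
            obj_put(o);                             /* drop the initial ref */
            wait_for_completion(&o->comp);          /* until last user gone */
    }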
/**
* VFIO base fd, /dev/vfio/vfio
@ -1454,7 +1369,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(device))
return PTR_ERR(device);
ret = device->ops->open(device->device_data);
ret = device->ops->open(device);
if (ret) {
vfio_device_put(device);
return ret;
@ -1466,7 +1381,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
*/
ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0) {
device->ops->release(device->device_data);
device->ops->release(device);
vfio_device_put(device);
return ret;
}
@ -1476,7 +1391,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(filep)) {
put_unused_fd(ret);
ret = PTR_ERR(filep);
device->ops->release(device->device_data);
device->ops->release(device);
vfio_device_put(device);
return ret;
}
@ -1633,7 +1548,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
device->ops->release(device->device_data);
device->ops->release(device);
vfio_group_try_dissolve_container(device->group);
@ -1650,7 +1565,7 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
if (unlikely(!device->ops->ioctl))
return -EINVAL;
return device->ops->ioctl(device->device_data, cmd, arg);
return device->ops->ioctl(device, cmd, arg);
}
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
@ -1661,7 +1576,7 @@ static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
if (unlikely(!device->ops->read))
return -EINVAL;
return device->ops->read(device->device_data, buf, count, ppos);
return device->ops->read(device, buf, count, ppos);
}
static ssize_t vfio_device_fops_write(struct file *filep,
@ -1673,7 +1588,7 @@ static ssize_t vfio_device_fops_write(struct file *filep,
if (unlikely(!device->ops->write))
return -EINVAL;
return device->ops->write(device->device_data, buf, count, ppos);
return device->ops->write(device, buf, count, ppos);
}
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
@ -1683,7 +1598,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
if (unlikely(!device->ops->mmap))
return -EINVAL;
return device->ops->mmap(device->device_data, vma);
return device->ops->mmap(device, vma);
}
static const struct file_operations vfio_device_fops = {
@ -2379,7 +2294,6 @@ static int __init vfio_init(void)
mutex_init(&vfio.iommu_drivers_lock);
INIT_LIST_HEAD(&vfio.group_list);
INIT_LIST_HEAD(&vfio.iommu_drivers_list);
init_waitqueue_head(&vfio.release_q);
ret = misc_register(&vfio_dev);
if (ret) {

View file

@ -16,7 +16,7 @@
* IOMMU to support the IOMMU API and have few to no restrictions around
* the IOVA range that can be mapped. The Type1 IOMMU is currently
* optimized for relatively static mappings of a userspace process with
* userpsace pages pinned into memory. We also assume devices and IOMMU
* userspace pages pinned into memory. We also assume devices and IOMMU
* domains are PCI based as the IOMMU API is still centered around a
* device/bus interface rather than a group interface.
*/
@ -77,7 +77,6 @@ struct vfio_iommu {
bool v2;
bool nesting;
bool dirty_page_tracking;
bool pinned_page_dirty_scope;
bool container_open;
};
@ -877,7 +876,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
/*
* If iommu capable domain exist in the container then all pages are
* already pinned and accounted. Accouting should be done if there is no
* already pinned and accounted. Accounting should be done if there is no
* iommu capable domain in the container.
*/
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
@ -960,7 +959,7 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
bool do_accounting;
int i;
if (!iommu || !user_pfn)
if (!iommu || !user_pfn || npage <= 0)
return -EINVAL;
/* Supported for v2 version only */
@ -977,13 +976,13 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
iova = user_pfn[i] << PAGE_SHIFT;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
if (!dma)
goto unpin_exit;
break;
vfio_unpin_page_external(dma, iova, do_accounting);
}
unpin_exit:
mutex_unlock(&iommu->lock);
return i > npage ? npage : (i > 0 ? i : -EINVAL);
return i > 0 ? i : -EINVAL;
}
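
For callers, the simplified error handling above means the unpin path now returns how many of the requested pages were actually unpinned, or -EINVAL when none were (or the arguments were invalid), instead of trying to encode both. A caller-side sketch assuming the vfio_unpin_pages() wrapper of this kernel (sketch_unpin is a made-up helper):

    static int sketch_unpin(struct device *dev, unsigned long *user_pfn,
                            int npage)
    {
            int ret = vfio_unpin_pages(dev, user_pfn, npage);

            if (ret < 0)
                    return ret;             /* nothing was unpinned */
            if (ret != npage)               /* partial: stopped at a hole */
                    dev_warn(dev, "unpinned %d of %d pages\n", ret, npage);
            return 0;
    }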
static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
@ -1933,28 +1932,13 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
return ret;
}
static struct device *vfio_mdev_get_iommu_device(struct device *dev)
{
struct device *(*fn)(struct device *dev);
struct device *iommu_device;
fn = symbol_get(mdev_get_iommu_device);
if (fn) {
iommu_device = fn(dev);
symbol_put(mdev_get_iommu_device);
return iommu_device;
}
return NULL;
}
static int vfio_mdev_attach_domain(struct device *dev, void *data)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct iommu_domain *domain = data;
struct device *iommu_device;
iommu_device = vfio_mdev_get_iommu_device(dev);
iommu_device = mdev_get_iommu_device(mdev);
if (iommu_device) {
if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
return iommu_aux_attach_device(domain, iommu_device);
@ -1967,10 +1951,11 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data)
static int vfio_mdev_detach_domain(struct device *dev, void *data)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct iommu_domain *domain = data;
struct device *iommu_device;
iommu_device = vfio_mdev_get_iommu_device(dev);
iommu_device = mdev_get_iommu_device(mdev);
if (iommu_device) {
if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
iommu_aux_detach_device(domain, iommu_device);
@ -2018,9 +2003,10 @@ static bool vfio_bus_is_mdev(struct bus_type *bus)
static int vfio_mdev_iommu_device(struct device *dev, void *data)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct device **old = data, *new;
new = vfio_mdev_get_iommu_device(dev);
new = mdev_get_iommu_device(mdev);
if (!new || (*old && *old != new))
return -EINVAL;
@ -2177,7 +2163,7 @@ static int vfio_iommu_resv_exclude(struct list_head *iova,
continue;
/*
* Insert a new node if current node overlaps with the
* reserve region to exlude that from valid iova range.
* reserve region to exclude that from valid iova range.
* Note that, new node is inserted before the current
* node and finally the current node is deleted keeping
* the list updated and sorted.

View file

@ -10,7 +10,22 @@
#ifndef MDEV_H
#define MDEV_H
struct mdev_device;
struct mdev_type;
struct mdev_device {
struct device dev;
guid_t uuid;
void *driver_data;
struct list_head next;
struct mdev_type *type;
struct device *iommu_device;
bool active;
};
static inline struct mdev_device *to_mdev_device(struct device *dev)
{
return container_of(dev, struct mdev_device, dev);
}
/*
* Called by the parent device driver to set the device which represents
@ -19,12 +34,21 @@ struct mdev_device;
*
* @dev: the mediated device that iommu will isolate.
* @iommu_device: a pci device which represents the iommu for @dev.
*
* Return 0 for success, otherwise negative error value.
*/
int mdev_set_iommu_device(struct device *dev, struct device *iommu_device);
static inline void mdev_set_iommu_device(struct mdev_device *mdev,
struct device *iommu_device)
{
mdev->iommu_device = iommu_device;
}
struct device *mdev_get_iommu_device(struct device *dev);
static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev)
{
return mdev->iommu_device;
}
unsigned int mdev_get_type_group_id(struct mdev_device *mdev);
unsigned int mtype_get_type_group_id(struct mdev_type *mtype);
struct device *mtype_get_parent_dev(struct mdev_type *mtype);
/**
* struct mdev_parent_ops - Structure to be registered for each parent device to
@ -38,7 +62,6 @@ struct device *mdev_get_iommu_device(struct device *dev);
* @create: Called to allocate basic resources in parent device's
* driver for a particular mediated device. It is
* mandatory to provide create ops.
* @kobj: kobject of type for which 'create' is called.
* @mdev: mdev_device structure of the mediated device
* that is being created
* Returns integer: success (0) or error (< 0)
@ -84,7 +107,7 @@ struct mdev_parent_ops {
const struct attribute_group **mdev_attr_groups;
struct attribute_group **supported_type_groups;
int (*create)(struct kobject *kobj, struct mdev_device *mdev);
int (*create)(struct mdev_device *mdev);
int (*remove)(struct mdev_device *mdev);
int (*open)(struct mdev_device *mdev);
void (*release)(struct mdev_device *mdev);
@ -101,9 +124,11 @@ struct mdev_parent_ops {
/* interface for exporting mdev supported type attributes */
struct mdev_type_attribute {
struct attribute attr;
ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf);
ssize_t (*store)(struct kobject *kobj, struct device *dev,
const char *buf, size_t count);
ssize_t (*show)(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf);
ssize_t (*store)(struct mdev_type *mtype,
struct mdev_type_attribute *attr, const char *buf,
size_t count);
};
#define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \
@ -118,35 +143,46 @@ struct mdev_type_attribute mdev_type_attr_##_name = \
/**
* struct mdev_driver - Mediated device driver
* @name: driver name
* @probe: called when new device created
* @remove: called when device removed
* @driver: device driver structure
*
**/
struct mdev_driver {
const char *name;
int (*probe)(struct device *dev);
void (*remove)(struct device *dev);
int (*probe)(struct mdev_device *dev);
void (*remove)(struct mdev_device *dev);
struct device_driver driver;
};
#define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver)
void *mdev_get_drvdata(struct mdev_device *mdev);
void mdev_set_drvdata(struct mdev_device *mdev, void *data);
const guid_t *mdev_uuid(struct mdev_device *mdev);
static inline void *mdev_get_drvdata(struct mdev_device *mdev)
{
return mdev->driver_data;
}
static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data)
{
mdev->driver_data = data;
}
static inline const guid_t *mdev_uuid(struct mdev_device *mdev)
{
return &mdev->uuid;
}
extern struct bus_type mdev_bus_type;
int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops);
void mdev_unregister_device(struct device *dev);
int mdev_register_driver(struct mdev_driver *drv, struct module *owner);
int mdev_register_driver(struct mdev_driver *drv);
void mdev_unregister_driver(struct mdev_driver *drv);
struct device *mdev_parent_dev(struct mdev_device *mdev);
struct device *mdev_dev(struct mdev_device *mdev);
struct mdev_device *mdev_from_dev(struct device *dev);
static inline struct device *mdev_dev(struct mdev_device *mdev)
{
return &mdev->dev;
}
static inline struct mdev_device *mdev_from_dev(struct device *dev)
{
return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL;
}
#endif /* MDEV_H */
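
Concretely, with the signatures above a type attribute no longer digs the type name out of a kobject; it receives the mdev_type directly and can ask for its group id. A made-up attribute using the new form (example_show is hypothetical):

    static ssize_t example_show(struct mdev_type *mtype,
                                struct mdev_type_attribute *attr, char *buf)
    {
            /* Index driver-private data by the type's group id. */
            return sprintf(buf, "type group %u\n",
                           mtype_get_type_group_id(mtype));
    }
    static MDEV_TYPE_ATTR_RO(example);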

View file

@ -15,6 +15,17 @@
#include <linux/poll.h>
#include <uapi/linux/vfio.h>
struct vfio_device {
struct device *dev;
const struct vfio_device_ops *ops;
struct vfio_group *group;
/* Members below here are private, not for driver use */
refcount_t refcount;
struct completion comp;
struct list_head group_next;
};
/**
* struct vfio_device_ops - VFIO bus driver device callbacks
*
@ -32,30 +43,28 @@
*/
struct vfio_device_ops {
char *name;
int (*open)(void *device_data);
void (*release)(void *device_data);
ssize_t (*read)(void *device_data, char __user *buf,
int (*open)(struct vfio_device *vdev);
void (*release)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t (*write)(void *device_data, const char __user *buf,
ssize_t (*write)(struct vfio_device *vdev, const char __user *buf,
size_t count, loff_t *size);
long (*ioctl)(void *device_data, unsigned int cmd,
long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
int (*mmap)(void *device_data, struct vm_area_struct *vma);
void (*request)(void *device_data, unsigned int count);
int (*match)(void *device_data, char *buf);
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
};
extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
extern int vfio_add_group_dev(struct device *dev,
const struct vfio_device_ops *ops,
void *device_data);
extern void *vfio_del_group_dev(struct device *dev);
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
const struct vfio_device_ops *ops);
int vfio_register_group_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device);
extern void *vfio_device_data(struct vfio_device *device);
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {

View file

@ -333,17 +333,10 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
/* 10de vendor PCI sub-types */
/*
* NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
*/
#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
/* subtype 1 was VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM, don't use */
/* 1014 vendor PCI sub-types */
/*
* IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
* to do TLB invalidation on a GPU.
*/
#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
/* subtype 1 was VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD, don't use */
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
@ -637,32 +630,9 @@ struct vfio_device_migration_info {
*/
#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
/*
* Capability with compressed real address (aka SSA - small system address)
* where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
* and by the userspace to associate a NVLink bridge with a GPU.
*/
#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4
/* subtype 4 was VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, don't use */
struct vfio_region_info_cap_nvlink2_ssatgt {
struct vfio_info_cap_header header;
__u64 tgt;
};
/*
* Capability with an NVLink link speed. The value is read by
* the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
* property in the device tree. The value is fixed in the hardware
* and failing to provide the correct value results in the link
* not working with no indication from the driver why.
*/
#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5
struct vfio_region_info_cap_nvlink2_lnkspd {
struct vfio_info_cap_header header;
__u32 link_speed;
__u32 __pad;
};
/* subtype 5 was VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD, don't use */
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,

View file

@ -205,16 +205,6 @@ static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
static struct page *mbochs_get_page(struct mdev_state *mdev_state,
pgoff_t pgoff);
static const struct mbochs_type *mbochs_find_type(struct kobject *kobj)
{
int i;
for (i = 0; i < ARRAY_SIZE(mbochs_types); i++)
if (strcmp(mbochs_types[i].name, kobj->name) == 0)
return mbochs_types + i;
return NULL;
}
static void mbochs_create_config_space(struct mdev_state *mdev_state)
{
STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
@ -516,9 +506,10 @@ static int mbochs_reset(struct mdev_device *mdev)
return 0;
}
static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
static int mbochs_create(struct mdev_device *mdev)
{
const struct mbochs_type *type = mbochs_find_type(kobj);
const struct mbochs_type *type =
&mbochs_types[mdev_get_type_group_id(mdev)];
struct device *dev = mdev_dev(mdev);
struct mdev_state *mdev_state;
@ -544,7 +535,7 @@ static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
goto err_mem;
dev_info(dev, "%s: %s, %d MB, %ld pages\n", __func__,
kobj->name, type->mbytes, mdev_state->pagecount);
type->name, type->mbytes, mdev_state->pagecount);
mutex_init(&mdev_state->ops_lock);
mdev_state->mdev = mdev;
@ -1334,44 +1325,50 @@ static const struct attribute_group mdev_dev_group = {
.attrs = mdev_dev_attrs,
};
const struct attribute_group *mdev_dev_groups[] = {
static const struct attribute_group *mdev_dev_groups[] = {
&mdev_dev_group,
NULL,
};
static ssize_t
name_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t name_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", kobj->name);
}
MDEV_TYPE_ATTR_RO(name);
const struct mbochs_type *type =
&mbochs_types[mtype_get_type_group_id(mtype)];
static ssize_t
description_show(struct kobject *kobj, struct device *dev, char *buf)
return sprintf(buf, "%s\n", type->name);
}
static MDEV_TYPE_ATTR_RO(name);
static ssize_t description_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
const struct mbochs_type *type = mbochs_find_type(kobj);
const struct mbochs_type *type =
&mbochs_types[mtype_get_type_group_id(mtype)];
return sprintf(buf, "virtual display, %d MB video memory\n",
type ? type->mbytes : 0);
}
MDEV_TYPE_ATTR_RO(description);
static MDEV_TYPE_ATTR_RO(description);
static ssize_t
available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t available_instances_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr,
char *buf)
{
const struct mbochs_type *type = mbochs_find_type(kobj);
const struct mbochs_type *type =
&mbochs_types[mtype_get_type_group_id(mtype)];
int count = (max_mbytes - mbochs_used_mbytes) / type->mbytes;
return sprintf(buf, "%d\n", count);
}
MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(available_instances);
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t device_api_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}
MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(device_api);
static struct attribute *mdev_types_attrs[] = {
&mdev_type_attr_name.attr,

View file

@ -99,16 +99,6 @@ struct mdev_state {
void *memblk;
};
static const struct mdpy_type *mdpy_find_type(struct kobject *kobj)
{
int i;
for (i = 0; i < ARRAY_SIZE(mdpy_types); i++)
if (strcmp(mdpy_types[i].name, kobj->name) == 0)
return mdpy_types + i;
return NULL;
}
static void mdpy_create_config_space(struct mdev_state *mdev_state)
{
STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
@ -226,9 +216,10 @@ static int mdpy_reset(struct mdev_device *mdev)
return 0;
}
static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev)
static int mdpy_create(struct mdev_device *mdev)
{
const struct mdpy_type *type = mdpy_find_type(kobj);
const struct mdpy_type *type =
&mdpy_types[mdev_get_type_group_id(mdev)];
struct device *dev = mdev_dev(mdev);
struct mdev_state *mdev_state;
u32 fbsize;
@ -246,8 +237,6 @@ static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev)
return -ENOMEM;
}
if (!type)
type = &mdpy_types[0];
fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp);
mdev_state->memblk = vmalloc_user(fbsize);
@ -256,8 +245,8 @@ static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev)
kfree(mdev_state);
return -ENOMEM;
}
dev_info(dev, "%s: %s (%dx%d)\n",
__func__, kobj->name, type->width, type->height);
dev_info(dev, "%s: %s (%dx%d)\n", __func__, type->name, type->width,
type->height);
mutex_init(&mdev_state->ops_lock);
mdev_state->mdev = mdev;
@ -658,42 +647,47 @@ static const struct attribute_group mdev_dev_group = {
.attrs = mdev_dev_attrs,
};
const struct attribute_group *mdev_dev_groups[] = {
static const struct attribute_group *mdev_dev_groups[] = {
&mdev_dev_group,
NULL,
};
static ssize_t
name_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t name_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", kobj->name);
}
MDEV_TYPE_ATTR_RO(name);
const struct mdpy_type *type =
&mdpy_types[mtype_get_type_group_id(mtype)];
static ssize_t
description_show(struct kobject *kobj, struct device *dev, char *buf)
return sprintf(buf, "%s\n", type->name);
}
static MDEV_TYPE_ATTR_RO(name);
static ssize_t description_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
const struct mdpy_type *type = mdpy_find_type(kobj);
const struct mdpy_type *type =
&mdpy_types[mtype_get_type_group_id(mtype)];
return sprintf(buf, "virtual display, %dx%d framebuffer\n",
type ? type->width : 0,
type ? type->height : 0);
}
MDEV_TYPE_ATTR_RO(description);
static MDEV_TYPE_ATTR_RO(description);
static ssize_t
available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t available_instances_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n", max_devices - mdpy_count);
}
MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(available_instances);
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t device_api_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}
MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(device_api);
static struct attribute *mdev_types_attrs[] = {
&mdev_type_attr_name.attr,

View file

@ -708,26 +708,10 @@ static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,
return ret;
}
static int mtty_create(struct kobject *kobj, struct mdev_device *mdev)
static int mtty_create(struct mdev_device *mdev)
{
struct mdev_state *mdev_state;
char name[MTTY_STRING_LEN];
int nr_ports = 0, i;
if (!mdev)
return -EINVAL;
for (i = 0; i < 2; i++) {
snprintf(name, MTTY_STRING_LEN, "%s-%d",
dev_driver_string(mdev_parent_dev(mdev)), i + 1);
if (!strcmp(kobj->name, name)) {
nr_ports = i + 1;
break;
}
}
if (!nr_ports)
return -EINVAL;
int nr_ports = mdev_get_type_group_id(mdev) + 1;
mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
if (mdev_state == NULL)
@ -1308,44 +1292,25 @@ static const struct attribute_group *mdev_dev_groups[] = {
NULL,
};
static ssize_t
name_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t name_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
char name[MTTY_STRING_LEN];
int i;
const char *name_str[2] = {"Single port serial", "Dual port serial"};
static const char *name_str[2] = { "Single port serial",
"Dual port serial" };
for (i = 0; i < 2; i++) {
snprintf(name, MTTY_STRING_LEN, "%s-%d",
dev_driver_string(dev), i + 1);
if (!strcmp(kobj->name, name))
return sprintf(buf, "%s\n", name_str[i]);
}
return -EINVAL;
return sysfs_emit(buf, "%s\n",
name_str[mtype_get_type_group_id(mtype)]);
}
static MDEV_TYPE_ATTR_RO(name);
static ssize_t
available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
static ssize_t available_instances_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr,
char *buf)
{
char name[MTTY_STRING_LEN];
int i;
struct mdev_state *mds;
int ports = 0, used = 0;
for (i = 0; i < 2; i++) {
snprintf(name, MTTY_STRING_LEN, "%s-%d",
dev_driver_string(dev), i + 1);
if (!strcmp(kobj->name, name)) {
ports = i + 1;
break;
}
}
if (!ports)
return -EINVAL;
unsigned int ports = mtype_get_type_group_id(mtype) + 1;
int used = 0;
list_for_each_entry(mds, &mdev_devices_list, next)
used += mds->nr_ports;
@ -1355,9 +1320,8 @@ available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
static MDEV_TYPE_ATTR_RO(available_instances);
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
char *buf)
static ssize_t device_api_show(struct mdev_type *mtype,
struct mdev_type_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}
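
All three sample drivers lean on the same contract: mdev_get_type_group_id()/mtype_get_type_group_id() return the index of the type within the driver's supported_type_groups array, so a parallel driver-private table can be indexed directly instead of string-matching kobject names. Roughly (my_types and my_create are illustrative, not from the samples):

    static const struct my_type {
            const char *name;
            unsigned int mbytes;
    } my_types[] = {
            { .name = "small", .mbytes = 4 },
            { .name = "large", .mbytes = 16 },
    };

    /* supported_type_groups[] must be built in the same order, so that
     * the returned group id is a valid index into my_types[]. */
    static int my_create(struct mdev_device *mdev)
    {
            const struct my_type *type =
                    &my_types[mdev_get_type_group_id(mdev)];

            return type->mbytes ? 0 : -EINVAL;
    }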