linux-stable/include/linux/vfio.h
Yi Liu 8b6f173a4c vfio: Add cdev for vfio_device
This adds cdev support for vfio_device. It allows the user to directly
open a vfio device w/o using the legacy container/group interface, as a
prerequisite for supporting new iommu features like nested translation
and etc.

The device fd opened in this manner doesn't have the capability to access
the device as the fops open() doesn't open the device until the successful
VFIO_DEVICE_BIND_IOMMUFD ioctl which will be added in a later patch.

With this patch, devices registered to vfio core would have both the legacy
group and the new device interfaces created.

- group interface : /dev/vfio/$groupID
- device interface: /dev/vfio/devices/vfioX - normal device
		    ("X" is a unique number across vfio devices)

For a given device, the user can identify the matching vfioX by searching
the vfio-dev folder under the sysfs path of the device. Take PCI device
(0000:6a:01.0) as an example, /sys/bus/pci/devices/0000\:6a\:01.0/vfio-dev/vfioX
implies the matching vfioX under /dev/vfio/devices/, and vfio-dev/vfioX/dev
contains the major:minor number of the matching /dev/vfio/devices/vfioX.
The user can get device fd by opening the /dev/vfio/devices/vfioX.

The vfio_device cdev logic in this patch:
*) __vfio_register_dev() path ends up doing cdev_device_add() for each
   vfio_device if VFIO_DEVICE_CDEV configured.
*) vfio_unregister_group_dev() path does cdev_device_del();

cdev interface does not support noiommu devices, so VFIO only creates the
legacy group interface for the physical devices that do not have IOMMU.
noiommu users should use the legacy group interface.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Tested-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20230718135551.6592-19-yi.l.liu@intel.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2023-07-25 10:19:40 -06:00

341 lines
12 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* VFIO API definition
*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*/
#ifndef VFIO_H
#define VFIO_H
#include <linux/iommu.h>
#include <linux/mm.h>
#include <linux/workqueue.h>
#include <linux/poll.h>
#include <linux/cdev.h>
#include <uapi/linux/vfio.h>
#include <linux/iova_bitmap.h>
struct kvm;
struct iommufd_ctx;
struct iommufd_device;
struct iommufd_access;
/*
* VFIO devices can be placed in a set, this allows all devices to share this
* structure and the VFIO core will provide a lock that is held around
* open_device()/close_device() for all devices in the set.
*/
struct vfio_device_set {
void *set_id;
struct mutex lock;
struct list_head device_list;
unsigned int device_count;
};
struct vfio_device {
struct device *dev;
const struct vfio_device_ops *ops;
/*
* mig_ops/log_ops is a static property of the vfio_device which must
* be set prior to registering the vfio_device.
*/
const struct vfio_migration_ops *mig_ops;
const struct vfio_log_ops *log_ops;
struct vfio_group *group;
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
unsigned int migration_flags;
struct kvm *kvm;
/* Members below here are private, not for driver use */
unsigned int index;
struct device device; /* device.kref covers object life circle */
#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
struct cdev cdev;
#endif
refcount_t refcount; /* user count on registered device*/
unsigned int open_count;
struct completion comp;
struct list_head group_next;
struct list_head iommu_entry;
struct iommufd_access *iommufd_access;
void (*put_kvm)(struct kvm *kvm);
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_device *iommufd_device;
bool iommufd_attached;
#endif
};
/**
* struct vfio_device_ops - VFIO bus driver device callbacks
*
* @name: Name of the device driver.
* @init: initialize private fields in device structure
* @release: Reclaim private fields in device structure
* @bind_iommufd: Called when binding the device to an iommufd
* @unbind_iommufd: Opposite of bind_iommufd
* @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the
* bound iommufd. Undo in unbind_iommufd if @detach_ioas is not
* called.
* @detach_ioas: Opposite of attach_ioas
* @open_device: Called when the first file descriptor is opened for this device
* @close_device: Opposite of open_device
* @read: Perform read(2) on device file descriptor
* @write: Perform write(2) on device file descriptor
* @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
* operations documented below
* @mmap: Perform mmap(2) on a region of the device file descriptor
* @request: Request for the bus driver to release the device
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
* @dma_unmap: Called when userspace unmaps IOVA from the container
* this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
*/
struct vfio_device_ops {
char *name;
int (*init)(struct vfio_device *vdev);
void (*release)(struct vfio_device *vdev);
int (*bind_iommufd)(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void (*unbind_iommufd)(struct vfio_device *vdev);
int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
void (*detach_ioas)(struct vfio_device *vdev);
int (*open_device)(struct vfio_device *vdev);
void (*close_device)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t (*write)(struct vfio_device *vdev, const char __user *buf,
size_t count, loff_t *size);
long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
};
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev);
int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx);
int vfio_iommufd_physical_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev);
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev);
#else
static inline struct iommufd_ctx *
vfio_iommufd_device_ictx(struct vfio_device *vdev)
{
return NULL;
}
static inline int
vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx)
{
return VFIO_PCI_DEVID_NOT_OWNED;
}
#define vfio_iommufd_physical_bind \
((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
u32 *out_device_id)) NULL)
#define vfio_iommufd_physical_unbind \
((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_physical_attach_ioas \
((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_physical_detach_ioas \
((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_bind \
((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
u32 *out_device_id)) NULL)
#define vfio_iommufd_emulated_unbind \
((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_attach_ioas \
((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_emulated_detach_ioas \
((void (*)(struct vfio_device *vdev)) NULL)
#endif
static inline bool vfio_device_cdev_opened(struct vfio_device *device)
{
return false;
}
/**
* struct vfio_migration_ops - VFIO bus device driver migration callbacks
*
* @migration_set_state: Optional callback to change the migration state for
* devices that support migration. It's mandatory for
* VFIO_DEVICE_FEATURE_MIGRATION migration support.
* The returned FD is used for data transfer according to the FSM
* definition. The driver is responsible to ensure that FD reaches end
* of stream or error whenever the migration FSM leaves a data transfer
* state or before close_device() returns.
* @migration_get_state: Optional callback to get the migration state for
* devices that support migration. It's mandatory for
* VFIO_DEVICE_FEATURE_MIGRATION migration support.
* @migration_get_data_size: Optional callback to get the estimated data
* length that will be required to complete stop copy. It's mandatory for
* VFIO_DEVICE_FEATURE_MIGRATION migration support.
*/
struct vfio_migration_ops {
struct file *(*migration_set_state)(
struct vfio_device *device,
enum vfio_device_mig_state new_state);
int (*migration_get_state)(struct vfio_device *device,
enum vfio_device_mig_state *curr_state);
int (*migration_get_data_size)(struct vfio_device *device,
unsigned long *stop_copy_length);
};
/**
* struct vfio_log_ops - VFIO bus device driver logging callbacks
*
* @log_start: Optional callback to ask the device start DMA logging.
* @log_stop: Optional callback to ask the device stop DMA logging.
* @log_read_and_clear: Optional callback to ask the device read
* and clear the dirty DMAs in some given range.
*
* The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
* of features does not track logging state relative to the device,
* therefore the device implementation of vfio_log_ops must handle
* arbitrary user requests. This includes rejecting subsequent calls
* to log_start without an intervening log_stop, as well as graceful
* handling of log_stop and log_read_and_clear from invalid states.
*/
struct vfio_log_ops {
int (*log_start)(struct vfio_device *device,
struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
int (*log_stop)(struct vfio_device *device);
int (*log_read_and_clear)(struct vfio_device *device,
unsigned long iova, unsigned long length,
struct iova_bitmap *dirty);
};
/**
* vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
* @flags: Arg from the device_feature op
* @argsz: Arg from the device_feature op
* @supported_ops: Combination of VFIO_DEVICE_FEATURE_GET and SET the driver
* supports
* @minsz: Minimum data size the driver accepts
*
* For use in a driver's device_feature op. Checks that the inputs to the
* VFIO_DEVICE_FEATURE ioctl are correct for the driver's feature. Returns 1 if
* the driver should execute the get or set, otherwise the relevant
* value should be returned.
*/
static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops,
size_t minsz)
{
if ((flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)) &
~supported_ops)
return -EINVAL;
if (flags & VFIO_DEVICE_FEATURE_PROBE)
return 0;
/* Without PROBE one of GET or SET must be requested */
if (!(flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)))
return -EINVAL;
if (argsz < minsz)
return -EINVAL;
return 1;
}
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
const struct vfio_device_ops *ops);
#define vfio_alloc_device(dev_struct, member, dev, ops) \
container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
struct dev_struct, member)), \
dev, ops), \
struct dev_struct, member)
static inline void vfio_put_device(struct vfio_device *device)
{
put_device(&device->device);
}
int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id);
unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
struct vfio_device *
vfio_find_device_in_devset(struct vfio_device_set *dev_set,
struct device *dev);
int vfio_mig_get_next_state(struct vfio_device *device,
enum vfio_device_mig_state cur_fsm,
enum vfio_device_mig_state new_fsm,
enum vfio_device_mig_state *next_fsm);
/*
* External user API
*/
struct iommu_group *vfio_file_iommu_group(struct file *file);
bool vfio_file_is_group(struct file *file);
bool vfio_file_is_valid(struct file *file);
bool vfio_file_enforced_coherent(struct file *file);
void vfio_file_set_kvm(struct file *file, struct kvm *kvm);
bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
#define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long))
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
int npage, int prot, struct page **pages);
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage);
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova,
void *data, size_t len, bool write);
/*
* Sub-module helpers
*/
struct vfio_info_cap {
struct vfio_info_cap_header *buf;
size_t size;
};
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
size_t size, u16 id,
u16 version);
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
int vfio_info_add_capability(struct vfio_info_cap *caps,
struct vfio_info_cap_header *cap, size_t size);
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
int num_irqs, int max_irq_type,
size_t *data_size);
/*
* IRQfd - generic
*/
struct virqfd {
void *opaque;
struct eventfd_ctx *eventfd;
int (*handler)(void *, void *);
void (*thread)(void *, void *);
void *data;
struct work_struct inject;
wait_queue_entry_t wait;
poll_table pt;
struct work_struct shutdown;
struct virqfd **pvirqfd;
};
int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *),
void (*thread)(void *, void *), void *data,
struct virqfd **pvirqfd, int fd);
void vfio_virqfd_disable(struct virqfd **pvirqfd);
#endif /* VFIO_H */