/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * VFIO API definition
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 */
#ifndef VFIO_H
#define VFIO_H

#include <linux/iommu.h>
#include <linux/mm.h>
#include <linux/workqueue.h>
#include <linux/poll.h>
#include <linux/cdev.h>
#include <uapi/linux/vfio.h>
#include <linux/iova_bitmap.h>

struct kvm;
struct iommufd_ctx;
struct iommufd_device;
struct iommufd_access;

/*
 * VFIO devices can be placed in a set; this allows all devices to share this
 * structure, and the VFIO core will provide a lock that is held around
 * open_device()/close_device() for all devices in the set.
 */
struct vfio_device_set {
	void *set_id;
	struct mutex lock;
	struct list_head device_list;
	unsigned int device_count;
};
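
/*
 * Illustrative sketch (not part of the kernel sources): a driver that wants
 * all functions of one physical device opened/closed under a common lock can
 * share a set keyed on some stable token. Here the PCI slot is used as the
 * set_id, mirroring what vfio-pci does; "my_init" is a hypothetical init op.
 *
 *	static int my_init(struct vfio_device *vdev)
 *	{
 *		struct pci_dev *pdev = to_pci_dev(vdev->dev);
 *
 *		// Devices that pass the same set_id pointer end up in the
 *		// same vfio_device_set.
 *		return vfio_assign_device_set(vdev, pdev->slot);
 *	}
 */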

struct vfio_device {
	struct device *dev;
	const struct vfio_device_ops *ops;
	/*
	 * mig_ops/log_ops is a static property of the vfio_device which must
	 * be set prior to registering the vfio_device.
	 */
	const struct vfio_migration_ops *mig_ops;
	const struct vfio_log_ops *log_ops;
#if IS_ENABLED(CONFIG_VFIO_GROUP)
	struct vfio_group *group;
	struct list_head group_next;
	struct list_head iommu_entry;
#endif
	struct vfio_device_set *dev_set;
	struct list_head dev_set_list;
	unsigned int migration_flags;
	struct kvm *kvm;

	/* Members below here are private, not for driver use */
	unsigned int index;
	struct device device;	/* device.kref covers the object life cycle */
#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
	struct cdev cdev;
#endif
	refcount_t refcount;	/* user count on registered device */
	unsigned int open_count;
	struct completion comp;
	struct iommufd_access *iommufd_access;
	void (*put_kvm)(struct kvm *kvm);
#if IS_ENABLED(CONFIG_IOMMUFD)
	struct iommufd_device *iommufd_device;
	u8 iommufd_attached:1;
#endif
	u8 cdev_opened:1;
};

/**
 * struct vfio_device_ops - VFIO bus driver device callbacks
 *
 * @name: Name of the device driver.
 * @init: initialize private fields in device structure
 * @release: Reclaim private fields in device structure
 * @bind_iommufd: Called when binding the device to an iommufd
 * @unbind_iommufd: Opposite of bind_iommufd
 * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the
 *		 bound iommufd. Undo in unbind_iommufd if @detach_ioas is not
 *		 called.
 * @detach_ioas: Opposite of attach_ioas
 * @open_device: Called when the first file descriptor is opened for this device
 * @close_device: Opposite of open_device
 * @read: Perform read(2) on device file descriptor
 * @write: Perform write(2) on device file descriptor
 * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
 *	   operations documented below
 * @mmap: Perform mmap(2) on a region of the device file descriptor
 * @request: Request for the bus driver to release the device
 * @match: Optional device name match callback (return: 0 for no-match, >0 for
 *	   match, -errno for abort (ex. match with insufficient or incorrect
 *	   additional args))
 * @dma_unmap: Called when userspace unmaps IOVA from the container
 *	       this device is attached to.
 * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
 */
struct vfio_device_ops {
	char	*name;
	int	(*init)(struct vfio_device *vdev);
	void	(*release)(struct vfio_device *vdev);
	int	(*bind_iommufd)(struct vfio_device *vdev,
				struct iommufd_ctx *ictx, u32 *out_device_id);
	void	(*unbind_iommufd)(struct vfio_device *vdev);
	int	(*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
	void	(*detach_ioas)(struct vfio_device *vdev);
	int	(*open_device)(struct vfio_device *vdev);
	void	(*close_device)(struct vfio_device *vdev);
	ssize_t	(*read)(struct vfio_device *vdev, char __user *buf,
			size_t count, loff_t *ppos);
	ssize_t	(*write)(struct vfio_device *vdev, const char __user *buf,
			 size_t count, loff_t *size);
	long	(*ioctl)(struct vfio_device *vdev, unsigned int cmd,
			 unsigned long arg);
	int	(*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
	void	(*request)(struct vfio_device *vdev, unsigned int count);
	int	(*match)(struct vfio_device *vdev, char *buf);
	void	(*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
	int	(*device_feature)(struct vfio_device *device, u32 flags,
				  void __user *arg, size_t argsz);
};
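
/*
 * Illustrative sketch (not part of the kernel sources): a minimal driver
 * typically provides name, init/release and the file operations it supports;
 * callbacks it does not implement are simply left NULL. All "my_*" handlers
 * below are hypothetical.
 *
 *	static const struct vfio_device_ops my_vfio_ops = {
 *		.name		= "my-vfio-driver",
 *		.init		= my_init,
 *		.release	= my_release,
 *		.open_device	= my_open_device,
 *		.close_device	= my_close_device,
 *		.read		= my_read,
 *		.write		= my_write,
 *		.ioctl		= my_ioctl,
 *		.mmap		= my_mmap,
 *	};
 */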

#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev);
int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx);
int vfio_iommufd_physical_bind(struct vfio_device *vdev,
			       struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev);
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
			       struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev);
#else
static inline struct iommufd_ctx *
vfio_iommufd_device_ictx(struct vfio_device *vdev)
{
	return NULL;
}

static inline int
vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx)
{
	return VFIO_PCI_DEVID_NOT_OWNED;
}

#define vfio_iommufd_physical_bind                                      \
	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,  \
		  u32 *out_device_id)) NULL)
#define vfio_iommufd_physical_unbind \
	((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_physical_attach_ioas \
	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_physical_detach_ioas \
	((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_bind                                      \
	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,  \
		  u32 *out_device_id)) NULL)
#define vfio_iommufd_emulated_unbind \
	((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_attach_ioas \
	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_emulated_detach_ioas \
	((void (*)(struct vfio_device *vdev)) NULL)
#endif
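
/*
 * Illustrative sketch (not part of the kernel sources): drivers for physical
 * devices normally wire the vfio_iommufd_physical_*() helpers straight into
 * their vfio_device_ops, while mdev-style drivers use the emulated variants.
 * In the !CONFIG_IOMMUFD case the macros above degrade to typed NULLs so the
 * same initializer still compiles.
 *
 *	static const struct vfio_device_ops my_physical_ops = {
 *		.name		= "my-vfio-driver",
 *		.bind_iommufd	= vfio_iommufd_physical_bind,
 *		.unbind_iommufd	= vfio_iommufd_physical_unbind,
 *		.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 *		.detach_ioas	= vfio_iommufd_physical_detach_ioas,
 *	};
 */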

static inline bool vfio_device_cdev_opened(struct vfio_device *device)
{
	return device->cdev_opened;
}

/**
 * struct vfio_migration_ops - VFIO bus device driver migration callbacks
 *
 * @migration_set_state: Optional callback to change the migration state for
 *	   devices that support migration. It's mandatory for
 *	   VFIO_DEVICE_FEATURE_MIGRATION migration support.
 *	   The returned FD is used for data transfer according to the FSM
 *	   definition. The driver is responsible for ensuring that the FD
 *	   reaches end of stream or error whenever the migration FSM leaves a
 *	   data transfer state or before close_device() returns.
 * @migration_get_state: Optional callback to get the migration state for
 *	   devices that support migration. It's mandatory for
 *	   VFIO_DEVICE_FEATURE_MIGRATION migration support.
 * @migration_get_data_size: Optional callback to get the estimated data
 *	   length that will be required to complete stop copy. It's mandatory
 *	   for VFIO_DEVICE_FEATURE_MIGRATION migration support.
 */
struct vfio_migration_ops {
	struct file *(*migration_set_state)(
		struct vfio_device *device,
		enum vfio_device_mig_state new_state);
	int (*migration_get_state)(struct vfio_device *device,
				   enum vfio_device_mig_state *curr_state);
	int (*migration_get_data_size)(struct vfio_device *device,
				       unsigned long *stop_copy_length);
};
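
/*
 * Illustrative sketch (not part of the kernel sources): a migration-capable
 * driver points mig_ops at its callbacks and advertises the supported arcs
 * in migration_flags before registering the device. "my_mig_ops" and
 * "my_vdev" are hypothetical.
 *
 *	static const struct vfio_migration_ops my_mig_ops = {
 *		.migration_set_state	 = my_set_state,
 *		.migration_get_state	 = my_get_state,
 *		.migration_get_data_size = my_get_data_size,
 *	};
 *
 *	// In the init path, prior to registering the device:
 *	my_vdev->mig_ops = &my_mig_ops;
 *	my_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY;
 */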

/**
 * struct vfio_log_ops - VFIO bus device driver logging callbacks
 *
 * @log_start: Optional callback to ask the device to start DMA logging.
 * @log_stop: Optional callback to ask the device to stop DMA logging.
 * @log_read_and_clear: Optional callback to ask the device to read
 *	   and clear the dirty DMAs in some given range.
 *
 * The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
 * of features does not track logging state relative to the device,
 * therefore the device implementation of vfio_log_ops must handle
 * arbitrary user requests. This includes rejecting subsequent calls
 * to log_start without an intervening log_stop, as well as graceful
 * handling of log_stop and log_read_and_clear from invalid states.
 */
struct vfio_log_ops {
	int (*log_start)(struct vfio_device *device,
		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
	int (*log_stop)(struct vfio_device *device);
	int (*log_read_and_clear)(struct vfio_device *device,
		unsigned long iova, unsigned long length,
		struct iova_bitmap *dirty);
};
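
/*
 * Illustrative sketch (not part of the kernel sources): because the core does
 * not track logging state, a driver must police it itself, e.g. by refusing a
 * second log_start. "struct my_dev" and its "logging_enabled" flag are
 * hypothetical.
 *
 *	static int my_log_start(struct vfio_device *device,
 *				struct rb_root_cached *ranges, u32 nnodes,
 *				u64 *page_size)
 *	{
 *		struct my_dev *d = container_of(device, struct my_dev, vdev);
 *
 *		if (d->logging_enabled)
 *			return -EEXIST;	// no intervening log_stop
 *		// ... program the hardware, set d->logging_enabled ...
 *		return 0;
 *	}
 */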

/**
 * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
 * @flags: Arg from the device_feature op
 * @argsz: Arg from the device_feature op
 * @supported_ops: Combination of VFIO_DEVICE_FEATURE_GET and SET the driver
 *                 supports
 * @minsz: Minimum data size the driver accepts
 *
 * For use in a driver's device_feature op. Checks that the inputs to the
 * VFIO_DEVICE_FEATURE ioctl are correct for the driver's feature. Returns 1 if
 * the driver should execute the get or set; otherwise the value returned here
 * (0 for a successful probe, -errno on invalid input) should be returned from
 * the op.
 */
static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops,
				     size_t minsz)
{
	if ((flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)) &
	    ~supported_ops)
		return -EINVAL;
	if (flags & VFIO_DEVICE_FEATURE_PROBE)
		return 0;
	/* Without PROBE one of GET or SET must be requested */
	if (!(flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)))
		return -EINVAL;
	if (argsz < minsz)
		return -EINVAL;
	return 1;
}
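
/*
 * Illustrative sketch (not part of the kernel sources): a device_feature op
 * first validates flags/argsz with vfio_check_feature() and only acts on a
 * positive return. "struct my_feature_data" is hypothetical.
 *
 *	static int my_device_feature(struct vfio_device *device, u32 flags,
 *				     void __user *arg, size_t argsz)
 *	{
 *		struct my_feature_data data;
 *		int ret;
 *
 *		ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
 *					 sizeof(data));
 *		if (ret != 1)
 *			return ret;	// 0 (probe ok) or -errno
 *		// ... fill data from the device ...
 *		return copy_to_user(arg, &data, sizeof(data)) ? -EFAULT : 0;
 *	}
 */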

struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops);
#define vfio_alloc_device(dev_struct, member, dev, ops)				\
	container_of(_vfio_alloc_device(sizeof(struct dev_struct) +		\
					BUILD_BUG_ON_ZERO(offsetof(		\
						struct dev_struct, member)),	\
					dev, ops),				\
		     struct dev_struct, member)

static inline void vfio_put_device(struct vfio_device *device)
{
	put_device(&device->device);
}
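
/*
 * Illustrative sketch (not part of the kernel sources): the embedded
 * vfio_device must be the first member of the driver structure so the core
 * can convert between the two; vfio_alloc_device() enforces that at build
 * time via the offsetof() check above. "struct my_dev" is hypothetical.
 *
 *	struct my_dev {
 *		struct vfio_device vdev;	// must be first
 *		void __iomem *bar;
 *	};
 *
 *	struct my_dev *d = vfio_alloc_device(my_dev, vdev, dev, &my_vfio_ops);
 *	if (IS_ERR(d))
 *		return PTR_ERR(d);
 *	// ... on teardown or error:
 *	vfio_put_device(&d->vdev);	// drops the allocation reference
 */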

int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);

int vfio_assign_device_set(struct vfio_device *device, void *set_id);
unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
struct vfio_device *
vfio_find_device_in_devset(struct vfio_device_set *dev_set,
			   struct device *dev);

int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm);
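
/*
 * Illustrative sketch (not part of the kernel sources): a driver's
 * migration_set_state implementation can walk the FSM one arc at a time,
 * letting the core decompose a combination transition into the individual
 * arcs the driver implements. "my_step_device_state" is hypothetical.
 *
 *	while (cur != new_state) {
 *		enum vfio_device_mig_state next;
 *
 *		if (vfio_mig_get_next_state(device, cur, new_state, &next))
 *			return ERR_PTR(-EINVAL);
 *		// perform the single arc cur -> next on the hardware
 *		res = my_step_device_state(device, next);
 *		if (IS_ERR(res))
 *			return res;
 *		cur = next;
 *	}
 */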

void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
			      u32 req_nodes);

/*
 * External user API
 */
#if IS_ENABLED(CONFIG_VFIO_GROUP)
struct iommu_group *vfio_file_iommu_group(struct file *file);
bool vfio_file_is_group(struct file *file);
bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
#else
static inline struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	return NULL;
}

static inline bool vfio_file_is_group(struct file *file)
{
	return false;
}

static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	return false;
}
#endif
bool vfio_file_is_valid(struct file *file);
bool vfio_file_enforced_coherent(struct file *file);
void vfio_file_set_kvm(struct file *file, struct kvm *kvm);

#define VFIO_PIN_PAGES_MAX_ENTRIES	(PAGE_SIZE/sizeof(unsigned long))

int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages);
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage);
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova,
		void *data, size_t len, bool write);
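
/*
 * Illustrative sketch (not part of the kernel sources): an emulated (mdev
 * style) driver pins the user page backing one IOVA before DMA-ing to it,
 * and unpins it when done. "iova" is assumed to be PAGE_SIZE aligned and
 * error handling is abbreviated.
 *
 *	struct page *page;
 *	int ret;
 *
 *	ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ | IOMMU_WRITE, &page);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 *	// ... access the page ...
 *	vfio_unpin_pages(vdev, iova, 1);
 *
 * vfio_dma_rw() is the copy-based alternative when pinning is not needed.
 */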

/*
 * Sub-module helpers
 */
struct vfio_info_cap {
	struct vfio_info_cap_header *buf;
	size_t size;
};
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id,
					       u16 version);
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size);

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
				       int num_irqs, int max_irq_type,
				       size_t *data_size);

/*
 * IRQfd - generic
 */
struct virqfd {
	void			*opaque;
	struct eventfd_ctx	*eventfd;
	int			(*handler)(void *, void *);
	void			(*thread)(void *, void *);
	void			*data;
	struct work_struct	inject;
	wait_queue_entry_t	wait;
	poll_table		pt;
	struct work_struct	shutdown;
	struct virqfd		**pvirqfd;
};

int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *),
		       void (*thread)(void *, void *), void *data,
		       struct virqfd **pvirqfd, int fd);
void vfio_virqfd_disable(struct virqfd **pvirqfd);
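
/*
 * Illustrative sketch (not part of the kernel sources): wiring an eventfd to
 * a handler that runs when it is signaled. "my_handler" and "my_dev" are
 * hypothetical; "fd" is an eventfd supplied by userspace.
 *
 *	static int my_handler(void *opaque, void *data)
 *	{
 *		// runs on eventfd signal; must not block
 *		return 0;
 *	}
 *
 *	ret = vfio_virqfd_enable(my_dev, my_handler, NULL, NULL,
 *				 &my_dev->virqfd, fd);
 *	// ... on teardown:
 *	vfio_virqfd_disable(&my_dev->virqfd);
 */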

#endif /* VFIO_H */