From 2170dd04316e0754cbbfa4892a25aead39d225f7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 Dec 2017 12:56:54 +1100 Subject: [PATCH 1/4] vfio-pci: Mask INTx if a device is not capable of enabling it At the moment VFIO rightfully assumes that INTx is supported if the interrupt pin is not set to zero in the device config space. However if that is not the case (the pin is not zero but pdev->irq is), vfio_intx_enable() fails. In order to prevent the userspace from trying to enable INTx when we know that it cannot work, let's mask the PCI_INTERRUPT_PIN register. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f041b1a6cf66..a98681dca1d3 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,9 @@ static bool vfio_pci_nointx(struct pci_dev *pdev) } } + if (!pdev->irq) + return true; + return false; } From dda01f787df9f9e46f1c0bf8aa11f246e300750d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 12 Dec 2017 12:59:39 -0700 Subject: [PATCH 2/4] vfio: Simplify capability helper The vfio_info_add_capability() helper requires the caller to pass a capability ID, which it then uses to fill in header fields, assuming hard coded versions. This makes for an awkward and rigid interface. The only thing we want this helper to do is allocate sufficient space in the caps buffer and chain this capability into the list. Reduce it to that simple task. 
Reviewed-by: Alexey Kardashevskiy Acked-by: Zhenyu Wang Reviewed-by: Kirti Wankhede Reviewed-by: Peter Xu Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/gpu/drm/i915/gvt/kvmgt.c | 15 ++++++--- drivers/vfio/pci/vfio_pci.c | 14 +++++---- drivers/vfio/vfio.c | 54 +++----------------------------- include/linux/vfio.h | 3 +- 4 files changed, 25 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 96060920a6fe..0a7d084da1a2 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1012,6 +1012,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (!sparse) return -ENOMEM; + sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; + sparse->header.version = 1; sparse->nr_areas = nr_areas; cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; sparse->areas[0].offset = @@ -1033,7 +1035,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, break; default: { - struct vfio_region_info_cap_type cap_type; + struct vfio_region_info_cap_type cap_type = { + .header.id = VFIO_REGION_INFO_CAP_TYPE, + .header.version = 1 }; if (info.index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) @@ -1050,8 +1054,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, cap_type.subtype = vgpu->vdev.region[i].subtype; ret = vfio_info_add_capability(&caps, - VFIO_REGION_INFO_CAP_TYPE, - &cap_type); + &cap_type.header, + sizeof(cap_type)); if (ret) return ret; } @@ -1061,8 +1065,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, switch (cap_type_id) { case VFIO_REGION_INFO_CAP_SPARSE_MMAP: ret = vfio_info_add_capability(&caps, - VFIO_REGION_INFO_CAP_SPARSE_MMAP, - sparse); + &sparse->header, sizeof(*sparse) + + (sparse->nr_areas * + sizeof(*sparse->areas))); kfree(sparse); if (ret) return ret; diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 
a98681dca1d3..de48acd29a84 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -585,6 +585,8 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, if (!sparse) return -ENOMEM; + sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; + sparse->header.version = 1; sparse->nr_areas = nr_areas; if (vdev->msix_offset & PAGE_MASK) { @@ -600,8 +602,7 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, i++; } - ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP, - sparse); + ret = vfio_info_add_capability(caps, &sparse->header, size); kfree(sparse); return ret; @@ -744,7 +745,9 @@ static long vfio_pci_ioctl(void *device_data, break; default: { - struct vfio_region_info_cap_type cap_type; + struct vfio_region_info_cap_type cap_type = { + .header.id = VFIO_REGION_INFO_CAP_TYPE, + .header.version = 1 }; if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) @@ -759,9 +762,8 @@ static long vfio_pci_ioctl(void *device_data, cap_type.type = vdev->region[i].type; cap_type.subtype = vdev->region[i].subtype; - ret = vfio_info_add_capability(&caps, - VFIO_REGION_INFO_CAP_TYPE, - &cap_type); + ret = vfio_info_add_capability(&caps, &cap_type.header, + sizeof(cap_type)); if (ret) return ret; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 2bc3705a99bd..721f97f8dac1 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1857,63 +1857,19 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) } EXPORT_SYMBOL(vfio_info_cap_shift); -static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type) +int vfio_info_add_capability(struct vfio_info_cap *caps, + struct vfio_info_cap_header *cap, size_t size) { struct vfio_info_cap_header *header; - struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type; - size_t size; - size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas); - header = vfio_info_cap_add(caps, size, - VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1); 
+ header = vfio_info_cap_add(caps, size, cap->id, cap->version); if (IS_ERR(header)) return PTR_ERR(header); - sparse_cap = container_of(header, - struct vfio_region_info_cap_sparse_mmap, header); - sparse_cap->nr_areas = sparse->nr_areas; - memcpy(sparse_cap->areas, sparse->areas, - sparse->nr_areas * sizeof(*sparse->areas)); + memcpy(header + 1, cap + 1, size - sizeof(*header)); + return 0; } - -static int region_type_cap(struct vfio_info_cap *caps, void *cap_type) -{ - struct vfio_info_cap_header *header; - struct vfio_region_info_cap_type *type_cap, *cap = cap_type; - - header = vfio_info_cap_add(caps, sizeof(*cap), - VFIO_REGION_INFO_CAP_TYPE, 1); - if (IS_ERR(header)) - return PTR_ERR(header); - - type_cap = container_of(header, struct vfio_region_info_cap_type, - header); - type_cap->type = cap->type; - type_cap->subtype = cap->subtype; - return 0; -} - -int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id, - void *cap_type) -{ - int ret = -EINVAL; - - if (!cap_type) - return 0; - - switch (cap_type_id) { - case VFIO_REGION_INFO_CAP_SPARSE_MMAP: - ret = sparse_mmap_cap(caps, cap_type); - break; - - case VFIO_REGION_INFO_CAP_TYPE: - ret = region_type_cap(caps, cap_type); - break; - } - - return ret; -} EXPORT_SYMBOL(vfio_info_add_capability); int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a47b985341d1..66741ab087c1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -145,7 +145,8 @@ extern struct vfio_info_cap_header *vfio_info_cap_add( extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); extern int vfio_info_add_capability(struct vfio_info_cap *caps, - int cap_type_id, void *cap_type); + struct vfio_info_cap_header *cap, + size_t size); extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, int max_irq_type, From a32295c612c57990d17fb0f41e7134394b2f35f6 Mon Sep 17 00:00:00 2001 
From: Alexey Kardashevskiy Date: Wed, 13 Dec 2017 13:31:31 +1100 Subject: [PATCH 3/4] vfio-pci: Allow mapping MSIX BAR By default VFIO disables mapping of the MSIX BAR to the userspace as the userspace may program it in a way allowing spurious interrupts; instead the userspace uses the VFIO_DEVICE_SET_IRQS ioctl. In order to eliminate guessing from the userspace about what is mmappable, VFIO also advertises a sparse list of regions allowed to mmap. This works fine as long as the system page size equals the MSIX alignment requirement which is 4KB. However with a bigger page size the existing code prohibits mapping non-MSIX parts of a page with MSIX structures so these parts have to be emulated via slow reads/writes on a VFIO device fd. If these emulated bits are accessed often, this has a serious impact on performance. This allows mmap of the entire BAR containing the MSIX vector table. This removes the sparse capability for PCI devices as it becomes useless. As the userspace needs to know for sure whether mmapping of the MSIX vector containing data can succeed, this adds a new capability - VFIO_REGION_INFO_CAP_MSIX_MAPPABLE - which explicitly tells the userspace that the entire BAR can be mmapped. This does not touch the MSIX mangling in the BAR read/write handlers as we are doing this just to enable direct access to non MSIX registers. 
Signed-off-by: Alexey Kardashevskiy [aw - fixup whitespace, trim function name] Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 64 +++++-------------------------------- include/uapi/linux/vfio.h | 10 ++++++ 2 files changed, 18 insertions(+), 56 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index de48acd29a84..b0f759476900 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -565,47 +565,15 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, return walk.ret; } -static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, - struct vfio_info_cap *caps) +static int msix_mmappable_cap(struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) { - struct vfio_region_info_cap_sparse_mmap *sparse; - size_t end, size; - int nr_areas = 2, i = 0, ret; + struct vfio_info_cap_header header = { + .id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE, + .version = 1 + }; - end = pci_resource_len(vdev->pdev, vdev->msix_bar); - - /* If MSI-X table is aligned to the start or end, only one area */ - if (((vdev->msix_offset & PAGE_MASK) == 0) || - (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end)) - nr_areas = 1; - - size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas)); - - sparse = kzalloc(size, GFP_KERNEL); - if (!sparse) - return -ENOMEM; - - sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; - sparse->header.version = 1; - sparse->nr_areas = nr_areas; - - if (vdev->msix_offset & PAGE_MASK) { - sparse->areas[i].offset = 0; - sparse->areas[i].size = vdev->msix_offset & PAGE_MASK; - i++; - } - - if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) { - sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset + - vdev->msix_size); - sparse->areas[i].size = end - sparse->areas[i].offset; - i++; - } - - ret = vfio_info_add_capability(caps, &sparse->header, size); - kfree(sparse); - - return ret; + return vfio_info_add_capability(caps, &header, sizeof(header)); } int 
vfio_pci_register_dev_region(struct vfio_pci_device *vdev, @@ -696,7 +664,7 @@ static long vfio_pci_ioctl(void *device_data, if (vdev->bar_mmap_supported[info.index]) { info.flags |= VFIO_REGION_INFO_FLAG_MMAP; if (info.index == vdev->msix_bar) { - ret = msix_sparse_mmap_cap(vdev, &caps); + ret = msix_mmappable_cap(vdev, &caps); if (ret) return ret; } @@ -1127,22 +1095,6 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) if (req_start + req_len > phys_len) return -EINVAL; - if (index == vdev->msix_bar) { - /* - * Disallow mmaps overlapping the MSI-X table; users don't - * get to touch this directly. We could find somewhere - * else to map the overlap, but page granularity is only - * a recommendation, not a requirement, so the user needs - * to know which bits are real. Requiring them to mmap - * around the table makes that clear. - */ - - /* If neither entirely above nor below, then it overlaps */ - if (!(req_start >= vdev->msix_offset + vdev->msix_size || - req_start + req_len <= vdev->msix_offset)) - return -EINVAL; - } - /* * Even though we don't make use of the barmap for the mmap, * we need to request the region and the barmap tracks that. diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e3301dbd27d4..0d914350f7bf 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -301,6 +301,16 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) +/* + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped + * which allows direct access to non-MSIX registers which happened to be within + * the same system page. + * + * Even though the userspace gets direct access to the MSIX data, the existing + * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration. 
+ */ +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 + /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, * struct vfio_irq_info) From 46ed90f157f42d956ffed17c003f089a59b76e3e Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Fri, 22 Dec 2017 07:12:26 +0800 Subject: [PATCH 4/4] vfio: mdev: make a couple of functions and structure vfio_mdev_driver static The functions vfio_mdev_probe, vfio_mdev_remove and the structure vfio_mdev_driver are only used in this file, so make them static. Clean up sparse warnings: drivers/vfio/mdev/vfio_mdev.c:114:5: warning: no previous prototype for 'vfio_mdev_probe' [-Wmissing-prototypes] drivers/vfio/mdev/vfio_mdev.c:121:6: warning: no previous prototype for 'vfio_mdev_remove' [-Wmissing-prototypes] Signed-off-by: Xiongwei Song Reviewed-by: Quan Xu Reviewed-by: Liu, Yi L Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- drivers/vfio/mdev/vfio_mdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index fa848a701b8b..d230620fe02d 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -111,19 +111,19 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = { .mmap = vfio_mdev_mmap, }; -int vfio_mdev_probe(struct device *dev) +static int vfio_mdev_probe(struct device *dev) { struct mdev_device *mdev = to_mdev_device(dev); return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev); } -void vfio_mdev_remove(struct device *dev) +static void vfio_mdev_remove(struct device *dev) { vfio_del_group_dev(dev); } -struct mdev_driver vfio_mdev_driver = { +static struct mdev_driver vfio_mdev_driver = { .name = "vfio_mdev", .probe = vfio_mdev_probe, .remove = vfio_mdev_remove,