diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 88dc66ee1c46..27539770a613 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -89,6 +89,106 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(published);
 
+/*
+ * mmap handler for the "allocate" sysfs binary attribute: allocates memory
+ * the size of the VMA from the device's p2pdma pool and inserts it, page by
+ * page, into the mapping.
+ *
+ * Returns 0 on success, -EINVAL for a private mapping or a non-zero page
+ * offset, -ENODEV if the device has no p2pdma state or the reference that
+ * owns the allocation is no longer live, -ENOMEM if the pool allocation
+ * fails, or the error returned by vm_insert_page().
+ */
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr, struct vm_area_struct *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	size_t len = vma->vm_end - vma->vm_start;
+	struct pci_p2pdma *p2pdma;
+	struct percpu_ref *ref;
+	unsigned long vaddr;
+	void *kaddr;
+	int ret;
+
+	/* prevent private mappings from being established */
+	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+		pci_info_ratelimited(pdev,
+				     "%s: fail, attempted private mapping\n",
+				     current->comm);
+		return -EINVAL;
+	}
+
+	if (vma->vm_pgoff) {
+		pci_info_ratelimited(pdev,
+				     "%s: fail, attempted mapping with non-zero offset\n",
+				     current->comm);
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (!p2pdma) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
+	if (!kaddr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * vm_insert_page() can sleep, so a reference is taken here so that
+	 * rcu_read_unlock() can be called before the pages are inserted.
+	 * It is dropped once the loop below has finished or failed.
+	 */
+	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
+		ret = -ENODEV;
+		goto out_free_mem;
+	}
+	rcu_read_unlock();
+
+	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+		ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr));
+		if (ret) {
+			/*
+			 * Free only what was not inserted (kaddr and len
+			 * advance per inserted page) and drop the reference
+			 * taken above, which would otherwise be leaked.
+			 */
+			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+			percpu_ref_put(ref);
+			return ret;
+		}
+		percpu_ref_get(ref);
+		put_page(virt_to_page(kaddr));
+		kaddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+
+	percpu_ref_put(ref);
+
+	return 0;
+out_free_mem:
+	gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static struct bin_attribute p2pmem_alloc_attr = {
+	.attr = { .name = "allocate", .mode = 0660 },
+	.mmap = p2pmem_alloc_mmap,
+	/*
+	 * Some places where we want to call mmap (i.e. Python) will check
+	 * that the file size is greater than the mmap size before allowing
+	 * the mmap to continue. To work around this, just set the size
+	 * to be very large.
+	 */
+	.size = SZ_1T,
+};
+
 static struct attribute *p2pmem_attrs[] = {
 	&dev_attr_size.attr,
 	&dev_attr_available.attr,
@@ -96,11 +196,36 @@ static struct attribute *p2pmem_attrs[] = {
 	NULL,
 };
 
+static struct bin_attribute *p2pmem_bin_attrs[] = {
+	&p2pmem_alloc_attr,
+	NULL,
+};
+
 static const struct attribute_group p2pmem_group = {
 	.attrs = p2pmem_attrs,
+	.bin_attrs = p2pmem_bin_attrs,
 	.name = "p2pmem",
 };
 
+/*
+ * dev_pagemap page_free callback: return the page to the provider's p2pdma
+ * pool and drop the percpu reference the pool reports as the chunk's owner.
+ */
+static void p2pdma_page_free(struct page *page)
+{
+	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
+	struct percpu_ref *ref;
+
+	gen_pool_free_owner(pgmap->provider->p2pdma->pool,
+			    (uintptr_t)page_to_virt(page), PAGE_SIZE,
+			    (void **)&ref);
+	percpu_ref_put(ref);
+}
+
+static const struct dev_pagemap_ops p2pdma_pgmap_ops = {
+	.page_free = p2pdma_page_free,
+};
+
 static void pci_p2pdma_release(void *data)
 {
 	struct pci_dev *pdev = data;
@@ -152,6 +277,19 @@ out:
 	return error;
 }
 
+static void pci_p2pdma_unmap_mappings(void *data)
+{
+	struct pci_dev *pdev = data;
+
+	/*
+	 * Removing the alloc attribute from sysfs will call
+	 * unmap_mapping_range() on the inode, tear down any existing
+	 * userspace mappings and prevent new ones from being created.
+	 */
+	sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr,
+				     p2pmem_group.name);
+}
+
 /**
  * pci_p2pdma_add_resource - add memory for use as p2p memory
  * @pdev: the device to add the memory to
@@ -198,6 +336,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	pgmap->range.end = pgmap->range.start + size - 1;
 	pgmap->nr_range = 1;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+	pgmap->ops = &p2pdma_pgmap_ops;
 
 	p2p_pgmap->provider = pdev;
 	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
@@ -209,6 +348,11 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		goto pgmap_free;
 	}
 
+	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings,
+					 pdev);
+	if (error)
+		goto pages_free;
+
 	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
 	error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
 			pci_bus_address(pdev, bar) + offset,