linux-stable/drivers/iommu/iommufd/iommufd_private.h

/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
*/
#ifndef __IOMMUFD_PRIVATE_H
#define __IOMMUFD_PRIVATE_H
#include <linux/rwsem.h>
#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>
struct iommufd_ctx {
struct file *file;
struct xarray objects;
};
/*
* The IOVA to PFN map. The map automatically copies the PFNs into multiple
* domains and permits sharing of PFNs between io_pagetable instances. This
* supports both a design where IOAS's are 1:1 with a domain (eg because the
* domain is HW customized), or where the IOAS is 1:N with multiple generic
* domains. The io_pagetable holds an interval tree of iopt_areas which point
* to shared iopt_pages which hold the pfns mapped to the page table.
*
* The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
*/
struct io_pagetable {
struct rw_semaphore domains_rwsem;
struct xarray domains;
unsigned int next_domain_id;
struct rw_semaphore iova_rwsem;
struct rb_root_cached area_itree;
/* IOVA that cannot become reserved, struct iopt_allowed */
struct rb_root_cached allowed_itree;
/* IOVA that cannot be allocated, struct iopt_reserved */
struct rb_root_cached reserved_itree;
u8 disable_large_pages;
};
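/*
 * Locking-order sketch (illustrative only, not part of this header): a
 * caller that walks areas while filling domains is expected to take the
 * locks in the documented order, domains_rwsem -> iova_rwsem ->
 * pages::mutex. struct iopt_area, struct iopt_pages and the
 * iopt_area_iter_first()/iopt_area_iter_next() helpers live elsewhere in
 * iommufd; their use here is an assumption made for the example.
 *
 *	static void example_walk(struct io_pagetable *iopt,
 *				 unsigned long iova, unsigned long last)
 *	{
 *		struct iopt_area *area;
 *
 *		down_read(&iopt->domains_rwsem);
 *		down_read(&iopt->iova_rwsem);
 *		for (area = iopt_area_iter_first(iopt, iova, last); area;
 *		     area = iopt_area_iter_next(area, iova, last)) {
 *			mutex_lock(&area->pages->mutex);
 *			// operate on area->pages under all three locks
 *			mutex_unlock(&area->pages->mutex);
 *		}
 *		up_read(&iopt->iova_rwsem);
 *		up_read(&iopt->domains_rwsem);
 *	}
 */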
struct iommufd_ucmd {
struct iommufd_ctx *ictx;
void __user *ubuffer;
u32 user_size;
void *cmd;
};
/* Copy the response in ucmd->cmd back to userspace. */
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
size_t cmd_len)
{
if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
min_t(size_t, ucmd->user_size, cmd_len)))
return -EFAULT;
return 0;
}
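/*
 * Usage sketch (illustrative, not part of this header): an ioctl handler is
 * given a struct iommufd_ucmd whose ->cmd buffer the fops dispatcher has
 * already copied in from userspace; it fills in any output fields and sends
 * them back with iommufd_ucmd_respond(). Because only
 * min(user_size, cmd_len) bytes are copied out, a newer kernel can grow the
 * response struct without breaking older userspace. "struct iommu_example"
 * and the handler name below are hypothetical.
 *
 *	int iommufd_example_ioctl(struct iommufd_ucmd *ucmd)
 *	{
 *		struct iommu_example *cmd = ucmd->cmd;
 *
 *		if (cmd->flags)
 *			return -EOPNOTSUPP;
 *		cmd->out_value = 42;
 *		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 *	}
 */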
enum iommufd_object_type {
IOMMUFD_OBJ_NONE,
IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
};
/* Base struct for all objects with a userspace ID handle. */
struct iommufd_object {
struct rw_semaphore destroy_rwsem;
refcount_t users;
enum iommufd_object_type type;
unsigned int id;
};
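/*
 * Embedding sketch (illustrative): every concrete object type is expected to
 * place a struct iommufd_object named "obj" as its first member so the
 * generic ID/refcount machinery works and the container_of() plus
 * BUILD_BUG_ON_ZERO() in iommufd_object_alloc() below hold.
 * "struct iommufd_example_obj" is a hypothetical type for illustration.
 *
 *	struct iommufd_example_obj {
 *		struct iommufd_object obj;	// must stay at offset 0
 *		unsigned long example_state;
 *	};
 */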
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
if (!down_read_trylock(&obj->destroy_rwsem))
return false;
if (!refcount_inc_not_zero(&obj->users)) {
up_read(&obj->destroy_rwsem);
return false;
}
return true;
}
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
enum iommufd_object_type type);
static inline void iommufd_put_object(struct iommufd_object *obj)
{
refcount_dec(&obj->users);
up_read(&obj->destroy_rwsem);
}
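/*
 * Lookup sketch (illustrative): a syscall path resolves a userspace ID to an
 * object with iommufd_get_object(), which is assumed to return an ERR_PTR on
 * failure and otherwise hands back the object with destroy_rwsem read-held
 * and a users reference; both are dropped with iommufd_put_object().
 *
 *	static int example_use_object(struct iommufd_ctx *ictx, u32 id)
 *	{
 *		struct iommufd_object *obj;
 *
 *		obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_ANY);
 *		if (IS_ERR(obj))
 *			return PTR_ERR(obj);
 *		// the object cannot be destroyed while it is held
 *		iommufd_put_object(obj);
 *		return 0;
 *	}
 */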
/**
* iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount
* protection
 * @obj: Object to release
*
* Objects have two refcount protections (destroy_rwsem and the refcount_t
* users). Holding either of these will prevent the object from being destroyed.
*
* Depending on the use case, one protection or the other is appropriate. In
* most cases references are being protected by the destroy_rwsem. This allows
* orderly destruction of the object because iommufd_object_destroy_user() will
* wait for it to become unlocked. However, as a rwsem, it cannot be held across
* a system call return. So cases that have longer term needs must switch
* to the weaker users refcount_t.
*
* With users protection iommufd_object_destroy_user() will return false,
* refusing to destroy the object, causing -EBUSY to userspace.
*/
static inline void iommufd_ref_to_users(struct iommufd_object *obj)
{
up_read(&obj->destroy_rwsem);
/* iommufd_lock_obj() obtains users as well */
}
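/*
 * Long-term reference sketch (illustrative): when an object must outlive the
 * current system call, the holder trades the destroy_rwsem for the users
 * refcount that iommufd_get_object() already obtained, then releases that
 * refcount when finally done; destroying the object in the meantime fails
 * with -EBUSY as described above.
 *
 *	obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_ANY);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	iommufd_ref_to_users(obj);	// keep only the users refcount
 *	// ... safe to hold across the syscall return ...
 *	refcount_dec(&obj->users);	// release when done
 */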
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
size_t size,
enum iommufd_object_type type);
#define iommufd_object_alloc(ictx, ptr, type) \
container_of(_iommufd_object_alloc( \
ictx, \
sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \
offsetof(typeof(*(ptr)), \
obj) != 0), \
type), \
typeof(*(ptr)), obj)
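/*
 * Allocation lifecycle sketch (illustrative): a constructor allocates the
 * object, initializes its own fields, and then either publishes it with
 * iommufd_object_finalize() or unwinds with
 * iommufd_object_abort_and_destroy(). "struct iommufd_example_obj",
 * IOMMUFD_OBJ_EXAMPLE and example_setup() are hypothetical; real object
 * types are added to enum iommufd_object_type by later patches.
 *
 *	struct iommufd_example_obj *eobj;
 *	int rc;
 *
 *	eobj = iommufd_object_alloc(ictx, eobj, IOMMUFD_OBJ_EXAMPLE);
 *	if (IS_ERR(eobj))
 *		return PTR_ERR(eobj);
 *	rc = example_setup(eobj);
 *	if (rc) {
 *		iommufd_object_abort_and_destroy(ictx, &eobj->obj);
 *		return rc;
 *	}
 *	iommufd_object_finalize(ictx, &eobj->obj);
 *	return 0;
 */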
#endif