drm/amdgpu: Add DMA mapping of GTT BOs

Use DMABufs with dynamic attachment to DMA-map GTT BOs on other GPUs.
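
Roughly: the GTT BO is exported as a DMABuf once on the GPU that owns
it, imported on each other GPU that maps it, and the import is then
validated into the GTT domain, which makes TTM bind it through the
dynamic attachment and produce DMA addresses usable by the peer GPU.
A minimal sketch of that life cycle follows; the mem and peer_adev
variables are hypothetical stand-ins for the kgd_mem and the importing
device, and error handling is elided:

    /* Export once on the owning GPU; kgd_mem keeps the reference so
     * repeated attachments reuse the same DMABuf.
     */
    struct dma_buf *dmabuf =
        amdgpu_gem_prime_export(&mem->bo->tbo.base, DRM_RDWR);

    /* Import on the peer GPU; amdgpu uses a dynamic (unpinned)
     * attachment for its own DMABufs.
     */
    struct drm_gem_object *gobj =
        amdgpu_gem_prime_import(&peer_adev->ddev, dmabuf);
    struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
    struct ttm_operation_ctx ctx = { .interruptible = true };

    /* DMA map: validating the import into GTT makes TTM bind it,
     * calling dma_buf_map_attachment() under the covers.
     */
    amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
    ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

    /* DMA unmap: moving the import to the CPU domain is intended to
     * unpopulate it again (see the FIXME in kfd_mem_dmaunmap_dmabuf).
     */
    amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
    ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

Because the attachment is dynamic, the exporter remains free to move
the BO; the importer simply re-maps on its next validate.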

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oak Zeng <Oak.Zeng@amd.com>
Acked-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 5ac3c3e45f (parent 9e5d275319)
Author:    Felix Kuehling, 2021-04-11 18:52:19 -04:00
Committer: Alex Deucher
2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -47,6 +47,7 @@ struct amdgpu_device;
 enum kfd_mem_attachment_type {
 	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
 	KFD_MEM_ATT_USERPTR,	/* SG bo to DMA map pages from a userptr bo */
+	KFD_MEM_ATT_DMABUF,	/* DMAbuf to DMA map TTM BOs */
 };
 
 struct kfd_mem_attachment {
@@ -62,6 +63,7 @@ struct kfd_mem_attachment {
 struct kgd_mem {
 	struct mutex lock;
 	struct amdgpu_bo *bo;
+	struct dma_buf *dmabuf;
 	struct list_head attachments;
 	/* protected by amdkfd_process_info.lock */
 	struct ttm_validate_buffer validate_list;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -529,6 +529,16 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 	return ret;
 }
 
+static int
+kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
 static int
 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 			  struct kfd_mem_attachment *attachment)
@@ -538,6 +548,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 		return 0;
 	case KFD_MEM_ATT_USERPTR:
 		return kfd_mem_dmamap_userptr(mem, attachment);
+	case KFD_MEM_ATT_DMABUF:
+		return kfd_mem_dmamap_dmabuf(attachment);
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -567,6 +579,19 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
 	ttm->sg = NULL;
 }
 
+static void
+kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	/* FIXME: This does not guarantee that amdgpu_ttm_tt_unpopulate is
+	 * called
+	 */
+}
+
 static void
 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 			    struct kfd_mem_attachment *attachment)
@@ -577,6 +602,9 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	case KFD_MEM_ATT_USERPTR:
 		kfd_mem_dmaunmap_userptr(mem, attachment);
 		break;
+	case KFD_MEM_ATT_DMABUF:
+		kfd_mem_dmaunmap_dmabuf(attachment);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -610,6 +638,40 @@ kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
 	return 0;
 }
 
+static int
+kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
+		      struct amdgpu_bo **bo)
+{
+	struct drm_gem_object *gobj;
+
+	if (!mem->dmabuf) {
+		mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
+			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+				DRM_RDWR : 0);
+		if (IS_ERR(mem->dmabuf)) {
+			int ret = PTR_ERR(mem->dmabuf);
+
+			mem->dmabuf = NULL;
+			return ret;
+		}
+	}
+
+	gobj = amdgpu_gem_prime_import(&adev->ddev, mem->dmabuf);
+	if (IS_ERR(gobj))
+		return PTR_ERR(gobj);
+
+	/* Import takes an extra reference on the dmabuf. Drop it now to
+	 * avoid leaking it. We only need the one reference in
+	 * kgd_mem->dmabuf.
+	 */
+	dma_buf_put(mem->dmabuf);
+
+	*bo = gem_to_amdgpu_bo(gobj);
+	(*bo)->parent = amdgpu_bo_ref(mem->bo);
+
+	return 0;
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
  * Everything that needs to be done only once when a BO is first added
@@ -667,8 +729,20 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 			ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
+		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
+			   mem->bo->tbo.type != ttm_bo_type_sg) {
+			/* GTT BOs use DMA-mapping ability of dynamic-attach
+			 * DMA bufs. TODO: The same should work for VRAM on
+			 * large-BAR GPUs.
+			 */
+			attachment[i]->type = KFD_MEM_ATT_DMABUF;
+			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+			if (ret)
+				goto unwind;
 		} else {
-			/* FIXME: Need to DMA-map other BO types */
+			/* FIXME: Need to DMA-map other BO types:
+			 * large-BAR VRAM, doorbells, MMIO remap
+			 */
 			attachment[i]->type = KFD_MEM_ATT_SHARED;
 			bo[i] = mem->bo;
 			drm_gem_object_get(&bo[i]->tbo.base);
@@ -1527,6 +1601,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Free the BO*/
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	if (mem->dmabuf)
+		dma_buf_put(mem->dmabuf);
 	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
 	kfree(mem);