mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
drm/amdgpu: allocate entities on demand
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used. This patch
resolves that entity/fence wastage by creating an entity only when it
is needed.

v2: allocate memory for entity and fences together

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 977f7e1068
parent 18c6b74e7c
2 changed files with 124 additions and 117 deletions
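The v2 note ("allocate memory for entity and fences together") relies on a C flexible array member: the fence-pointer array becomes the last field of struct amdgpu_ctx_entity, so one kcalloc() sized with offsetof() covers the entity and all of its fence slots, and one kfree() releases both. Below is a minimal userspace sketch of that idiom; NUM_FENCES, ctx_entity, and fence are illustrative stand-ins (NUM_FENCES plays the role of amdgpu_sched_jobs), not the kernel's names.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_FENCES 32                  /* stand-in for amdgpu_sched_jobs */

struct fence;                          /* opaque stand-in for struct dma_fence */

struct ctx_entity {
        uint64_t sequence;
        struct fence *fences[];        /* flexible array member, must be last */
};

int main(void)
{
        /* One zeroed allocation covers the header and all fence slots,
         * mirroring kcalloc(1, offsetof(typeof(*entity),
         * fences[amdgpu_sched_jobs]), GFP_KERNEL) in the patch below. */
        size_t size = offsetof(struct ctx_entity, fences[NUM_FENCES]);
        struct ctx_entity *entity = calloc(1, size);

        if (!entity)
                return 1;

        entity->sequence = 1;
        printf("entity + %d fence slots: %zu bytes, one allocation\n",
               NUM_FENCES, size);

        free(entity);                  /* a single free releases both */
        return 0;
}

Compared with the old scheme (a separate kcalloc() per entity for the fence array), this halves the number of allocations per entity and removes the partial-failure cleanup path for the fence arrays.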
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
 	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
 	int r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
+	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+			 GFP_KERNEL);
+	if (!entity)
+		return -ENOMEM;
 
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
-
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
-
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
-
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
-		}
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-				   amdgpu_ctx_num_entities[i - 1];
-
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
-
-		switch (i) {
+	entity->sequence = 1;
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 	case AMDGPU_HW_IP_GFX:
 		sched = &adev->gfx.gfx_ring[0].sched;
 		scheds = &sched;
@@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 		scheds = adev->jpeg.jpeg_sched;
 		num_scheds = adev->jpeg.num_jpeg_sched;
 		break;
-		}
-
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
 	}
 
+	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
+
+	ctx->entities[hw_ip][ring] = entity;
+
+	return 0;
+
+error_free_entity:
+	kfree(entity);
+
+	return r;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
 	return 0;
 
-error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-
-error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		kfree(entity->fences);
-		entity->fences = NULL;
-	}
+}
 
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
-	return r;
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+	int i;
+
+	if (!entity)
+		return;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
+
+	kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
-
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
 	}
 
-	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	*entity = &ctx->entities[hw_ip][ring].entity;
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
 	return 0;
 }
 
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
 
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct drm_sched_entity *entity;
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			if (!ctx->entities[i][j])
+				continue;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			entity = &ctx->entities[i][j]->entity;
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
 
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
 
+#define AMDGPU_MAX_ENTITY_NUM 4
+
 struct amdgpu_ctx_entity {
 	uint64_t		sequence;
-	struct dma_fence	**fences;
 	struct drm_sched_entity	entity;
+	struct dma_fence	*fences[];
 };
 
 struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
 	unsigned		reset_counter_query;
 	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
-	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
+	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
 	bool			preamble_presented;
 	enum drm_sched_priority	init_priority;
 	enum drm_sched_priority	override_priority;
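The behavioural core of the patch is the new path in amdgpu_ctx_get_entity(): a slot in the now two-dimensional entities[hw_ip][ring] table stays NULL until a ring is first used, which is why every later walk of the table gains an "if (!ctx->entities[i][j]) continue;" check. Below is a rough standalone sketch of that get-or-create pattern under simplifying assumptions: hypothetical names, no locking, and error codes collapsed to NULL returns; it is not the kernel code itself.

#include <stdio.h>
#include <stdlib.h>

#define HW_IP_NUM       9      /* stand-in for AMDGPU_HW_IP_NUM */
#define MAX_ENTITY_NUM  4      /* stand-in for AMDGPU_MAX_ENTITY_NUM */

struct entity { unsigned long sequence; };

/* NULL until a (hw_ip, ring) pair is first used, like ctx->entities[][]. */
static struct entity *entities[HW_IP_NUM][MAX_ENTITY_NUM];

/* Look up the entity for (hw_ip, ring), creating it on first use;
 * the creation branch corresponds to amdgpu_ctx_init_entity(). */
static struct entity *get_entity(unsigned hw_ip, unsigned ring)
{
        if (hw_ip >= HW_IP_NUM || ring >= MAX_ENTITY_NUM)
                return NULL;                    /* -EINVAL in the kernel */

        if (!entities[hw_ip][ring]) {
                struct entity *e = calloc(1, sizeof(*e));

                if (!e)
                        return NULL;            /* -ENOMEM in the kernel */
                e->sequence = 1;
                entities[hw_ip][ring] = e;
        }

        return entities[hw_ip][ring];
}

int main(void)
{
        /* Only slot (0, 0) is ever allocated; the other 35 stay NULL. */
        struct entity *e = get_entity(0, 0);

        printf("created=%d, untouched slot is %s\n",
               e != NULL, entities[1][0] ? "set" : "NULL");

        for (unsigned i = 0; i < HW_IP_NUM; ++i)
                for (unsigned j = 0; j < MAX_ENTITY_NUM; ++j)
                        free(entities[i][j]);   /* free(NULL) is a no-op */
        return 0;
}

The trade-off is classic lazy initialization: a context that never submits to, say, the JPEG or VCN rings no longer pays for their entities and fence arrays, at the cost of a NULL check on every subsequent iteration over the table (flush, fini, release, priority override).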