mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
drm/amdgpu: allocate entities on demand
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used. This patch
resolves that entity/fence wastage by creating an entity only when it
is needed.

v2: allocate memory for entity and fences together

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 977f7e1068
parent 18c6b74e7c
2 changed files with 124 additions and 117 deletions
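The v2 note ("allocate memory for entity and fences together") relies on a C flexible array member: the fence-pointer array becomes the last field of struct amdgpu_ctx_entity, so one kcalloc() sized with offsetof() covers the entity and all of its fence slots, and one kfree() releases both. Below is a minimal userspace sketch of that idiom; NUM_FENCES, ctx_entity, and fence are illustrative stand-ins (NUM_FENCES plays the role of amdgpu_sched_jobs), not the kernel's names.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_FENCES 32                  /* stand-in for amdgpu_sched_jobs */

struct fence;                          /* opaque stand-in for struct dma_fence */

struct ctx_entity {
        uint64_t sequence;
        struct fence *fences[];        /* flexible array member, must be last */
};

int main(void)
{
        /* One zeroed allocation covers the header and all fence slots,
         * mirroring kcalloc(1, offsetof(typeof(*entity),
         * fences[amdgpu_sched_jobs]), GFP_KERNEL) in the patch below. */
        size_t size = offsetof(struct ctx_entity, fences[NUM_FENCES]);
        struct ctx_entity *entity = calloc(1, size);

        if (!entity)
                return 1;

        entity->sequence = 1;
        printf("entity + %d fence slots: %zu bytes, one allocation\n",
               NUM_FENCES, size);

        free(entity);                  /* a single free releases both */
        return 0;
}

Compared with the old scheme (a separate kcalloc() per entity for the fence array), this halves the number of allocations per entity and removes the partial-failure cleanup path for the fence arrays.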
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
 	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
 	int r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
+	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+			 GFP_KERNEL);
+	if (!entity)
+		return -ENOMEM;
 
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
-
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
-
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
-
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
-		}
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-				   amdgpu_ctx_num_entities[i - 1];
-
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
-
-		switch (i) {
+	entity->sequence = 1;
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 	case AMDGPU_HW_IP_GFX:
 		sched = &adev->gfx.gfx_ring[0].sched;
 		scheds = &sched;
@@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 		scheds = adev->jpeg.jpeg_sched;
 		num_scheds = adev->jpeg.num_jpeg_sched;
 		break;
-		}
-
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
 	}
 
+	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
+
+	ctx->entities[hw_ip][ring] = entity;
+
+	return 0;
+
+error_free_entity:
+	kfree(entity);
+
+	return r;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
 	return 0;
 
-error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-
-error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		kfree(entity->fences);
-		entity->fences = NULL;
-	}
+}
 
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
-	return r;
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+	int i;
+
+	if (!entity)
+		return;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
+
+	kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
-
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
 	}
 
-	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	*entity = &ctx->entities[hw_ip][ring].entity;
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
 	return 0;
 }
 
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
 
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct drm_sched_entity *entity;
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			if (!ctx->entities[i][j])
+				continue;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			entity = &ctx->entities[i][j]->entity;
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
 
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
 
+#define AMDGPU_MAX_ENTITY_NUM 4
+
 struct amdgpu_ctx_entity {
 	uint64_t		sequence;
-	struct dma_fence	**fences;
 	struct drm_sched_entity	entity;
+	struct dma_fence	*fences[];
 };
 
 struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
 	unsigned		reset_counter_query;
 	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
-	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
+	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
 	bool			preamble_presented;
 	enum drm_sched_priority	init_priority;
 	enum drm_sched_priority	override_priority;
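The behavioural core of the patch is the new path in amdgpu_ctx_get_entity(): a slot in the now two-dimensional entities[hw_ip][ring] table stays NULL until a ring is first used, which is why every later walk of the table gains an "if (!ctx->entities[i][j]) continue;" check. Below is a rough standalone sketch of that get-or-create pattern under simplifying assumptions: hypothetical names, no locking, and error codes collapsed to NULL returns; it is not the kernel code itself.

#include <stdio.h>
#include <stdlib.h>

#define HW_IP_NUM       9      /* stand-in for AMDGPU_HW_IP_NUM */
#define MAX_ENTITY_NUM  4      /* stand-in for AMDGPU_MAX_ENTITY_NUM */

struct entity { unsigned long sequence; };

/* NULL until a (hw_ip, ring) pair is first used, like ctx->entities[][]. */
static struct entity *entities[HW_IP_NUM][MAX_ENTITY_NUM];

/* Look up the entity for (hw_ip, ring), creating it on first use;
 * the creation branch corresponds to amdgpu_ctx_init_entity(). */
static struct entity *get_entity(unsigned hw_ip, unsigned ring)
{
        if (hw_ip >= HW_IP_NUM || ring >= MAX_ENTITY_NUM)
                return NULL;                    /* -EINVAL in the kernel */

        if (!entities[hw_ip][ring]) {
                struct entity *e = calloc(1, sizeof(*e));

                if (!e)
                        return NULL;            /* -ENOMEM in the kernel */
                e->sequence = 1;
                entities[hw_ip][ring] = e;
        }

        return entities[hw_ip][ring];
}

int main(void)
{
        /* Only slot (0, 0) is ever allocated; the other 35 stay NULL. */
        struct entity *e = get_entity(0, 0);

        printf("created=%d, untouched slot is %s\n",
               e != NULL, entities[1][0] ? "set" : "NULL");

        for (unsigned i = 0; i < HW_IP_NUM; ++i)
                for (unsigned j = 0; j < MAX_ENTITY_NUM; ++j)
                        free(entities[i][j]);   /* free(NULL) is a no-op */
        return 0;
}

The trade-off is classic lazy initialization: a context that never submits to, say, the JPEG or VCN rings no longer pays for their entities and fence arrays, at the cost of a NULL check on every subsequent iteration over the table (flush, fini, release, priority override).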