diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f8fa03a12add..ff47b1f69b68 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	}
 
 	q_properties->is_interop = false;
+	q_properties->is_gws = false;
 	q_properties->queue_percent = args->queue_percentage;
 	q_properties->priority = args->queue_priority;
 	q_properties->queue_address = args->ring_base_address;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 77ea0f0cb163..ae9547791813 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 		deallocate_vmid(dqm, qpd, q);
 	}
 	qpd->queue_count--;
-	if (q->properties.is_active)
+	if (q->properties.is_active) {
 		decrement_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
+	}
 
 	return retval;
 }
@@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	else if (!q->properties.is_active && prev_active)
 		decrement_queue_count(dqm, q->properties.type);
 
+	if (q->gws && !q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count++;
+			pdd->qpd.mapped_gws_queue = true;
+		}
+		q->properties.is_gws = true;
+	} else if (!q->gws && q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count--;
+			pdd->qpd.mapped_gws_queue = false;
+		}
+		q->properties.is_gws = false;
+	}
+
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
 		retval = map_queues_cpsch(dqm);
 	else if (q->properties.is_active &&
@@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 				q->properties.type)];
 		q->properties.is_active = false;
 		decrement_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
 
 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
 			continue;
@@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 				q->properties.type)];
 		q->properties.is_active = true;
 		increment_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count++;
+			qpd->mapped_gws_queue = true;
+		}
 
 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
 			continue;
@@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->active_cp_queue_count = 0;
+	dqm->gws_queue_count = 0;
 
 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1082,7 +1110,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->active_queue_count = dqm->processes_count = 0;
 	dqm->active_cp_queue_count = 0;
-
+	dqm->gws_queue_count = 0;
 	dqm->active_runlist = false;
 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1432,6 +1460,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 		if (retval == -ETIME)
 			qpd->reset_wavefronts = true;
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
 	}
 
 	/*
@@ -1650,8 +1682,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 			deallocate_sdma_queue(dqm, q);
 
-		if (q->properties.is_active)
+		if (q->properties.is_active) {
 			decrement_queue_count(dqm, q->properties.type);
+			if (q->properties.is_gws) {
+				dqm->gws_queue_count--;
+				qpd->mapped_gws_queue = false;
+			}
+		}
 
 		dqm->total_queue_count--;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 50d919f814e9..4afa015c69b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -182,6 +182,7 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		active_queue_count;
 	unsigned int		active_cp_queue_count;
+	unsigned int		gws_queue_count;
 	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index bae706462f96..a2b77d1df854 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 
 	prop.queue_size = queue_size;
 	prop.is_interop = false;
+	prop.is_gws = false;
 	prop.priority = 1;
 	prop.queue_percent = 100;
 	prop.type = type;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index efdb75e7677b..685ca82d42fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
 {
-	unsigned int process_count, queue_count, compute_queue_count;
+	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
 	unsigned int map_queue_size;
 	unsigned int max_proc_per_quantum = 1;
 	struct kfd_dev *dev = pm->dqm->dev;
@@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->active_queue_count;
 	compute_queue_count = pm->dqm->active_cp_queue_count;
+	gws_queue_count = pm->dqm->gws_queue_count;
 
 	/* check if there is over subscription
 	 * Note: the arbitration between the number of VMIDs and
@@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 		max_proc_per_quantum = dev->max_proc_per_quantum;
 
 	if ((process_count > max_proc_per_quantum) ||
-	    compute_queue_count > get_cp_queues_num(pm->dqm)) {
+	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
+	    gws_queue_count > 1) {
 		*over_subscription = true;
 		pr_debug("Over subscribed runlist\n");
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 2de01009f1b6..bdca9dc5f118 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
 	packet->bitfields2.pasid = qpd->pqm->process->pasid;
 	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
 	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
-	packet->bitfields14.num_gws = qpd->num_gws;
+	packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
 	packet->bitfields14.num_oac = qpd->num_oac;
 	packet->bitfields14.sdma_enable = 1;
 	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 43b888b311c7..d48b33449267 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -411,6 +411,10 @@ enum KFD_QUEUE_PRIORITY {
  * @is_active: Defines if the queue is active or not. @is_active and
  * @is_evicted are protected by the DQM lock.
  *
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it can yield the
+ * possibility of updating DQM state on number of GWS queues.
+ *
  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
  * of the queue.
  *
@@ -433,6 +437,7 @@ struct queue_properties {
 	bool is_interop;
 	bool is_evicted;
 	bool is_active;
+	bool is_gws;
 	/* Not relevant for user mode queues in cp scheduling */
 	unsigned int vmid;
 	/* Relevant only for sdma queues*/
@@ -564,6 +569,14 @@ struct qcm_process_device {
 	 */
 	bool reset_wavefronts;
 
+	/* This flag tells us if this process has a GWS-capable
+	 * queue that will be mapped into the runlist. It's
+	 * possible to request a GWS BO, but not have the queue
+	 * currently mapped, and this changes how the MAP_PROCESS
+	 * PM4 packet is configured.
+	 */
+	bool mapped_gws_queue;
+
 	/*
 	 * All the memory management data should be here too
 	 */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fe0cd49d4ea7..82b4c5a9382a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -858,6 +858,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->qpd.dqm = dev->dqm;
 	pdd->qpd.pqm = &p->pqm;
 	pdd->qpd.evicted = 0;
+	pdd->qpd.mapped_gws_queue = false;
 	pdd->process = p;
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
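
The invariant every hunk above maintains is that dqm->gws_queue_count (device-wide)
and qpd->mapped_gws_queue (per-process) change together, under the DQM lock, exactly
when a GWS-capable queue becomes active or inactive. Below is a minimal standalone C
sketch of that bookkeeping rule; the struct and function names are illustrative
stand-ins, not actual KFD identifiers.

	/*
	 * Sketch of the GWS accounting rule applied at each activate/
	 * deactivate site in the patch (update, evict, restore, destroy,
	 * process termination). Only the two fields mirror real DQM/QPD
	 * state; the types and the helper itself are hypothetical
	 * simplifications.
	 */
	#include <stdbool.h>

	struct dqm_state {
		unsigned int gws_queue_count;	/* mapped GWS queues, device-wide */
	};

	struct qpd_state {
		bool mapped_gws_queue;		/* this process has one mapped */
	};

	/* Call with the DQM lock held, per the @is_gws comment above. */
	static void gws_account(struct dqm_state *dqm, struct qpd_state *qpd,
				bool is_gws, bool becoming_active)
	{
		if (!is_gws)
			return;

		if (becoming_active) {
			/* Queue is being mapped: count it, flag the process. */
			dqm->gws_queue_count++;
			qpd->mapped_gws_queue = true;
		} else {
			/* Queue is being unmapped or destroyed: undo both. */
			dqm->gws_queue_count--;
			qpd->mapped_gws_queue = false;
		}
	}

With that counter maintained, pm_calc_rlib_size() flags the runlist as over-subscribed
whenever more than one GWS queue is mapped, and pm_map_process_v9() programs num_gws
into the MAP_PROCESS packet only when the process's GWS queue is currently mapped,
zero otherwise.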