diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index d0947c5c4ab8..e539c42a3e78 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -466,9 +466,6 @@ GuC-based command submission .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :doc: GuC-based command submission -.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c - :internal: - HuC --- .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 0e94b720c993..8408cb84e52c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -21,6 +21,7 @@ #include "intel_reset.h" #include "uc/intel_guc.h" +#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -1085,6 +1086,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; + bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); @@ -1100,14 +1102,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->gt->uc.guc.execbuf_client) + if (!uses_guc) ret = intel_gt_reset_engine(engine); else ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->gt->uc.guc.execbuf_client ? "GuC " : "", + uses_guc ? "GuC " : "", engine->name, ret); goto out; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index b2d1766e689a..cd09c912e361 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,9 +46,13 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; - struct ida stage_ids; - struct intel_guc_client *execbuf_client; + struct i915_vma *workqueue; + void *workqueue_vaddr; + spinlock_t wq_lock; + + struct i915_vma *proc_desc; + void *proc_desc_vaddr; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 1e8e4af7d9ca..a6b733c146c9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -31,7 +31,6 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_PD_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) /* Work queue item header definitions */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 334bd4ded642..172220e83079 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -27,24 +27,13 @@ * code) matches the old submission model and will be updated as part of the * upgrade to the new flow. * - * GuC client: - * A intel_guc_client refers to a submission path through GuC. Currently, there - * is only one client, which is charged with all submissions to the GuC. This - * struct is the owner of a process descriptor and a workqueue (both of them - * inside a single gem object that contains all required pages for these - * elements). - * * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. - * Currently, there exists a 1:1 mapping between a intel_guc_client and a - * guc_stage_desc (via the client's stage_id), so effectively only one - * gets used. This stage descriptor lets the GuC know about the workqueue and + * descriptors, and shares them with the GuC. Currently, we only use one + * descriptor. This stage descriptor lets the GuC know about the workqueue and * process descriptor. Theoretically, it also lets the GuC know about our HW * contexts (context ID, etc...), but we actually employ a kind of submission - * where the GuC uses the LRCA sent via the work item instead (the single - * guc_stage_desc associated to execbuf client contains information about the - * default kernel context only, but this is essentially unused). This is called + * where the GuC uses the LRCA sent via the work item instead. This is called * a "proxy" submission. * * The Scratch registers: @@ -71,33 +60,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static inline bool is_high_priority(struct intel_guc_client *client) +static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) { - return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || - client->priority == GUC_CLIENT_PRIORITY_HIGH); + struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; + + return &base[id]; } -static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) +static int guc_workqueue_create(struct intel_guc *guc) { - struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; - - return &base[client->stage_id]; + return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, + &guc->workqueue_vaddr); } -static inline struct guc_process_desc * -__get_process_desc(struct intel_guc_client *client) +static void guc_workqueue_destroy(struct intel_guc *guc) { - return client->vaddr; + i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); } /* * Initialise the process descriptor shared with the GuC firmware. */ -static void guc_proc_desc_init(struct intel_guc_client *client) +static int guc_proc_desc_create(struct intel_guc *guc) +{ + const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc)); + + return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, + &guc->proc_desc_vaddr); +} + +static void guc_proc_desc_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); +} + +static void guc_proc_desc_init(struct intel_guc *guc) { struct guc_process_desc *desc; - desc = memset(__get_process_desc(client), 0, sizeof(*desc)); + desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); /* * XXX: pDoorbell and WQVBaseAddress are pointers in process address @@ -108,39 +109,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client) desc->wq_base_addr = 0; desc->db_base_addr = 0; - desc->stage_id = client->stage_id; desc->wq_size_bytes = GUC_WQ_SIZE; desc->wq_status = WQ_STATUS_ACTIVE; - desc->priority = client->priority; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; } -static void guc_proc_desc_fini(struct intel_guc_client *client) +static void guc_proc_desc_fini(struct intel_guc *guc) { - struct guc_process_desc *desc; - - desc = __get_process_desc(client); - memset(desc, 0, sizeof(*desc)); + memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); } static int guc_stage_desc_pool_create(struct intel_guc *guc) { u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * GUC_MAX_STAGE_DESCRIPTORS); - int ret; - ret = intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, - &guc->stage_desc_pool_vaddr); - if (ret) - return ret; - - ida_init(&guc->stage_ids); - - return 0; + return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, + &guc->stage_desc_pool_vaddr); } static void guc_stage_desc_pool_destroy(struct intel_guc *guc) { - ida_destroy(&guc->stage_ids); i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); } @@ -148,58 +137,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) * Initialise/clear the stage descriptor shared with the GuC firmware. * * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures relating to this client (process descriptor, write queue, + * data structures related to work submission (process descriptor, write queue, * etc). */ -static void guc_stage_desc_init(struct intel_guc_client *client) +static void guc_stage_desc_init(struct intel_guc *guc) { - struct intel_guc *guc = client->guc; struct guc_stage_desc *desc; - u32 gfx_addr; - desc = __get_stage_desc(client); + /* we only use 1 stage desc, so hardcode it to 0 */ + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; - if (is_high_priority(client)) - desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; - desc->stage_id = client->stage_id; - desc->priority = client->priority; - /* - * The process descriptor and workqueue are all parts of the client - * object, which the GuC will reference via the GGTT - */ - gfx_addr = intel_guc_ggtt_offset(guc, client->vma); - desc->process_desc = gfx_addr; - desc->wq_addr = gfx_addr + GUC_PD_SIZE; + desc->stage_id = 0; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; + + desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); + desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); desc->wq_size = GUC_WQ_SIZE; - - desc->desc_private = ptr_to_u64(client); } -static void guc_stage_desc_fini(struct intel_guc_client *client) +static void guc_stage_desc_fini(struct intel_guc *guc) { struct guc_stage_desc *desc; - desc = __get_stage_desc(client); + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); } /* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct intel_guc_client *client, +static void guc_wq_item_append(struct intel_guc *guc, u32 target_engine, u32 context_desc, u32 ring_tail, u32 fence_id) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct guc_process_desc *desc = __get_process_desc(client); + struct guc_process_desc *desc = guc->proc_desc_vaddr; struct guc_wq_item *wqi; u32 wq_off; - lockdep_assert_held(&client->wq_lock); + lockdep_assert_held(&guc->wq_lock); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -219,8 +199,7 @@ static void guc_wq_item_append(struct intel_guc_client *client, GUC_WQ_SIZE) < wqi_size); GEM_BUG_ON(wq_off & (wqi_size - 1)); - /* WQ starts from the page after process_desc */ - wqi = client->vaddr + wq_off + GUC_PD_SIZE; + wqi = guc->workqueue_vaddr + wq_off; /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | @@ -238,12 +217,11 @@ static void guc_wq_item_append(struct intel_guc_client *client, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - guc_wq_item_append(client, engine->guc_id, ctx_desc, + guc_wq_item_append(guc, engine->guc_id, ctx_desc, ring_tail, rq->fence.seqno); } @@ -266,9 +244,8 @@ static void guc_submit(struct intel_engine_cs *engine, struct i915_request **end) { struct intel_guc *guc = &engine->gt->uc.guc; - struct intel_guc_client *client = guc->execbuf_client; - spin_lock(&client->wq_lock); + spin_lock(&guc->wq_lock); do { struct i915_request *rq = *out++; @@ -277,7 +254,7 @@ static void guc_submit(struct intel_engine_cs *engine, guc_add_request(guc, rq); } while (out != end); - spin_unlock(&client->wq_lock); + spin_unlock(&guc->wq_lock); } static inline int rq_prio(const struct i915_request *rq) @@ -528,126 +505,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine) * path of guc_submit() above. */ -/** - * guc_client_alloc() - Allocate an intel_guc_client - * @guc: the intel_guc structure - * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. - * - * Return: An intel_guc_client object if success, else NULL. - */ -static struct intel_guc_client * -guc_client_alloc(struct intel_guc *guc, u32 priority) -{ - struct intel_guc_client *client; - struct i915_vma *vma; - void *vaddr; - int ret; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return ERR_PTR(-ENOMEM); - - client->guc = guc; - client->priority = priority; - spin_lock_init(&client->wq_lock); - - ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); - if (ret < 0) - goto err_client; - - client->stage_id = ret; - - /* The first page is proc_desc. Two following pages are wq. */ - vma = intel_guc_allocate_vma(guc, GUC_PD_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_id; - } - - /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ - client->vma = vma; - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - client->vaddr = vaddr; - - DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n", - priority, client, client->stage_id); - - return client; - -err_vma: - i915_vma_unpin_and_release(&client->vma, 0); -err_id: - ida_simple_remove(&guc->stage_ids, client->stage_id); -err_client: - kfree(client); - return ERR_PTR(ret); -} - -static void guc_client_free(struct intel_guc_client *client) -{ - i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); - ida_simple_remove(&client->guc->stage_ids, client->stage_id); - kfree(client); -} - -static int guc_clients_create(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - GEM_BUG_ON(guc->execbuf_client); - - client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for submission!\n"); - return PTR_ERR(client); - } - guc->execbuf_client = client; - - return 0; -} - -static void guc_clients_destroy(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - client = fetch_and_zero(&guc->execbuf_client); - if (client) - guc_client_free(client); -} - -static void __guc_client_enable(struct intel_guc_client *client) -{ - guc_proc_desc_init(client); - guc_stage_desc_init(client); -} - -static void __guc_client_disable(struct intel_guc_client *client) -{ - /* Note: By the time we're here, GuC may have already been reset */ - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); -} - -static void guc_clients_enable(struct intel_guc *guc) -{ - __guc_client_enable(guc->execbuf_client); -} - -static void guc_clients_disable(struct intel_guc *guc) -{ - if (guc->execbuf_client) - __guc_client_disable(guc->execbuf_client); -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -668,12 +525,20 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = guc_clients_create(guc); + ret = guc_workqueue_create(guc); if (ret) goto err_pool; + ret = guc_proc_desc_create(guc); + if (ret) + goto err_workqueue; + + spin_lock_init(&guc->wq_lock); + return 0; +err_workqueue: + guc_workqueue_destroy(guc); err_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -681,10 +546,11 @@ int intel_guc_submission_init(struct intel_guc *guc) void intel_guc_submission_fini(struct intel_guc *guc) { - guc_clients_destroy(guc); - - if (guc->stage_desc_pool) + if (guc->stage_desc_pool) { + guc_proc_desc_destroy(guc); + guc_workqueue_destroy(guc); guc_stage_desc_pool_destroy(guc); + } } static void guc_interrupts_capture(struct intel_gt *gt) @@ -770,9 +636,8 @@ void intel_guc_submission_enable(struct intel_guc *guc) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - GEM_BUG_ON(!guc->execbuf_client); - - guc_clients_enable(guc); + guc_proc_desc_init(guc); + guc_stage_desc_init(guc); /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); @@ -789,8 +654,12 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(gt->awake); /* GT should be parked first */ + /* Note: By the time we're here, GuC may have already been reset */ + guc_interrupts_release(gt); - guc_clients_disable(guc); + + guc_stage_desc_fini(guc); + guc_proc_desc_fini(guc); } static bool __guc_submission_support(struct intel_guc *guc) @@ -808,3 +677,8 @@ void intel_guc_submission_init_early(struct intel_guc *guc) { guc->submission_supported = __guc_submission_support(guc); } + +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == guc_set_default_submission; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index e2deb4e6f42a..e402a2932592 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -6,48 +6,10 @@ #ifndef _INTEL_GUC_SUBMISSION_H_ #define _INTEL_GUC_SUBMISSION_H_ -#include +#include -#include "gt/intel_engine_types.h" - -#include "i915_gem.h" -#include "i915_selftest.h" - -struct drm_i915_private; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - */ -struct intel_guc_client { - struct i915_vma *vma; - void *vaddr; - struct intel_guc *guc; - - /* bitmap of (host) engine ids */ - u32 priority; - u32 stage_id; - - /* Protects GuC client's WQ access */ - spinlock_t wq_lock; -}; +struct intel_guc; +struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); int intel_guc_submission_init(struct intel_guc *guc); @@ -56,5 +18,6 @@ void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3e563384e84d..6cb149ba1905 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1792,23 +1792,12 @@ static void i915_guc_log_info(struct seq_file *m, static int i915_guc_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_guc *guc = &dev_priv->gt.uc.guc; - struct intel_guc_client *client = guc->execbuf_client; if (!USES_GUC(dev_priv)) return -ENODEV; i915_guc_log_info(m, dev_priv); - if (!USES_GUC_SUBMISSION(dev_priv)) - return 0; - - GEM_BUG_ON(!guc->execbuf_client); - - seq_printf(m, "\nGuC execbuf client @ %p:\n", client); - seq_printf(m, "\tPriority %d, GuC stage index: %u\n", - client->priority, - client->stage_id); /* Add more as required ... */ return 0;