diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 2007dc6f6b99..209cf265bf74 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -245,6 +245,9 @@ static void signal_irq_work(struct irq_work *work) llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; + if (rq->engine->sched_engine->retire_inflight_request_prio) + rq->engine->sched_engine->retire_inflight_request_prio(rq); + spin_lock(&rq->lock); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index a5bc876face7..e54351a170e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -18,8 +18,9 @@ #include "intel_engine_types.h" #include "intel_sseu.h" -#define CONTEXT_REDZONE POISON_INUSE +#include "uc/intel_guc_fwif.h" +#define CONTEXT_REDZONE POISON_INUSE DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; @@ -191,6 +192,12 @@ struct intel_context { /* GuC context blocked fence */ struct i915_sw_fence guc_blocked; + + /* + * GuC priority management + */ + u8 guc_prio; + u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 84142127ebd8..8f8bea08e734 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -11,6 +11,7 @@ #include "intel_engine.h" #include "intel_engine_user.h" #include "intel_gt.h" +#include "uc/intel_guc_submission.h" struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) @@ -115,6 +116,9 @@ static void set_scheduler_caps(struct drm_i915_private *i915) disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; + for (i = 0; i < ARRAY_SIZE(map); i++) { if (engine->flags & BIT(map[i].engine)) enabled |= BIT(map[i].sched); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 3ff42d6e934f..b760cbf6ca0e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -81,6 +81,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); */ #define SCHED_STATE_NO_LOCK_ENABLED BIT(0) #define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) +#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) static inline bool context_enabled(struct intel_context *ce) { return (atomic_read(&ce->guc_sched_state_no_lock) & @@ -116,6 +117,24 @@ static inline void clr_context_pending_enable(struct intel_context *ce) &ce->guc_sched_state_no_lock); } +static inline bool context_registered(struct intel_context *ce) +{ + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_REGISTERED); +} + +static inline void set_context_registered(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); +} + /* * Below is a set of functions which control the GuC scheduling state which * require a lock, aside from the special case where the functions are called @@ -1092,6 +1111,7 @@ static int steal_guc_id(struct intel_guc *guc) list_del_init(&ce->guc_id_link); guc_id = ce->guc_id; + clr_context_registered(ce); set_context_guc_id_invalid(ce); return guc_id; } else { @@ -1201,10 +1221,15 @@ static int register_context(struct intel_context *ce, bool loop) struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + ce->guc_id * sizeof(struct guc_lrc_desc); + int ret; trace_intel_context_register(ce); - return __guc_action_register_context(guc, ce->guc_id, offset, loop); + ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); + if (likely(!ret)) + set_context_registered(ce); + + return ret; } static int __guc_action_deregister_context(struct intel_guc *guc, @@ -1260,6 +1285,8 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } +static inline u8 map_i915_prio_to_guc_prio(int prio); + static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; @@ -1267,6 +1294,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct intel_guc *guc = &engine->gt->uc.guc; u32 desc_idx = ce->guc_id; struct guc_lrc_desc *desc; + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; bool context_registered; intel_wakeref_t wakeref; int ret = 0; @@ -1282,6 +1311,12 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) context_registered = lrc_desc_registered(guc, desc_idx); + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + reset_lrc_desc(guc, desc_idx); set_lrc_desc_registered(guc, desc_idx, ce); @@ -1290,7 +1325,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; + ce->guc_prio = map_i915_prio_to_guc_prio(prio); + desc->priority = ce->guc_prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); init_sched_state(ce); @@ -1693,11 +1729,17 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); GEM_BUG_ON(context_enabled(ce)); + clr_context_registered(ce); deregister_context(ce, ce->guc_id, true); } static void __guc_context_destroy(struct intel_context *ce) { + GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + lrc_fini(ce); intel_context_fini(ce); @@ -1791,15 +1833,124 @@ static int guc_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void guc_context_set_prio(struct intel_guc *guc, + struct intel_context *ce, + u8 prio) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, + ce->guc_id, + prio, + }; + + GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || + prio > GUC_CLIENT_PRIORITY_NORMAL); + + if (ce->guc_prio == prio || submission_disabled(guc) || + !context_registered(ce)) + return; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + + ce->guc_prio = prio; + trace_intel_context_set_prio(ce); +} + +static inline u8 map_i915_prio_to_guc_prio(int prio) +{ + if (prio == I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_KMD_NORMAL; + else if (prio < I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_NORMAL; + else if (prio < I915_PRIORITY_DISPLAY) + return GUC_CLIENT_PRIORITY_HIGH; + else + return GUC_CLIENT_PRIORITY_KMD_HIGH; +} + +static inline void add_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + ++ce->guc_prio_count[guc_prio]; + + /* Overflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); +} + +static inline void sub_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + /* Underflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + + --ce->guc_prio_count[guc_prio]; +} + +static inline void update_context_prio(struct intel_context *ce) +{ + struct intel_guc *guc = &ce->engine->gt->uc.guc; + int i; + + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); + + lockdep_assert_held(&ce->guc_active.lock); + + for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { + if (ce->guc_prio_count[i]) { + guc_context_set_prio(guc, ce, i); + break; + } + } +} + +static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) +{ + /* Lower value is higher priority */ + return new_guc_prio < old_guc_prio; +} + static void add_to_context(struct i915_request *rq) { struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); + + GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); spin_lock(&ce->guc_active.lock); list_move_tail(&rq->sched.link, &ce->guc_active.requests); + + if (rq->guc_prio == GUC_PRIO_INIT) { + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } + update_context_prio(ce); + spin_unlock(&ce->guc_active.lock); } +static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_active.lock); + + if (rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI) { + sub_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + rq->guc_prio = GUC_PRIO_FINI; +} + static void remove_from_context(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -1812,6 +1963,8 @@ static void remove_from_context(struct i915_request *rq) /* Prevent further __await_execution() registering a cb, then flush */ set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + guc_prio_fini(rq, ce); + spin_unlock_irq(&ce->guc_active.lock); atomic_dec(&ce->guc_id_ref); @@ -2093,6 +2246,39 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine) } } +static void guc_bump_inflight_request_prio(struct i915_request *rq, + int prio) +{ + struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); + + /* Short circuit function */ + if (prio < I915_PRIORITY_NORMAL || + rq->guc_prio == GUC_PRIO_FINI || + (rq->guc_prio != GUC_PRIO_INIT && + !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) + return; + + spin_lock(&ce->guc_active.lock); + if (rq->guc_prio != GUC_PRIO_FINI) { + if (rq->guc_prio != GUC_PRIO_INIT) + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + spin_unlock(&ce->guc_active.lock); +} + +static void guc_retire_inflight_request_prio(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock(&ce->guc_active.lock); + guc_prio_fini(rq, ce); + spin_unlock(&ce->guc_active.lock); +} + static void sanitize_hwsp(struct intel_engine_cs *engine) { struct intel_timeline *tl; @@ -2317,6 +2503,10 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) guc->sched_engine->disabled = guc_sched_engine_disabled; guc->sched_engine->private_data = guc; guc->sched_engine->destroy = guc_sched_engine_destroy; + guc->sched_engine->bump_inflight_request_prio = + guc_bump_inflight_request_prio; + guc->sched_engine->retire_inflight_request_prio = + guc_retire_inflight_request_prio; tasklet_setup(&guc->sched_engine->tasklet, guc_submission_tasklet); } @@ -2694,6 +2884,22 @@ void intel_guc_submission_print_info(struct intel_guc *guc, drm_printf(p, "\n"); } +static inline void guc_log_context_priority(struct drm_printer *p, + struct intel_context *ce) +{ + int i; + + drm_printf(p, "\t\tPriority: %d\n", + ce->guc_prio); + drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); + for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; + i < GUC_CLIENT_PRIORITY_NUM; ++i) { + drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", + i, ce->guc_prio_count[i]); + } + drm_printf(p, "\n"); +} + void intel_guc_submission_print_context_info(struct intel_guc *guc, struct drm_printer *p) { @@ -2716,6 +2922,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", ce->guc_state.sched_state, atomic_read(&ce->guc_sched_state_no_lock)); + + guc_log_context_priority(p, ce); } } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 541a20371502..1f1d4a6a0eff 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -114,6 +114,9 @@ static void i915_fence_release(struct dma_fence *fence) { struct i915_request *rq = to_request(fence); + GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. @@ -924,6 +927,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ + rq->guc_prio = GUC_PRIO_INIT; + /* We bump the ref for the fence chain */ i915_sw_fence_reinit(&i915_request_get(rq)->submit); i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ac0e3326c067..e6a0e0ebc9aa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -293,6 +293,15 @@ struct i915_request { */ struct list_head guc_fence_link; + /** + * Priority level while the request is inflight. Differs from i915 + * scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + */ +#define GUC_PRIO_INIT 0xff +#define GUC_PRIO_FINI 0xfe + u8 guc_prio; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 28dd887eb1be..17843c204356 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -241,6 +241,9 @@ static void __i915_schedule(struct i915_sched_node *node, /* Fifo and depth-first replacement ensure our deps execute before us */ sched_engine = lock_sched_engine(node, sched_engine, &cache); list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_request *from = container_of(dep->signaler, + struct i915_request, + sched); INIT_LIST_HEAD(&dep->dfs_link); node = dep->signaler; @@ -254,6 +257,10 @@ static void __i915_schedule(struct i915_sched_node *node, GEM_BUG_ON(node_to_request(node)->engine->sched_engine != sched_engine); + /* Must be called before changing the nodes priority */ + if (sched_engine->bump_inflight_request_prio) + sched_engine->bump_inflight_request_prio(from, prio); + WRITE_ONCE(node->attr.priority, prio); /* diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index eaef233e9080..b0a1b58c7893 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -179,6 +179,18 @@ struct i915_sched_engine { void (*kick_backend)(const struct i915_request *rq, int prio); + /** + * @bump_inflight_request_prio: update priority of an inflight request + */ + void (*bump_inflight_request_prio)(struct i915_request *rq, + int prio); + + /** + * @retire_inflight_request_prio: indicate request is retired to + * priority tracking + */ + void (*retire_inflight_request_prio)(struct i915_request *rq); + /** * @schedule: adjust priority of request * diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 9613a7c19661..806ad688274b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -904,6 +904,7 @@ DECLARE_EVENT_CLASS(intel_context, __field(int, pin_count) __field(u32, sched_state) __field(u32, guc_sched_state_no_lock) + __field(u8, guc_prio) ), TP_fast_assign( @@ -912,12 +913,19 @@ DECLARE_EVENT_CLASS(intel_context, __entry->sched_state = ce->guc_state.sched_state; __entry->guc_sched_state_no_lock = atomic_read(&ce->guc_sched_state_no_lock); + __entry->guc_prio = ce->guc_prio; ), - TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x", + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", __entry->guc_id, __entry->pin_count, __entry->sched_state, - __entry->guc_sched_state_no_lock) + __entry->guc_sched_state_no_lock, + __entry->guc_prio) +); + +DEFINE_EVENT(intel_context, intel_context_set_prio, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) ); DEFINE_EVENT(intel_context, intel_context_reset, @@ -1017,6 +1025,11 @@ trace_i915_request_out(struct i915_request *rq) { } +static inline void +trace_intel_context_set_prio(struct intel_context *ce) +{ +} + static inline void trace_intel_context_reset(struct intel_context *ce) { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 975087553ea0..7f13d241417f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42