Merge tag 'drm-intel-fixes-2020-12-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

Fixes for GPU hang, null dereference, suspend-resume, power consumption, and use-after-free.

- Program mocs:63 for cache eviction on gen9 (Chris)
- Protect context lifetime with RCU (Chris)
- Split the breadcrumb spinlock between global and contexts (Chris)
- Retain default context state across shrinking (Venkata)
- Limit frequency drop to RPe on parking (Chris)
- Return earlier from intel_modeset_init() without display (Jani)
- Defer initial modeset until after GGTT is initialized (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201203134705.GA1575873@intel.com
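
Of the fixes above, "Protect context lifetime with RCU" boils down to the standard kref-plus-call_rcu() idiom: the final reference drop queues the free behind an RCU grace period so that lockless lookups racing with release never touch reclaimed memory, and because the rcu_head shares storage with the kref the object must never be resurrected via kref_get_unless_zero(). The sketch below only illustrates that shape under those assumptions; the demo_ctx names are hypothetical, and the actual change is in the intel_context hunks further down.

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_ctx {
	/*
	 * Refcount and RCU callback head overlay each other: once the last
	 * reference is gone the kref is dead and the same bytes are reused
	 * to queue the deferred free, hence no kref_get_unless_zero().
	 */
	union {
		struct kref ref;
		struct rcu_head rcu;
	};
	int payload;
};

static struct demo_ctx *demo_ctx_create(void)
{
	struct demo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (ctx)
		kref_init(&ctx->ref);
	return ctx;
}

static void demo_ctx_free_rcu(struct rcu_head *rcu)
{
	/* Runs only after every pre-existing RCU reader has finished. */
	kfree(container_of(rcu, struct demo_ctx, rcu));
}

static void demo_ctx_release(struct kref *ref)
{
	struct demo_ctx *ctx = container_of(ref, struct demo_ctx, ref);

	/* Defer the actual free past the current RCU grace period. */
	call_rcu(&ctx->rcu, demo_ctx_free_rcu);
}

static void demo_ctx_put(struct demo_ctx *ctx)
{
	kref_put(&ctx->ref, demo_ctx_release);
}
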
Dave Airlie 2020-12-04 11:45:37 +10:00
commit 94cfbd05e4
9 changed files with 143 additions and 124 deletions
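
The bulk of the breadcrumbs diff below implements "Split the breadcrumb spinlock between global and contexts": the single breadcrumbs-wide irq_lock is replaced by a signalers_lock that guards only the list of signaling contexts plus a per-context signal_lock for each context's request list, and the irq worker walks both lists under rcu_read_lock(). A minimal sketch of that locking shape, using hypothetical demo_* types rather than the driver's real intel_breadcrumbs/intel_context structures:

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct demo_breadcrumbs {
	spinlock_t signalers_lock;	/* guards only the signalers list */
	struct list_head signalers;	/* contexts with pending signals */
};

struct demo_context {
	spinlock_t signal_lock;		/* guards only this context's signals */
	struct list_head signals;	/* requests awaiting signaling */
	struct list_head signal_link;	/* entry in breadcrumbs->signalers */
};

static void demo_breadcrumbs_init(struct demo_breadcrumbs *b)
{
	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
}

/* Callers hold ce->signal_lock; the global lock covers only the link. */
static void demo_add_signaler(struct demo_breadcrumbs *b,
			      struct demo_context *ce)
{
	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static void demo_del_signaler(struct demo_breadcrumbs *b,
			      struct demo_context *ce)
{
	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);
}

/*
 * The signal worker never holds a breadcrumbs-wide lock across the walk:
 * rcu_read_lock() keeps the traversal safe against concurrent removal,
 * and each context's signal_lock is taken only while its own requests
 * are inspected.
 */
static void demo_signal_worker(struct demo_breadcrumbs *b)
{
	struct demo_context *ce;

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		spin_lock(&ce->signal_lock);
		/* ... pop completed requests off ce->signals here ... */
		spin_unlock(&ce->signal_lock);
	}
	rcu_read_unlock();
}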

@@ -18021,16 +18021,6 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915)
if (!HAS_GMCH(i915))
sanitize_watermarks(i915);
/*
* Force all active planes to recompute their states. So that on
* mode_setcrtc after probe, all the intel_plane_state variables
* are already calculated and there is no assert_plane warnings
* during bootup.
*/
ret = intel_initial_commit(dev);
if (ret)
drm_dbg_kms(&i915->drm, "Initial commit in probe failed.\n");
return 0;
}
@@ -18039,11 +18029,21 @@ int intel_modeset_init(struct drm_i915_private *i915)
{
int ret;
intel_overlay_setup(i915);
if (!HAS_DISPLAY(i915))
return 0;
/*
* Force all active planes to recompute their states. So that on
* mode_setcrtc after probe, all the intel_plane_state variables
* are already calculated and there is no assert_plane warnings
* during bootup.
*/
ret = intel_initial_commit(&i915->drm);
if (ret)
return ret;
intel_overlay_setup(i915);
ret = intel_fbdev_init(&i915->drm);
if (ret)
return ret;

@@ -101,18 +101,37 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
intel_gt_pm_put_async(b->irq_engine->gt);
}
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
spin_lock(&b->irq_lock);
if (b->irq_armed)
__intel_breadcrumbs_disarm_irq(b);
spin_unlock(&b->irq_lock);
}
static void add_signaling_context(struct intel_breadcrumbs *b,
struct intel_context *ce)
{
intel_context_get(ce);
list_add_tail(&ce->signal_link, &b->signalers);
lockdep_assert_held(&ce->signal_lock);
spin_lock(&b->signalers_lock);
list_add_rcu(&ce->signal_link, &b->signalers);
spin_unlock(&b->signalers_lock);
}
static void remove_signaling_context(struct intel_breadcrumbs *b,
static bool remove_signaling_context(struct intel_breadcrumbs *b,
struct intel_context *ce)
{
list_del(&ce->signal_link);
intel_context_put(ce);
lockdep_assert_held(&ce->signal_lock);
if (!list_empty(&ce->signals))
return false;
spin_lock(&b->signalers_lock);
list_del_rcu(&ce->signal_link);
spin_unlock(&b->signalers_lock);
return true;
}
static inline bool __request_completed(const struct i915_request *rq)
@@ -175,6 +194,8 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
static bool __signal_request(struct i915_request *rq)
{
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
if (!__dma_fence_signal(&rq->fence)) {
i915_request_put(rq);
return false;
@@ -195,15 +216,12 @@ static void signal_irq_work(struct irq_work *work)
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
const ktime_t timestamp = ktime_get();
struct llist_node *signal, *sn;
struct intel_context *ce, *cn;
struct list_head *pos, *next;
struct intel_context *ce;
signal = NULL;
if (unlikely(!llist_empty(&b->signaled_requests)))
signal = llist_del_all(&b->signaled_requests);
spin_lock(&b->irq_lock);
/*
* Keep the irq armed until the interrupt after all listeners are gone.
*
@@ -229,47 +247,44 @@ static void signal_irq_work(struct irq_work *work)
* interrupt draw less ire from other users of the system and tools
* like powertop.
*/
if (!signal && b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
intel_breadcrumbs_disarm_irq(b);
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
GEM_BUG_ON(list_empty(&ce->signals));
rcu_read_lock();
list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
struct i915_request *rq;
list_for_each_safe(pos, next, &ce->signals) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
bool release;
GEM_BUG_ON(!check_signal_order(ce, rq));
if (!__request_completed(rq))
break;
if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
&rq->fence.flags))
break;
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
spin_lock(&ce->signal_lock);
list_del_rcu(&rq->signal_link);
release = remove_signaling_context(b, ce);
spin_unlock(&ce->signal_lock);
if (__signal_request(rq))
/* We own signal_node now, xfer to local list */
signal = slist_add(&rq->signal_node, signal);
}
/*
* We process the list deletion in bulk, only using a list_add
* (not list_move) above but keeping the status of
* rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
*/
if (!list_is_first(pos, &ce->signals)) {
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
if (&ce->signals == pos) { /* now empty */
if (release) {
add_retire(b, ce->timeline);
remove_signaling_context(b, ce);
intel_context_put(ce);
}
}
}
spin_unlock(&b->irq_lock);
rcu_read_unlock();
llist_for_each_safe(signal, sn, signal) {
struct i915_request *rq =
@@ -298,14 +313,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
if (!b)
return NULL;
spin_lock_init(&b->irq_lock);
b->irq_engine = irq_engine;
spin_lock_init(&b->signalers_lock);
INIT_LIST_HEAD(&b->signalers);
init_llist_head(&b->signaled_requests);
spin_lock_init(&b->irq_lock);
init_irq_work(&b->irq_work, signal_irq_work);
b->irq_engine = irq_engine;
return b;
}
@@ -347,9 +363,9 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
kfree(b);
}
static void insert_breadcrumb(struct i915_request *rq,
struct intel_breadcrumbs *b)
static void insert_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
@@ -371,6 +387,7 @@ static void insert_breadcrumb(struct i915_request *rq,
}
if (list_empty(&ce->signals)) {
intel_context_get(ce);
add_signaling_context(b, ce);
pos = &ce->signals;
} else {
@@ -396,8 +413,9 @@ static void insert_breadcrumb(struct i915_request *rq,
break;
}
}
list_add(&rq->signal_link, pos);
list_add_rcu(&rq->signal_link, pos);
GEM_BUG_ON(!check_signal_order(ce, rq));
GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
/*
@@ -410,7 +428,7 @@ static void insert_breadcrumb(struct i915_request *rq,
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b;
struct intel_context *ce = rq->context;
/* Serialises with i915_request_retire() using rq->lock */
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -425,67 +443,30 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
return true;
/*
* rq->engine is locked by rq->engine->active.lock. That however
* is not known until after rq->engine has been dereferenced and
* the lock acquired. Hence we acquire the lock and then validate
* that rq->engine still matches the lock we hold for it.
*
* Here, we are using the breadcrumb lock as a proxy for the
* rq->engine->active.lock, and we know that since the breadcrumb
* will be serialised within i915_request_submit/i915_request_unsubmit,
* the engine cannot change while active as long as we hold the
* breadcrumb lock on that engine.
*
* From the dma_fence_enable_signaling() path, we are outside of the
* request submit/unsubmit path, and so we must be more careful to
* acquire the right lock.
*/
b = READ_ONCE(rq->engine)->breadcrumbs;
spin_lock(&b->irq_lock);
while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
spin_unlock(&b->irq_lock);
b = READ_ONCE(rq->engine)->breadcrumbs;
spin_lock(&b->irq_lock);
}
/*
* Now that we are finally serialised with request submit/unsubmit,
* [with b->irq_lock] and with i915_request_retire() [via checking
* SIGNALED with rq->lock] confirm the request is indeed active. If
* it is no longer active, the breadcrumb will be attached upon
* i915_request_submit().
*/
spin_lock(&ce->signal_lock);
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
insert_breadcrumb(rq, b);
spin_unlock(&b->irq_lock);
insert_breadcrumb(rq);
spin_unlock(&ce->signal_lock);
return true;
}
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
struct intel_context *ce = rq->context;
bool release;
/*
* We must wait for b->irq_lock so that we know the interrupt handler
* has released its reference to the intel_context and has completed
* the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
* required).
*/
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
struct intel_context *ce = rq->context;
if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
return;
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
remove_signaling_context(b, ce);
spin_lock(&ce->signal_lock);
list_del_rcu(&rq->signal_link);
release = remove_signaling_context(rq->engine->breadcrumbs, ce);
spin_unlock(&ce->signal_lock);
if (release)
intel_context_put(ce);
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
i915_request_put(rq);
}
spin_unlock(&b->irq_lock);
i915_request_put(rq);
}
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
@@ -495,18 +476,17 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
drm_printf(p, "Signals:\n");
spin_lock_irq(&b->irq_lock);
list_for_each_entry(ce, &b->signalers, signal_link) {
list_for_each_entry(rq, &ce->signals, signal_link) {
rcu_read_lock();
list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
list_for_each_entry_rcu(rq, &ce->signals, signal_link)
drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
rq->fence.context, rq->fence.seqno,
i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
jiffies_to_msecs(jiffies - rq->emitted_jiffies));
}
}
spin_unlock_irq(&b->irq_lock);
rcu_read_unlock();
}
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,

@@ -29,18 +29,16 @@
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects the lists used in hardirq context */
/* Not all breadcrumbs are attached to physical HW */
struct intel_engine_cs *irq_engine;
spinlock_t signalers_lock; /* protects the list of signalers */
struct list_head signalers;
struct llist_head signaled_requests;
spinlock_t irq_lock; /* protects the interrupt from hardirq context */
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
bool irq_armed;
};

@@ -25,9 +25,16 @@ static struct intel_context *intel_context_alloc(void)
return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
}
static void rcu_context_free(struct rcu_head *rcu)
{
struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
kmem_cache_free(global.slab_ce, ce);
}
void intel_context_free(struct intel_context *ce)
{
kmem_cache_free(global.slab_ce, ce);
call_rcu(&ce->rcu, rcu_context_free);
}
struct intel_context *
@@ -356,8 +363,7 @@ static int __intel_context_active(struct i915_active *active)
}
void
intel_context_init(struct intel_context *ce,
struct intel_engine_cs *engine)
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
GEM_BUG_ON(!engine->cops);
GEM_BUG_ON(!engine->gt->vm);
@@ -373,7 +379,8 @@ intel_context_init(struct intel_context *ce,
ce->vm = i915_vm_get(engine->gt->vm);
INIT_LIST_HEAD(&ce->signal_link);
/* NB ce->signal_link/lock is used under RCU */
spin_lock_init(&ce->signal_lock);
INIT_LIST_HEAD(&ce->signals);
mutex_init(&ce->pin_mutex);

@@ -25,6 +25,7 @@ DECLARE_EWMA(runtime, 3, 8);
struct i915_gem_context;
struct i915_gem_ww_ctx;
struct i915_vma;
struct intel_breadcrumbs;
struct intel_context;
struct intel_ring;
@@ -44,7 +45,16 @@ };
};
struct intel_context {
struct kref ref;
/*
* Note: Some fields may be accessed under RCU.
*
* Unless otherwise noted a field can safely be assumed to be protected
* by strong reference counting.
*/
union {
struct kref ref; /* no kref_get_unless_zero()! */
struct rcu_head rcu;
};
struct intel_engine_cs *engine;
struct intel_engine_cs *inflight;
@@ -54,8 +64,15 @@ struct intel_context {
struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context;
struct list_head signal_link;
struct list_head signals;
/*
* @signal_lock protects the list of requests that need signaling,
* @signals. While there are any requests that need signaling,
* we add the context to the breadcrumbs worker, and remove it
* upon completion/cancellation of the last request.
*/
struct list_head signal_link; /* Accessed under RCU */
struct list_head signals; /* Guarded by signal_lock */
spinlock_t signal_lock; /* protects signals, the list of requests */
struct i915_vma *state;
struct intel_ring *ring;

@@ -131,7 +131,19 @@ static const struct drm_i915_mocs_entry skl_mocs_table[] = {
GEN9_MOCS_ENTRIES,
MOCS_ENTRY(I915_MOCS_CACHED,
LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
L3_3_WB)
L3_3_WB),
/*
* mocs:63
* - used by the L3 for all of its evictions.
* Thus it is expected to allow LLC cacheability to enable coherent
* flows to be maintained.
* - used to force L3 uncachable cycles.
* Thus it is expected to make the surface L3 uncacheable.
*/
MOCS_ENTRY(63,
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
L3_1_UC)
};
/* NOTE: the LE_TGT_CACHE is not used on Broxton */

@@ -883,6 +883,10 @@ void intel_rps_park(struct intel_rps *rps)
adj = -2;
rps->last_adj = adj;
rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
if (rps->cur_freq < rps->efficient_freq) {
rps->cur_freq = rps->efficient_freq;
rps->last_adj = 0;
}
GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}

@@ -103,10 +103,13 @@ static int __shmem_rw(struct file *file, loff_t off,
return PTR_ERR(page);
vaddr = kmap(page);
if (write)
if (write) {
memcpy(vaddr + offset_in_page(off), ptr, this);
else
set_page_dirty(page);
} else {
memcpy(ptr, vaddr + offset_in_page(off), this);
}
mark_page_accessed(page);
kunmap(page);
put_page(page);

@@ -177,10 +177,8 @@ struct i915_request {
struct intel_ring *ring;
struct intel_timeline __rcu *timeline;
union {
struct list_head signal_link;
struct llist_node signal_node;
};
struct list_head signal_link;
struct llist_node signal_node;
/*
* The rcu epoch of when this request was allocated. Used to judiciously