drm/i915/gt: Double check heartbeat timeout before resetting

Check that we have actually passed the heartbeat interval since last
checking the request before resetting the device.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2780
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210204211303.21347-2-chris@chris-wilson.co.uk
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
Chris Wilson 2021-02-04 21:13:03 +00:00 committed by Daniel Vetter
parent 10c5585b51
commit 2827ce6e54

View file

@ -31,7 +31,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
delay = msecs_to_jiffies_timeout(delay);
if (delay >= HZ)
delay = round_jiffies_up_relative(delay);
mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay);
mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1);
return true;
}
@ -103,6 +103,13 @@ static void heartbeat(struct work_struct *wrk)
goto out;
if (engine->heartbeat.systole) {
long delay = READ_ONCE(engine->props.heartbeat_interval_ms);
/* Safeguard against too-fast worker invocations */
if (!time_after(jiffies,
rq->emitted_jiffies + msecs_to_jiffies(delay)))
goto out;
if (!i915_sw_fence_signaled(&rq->submit)) {
/*
* Not yet submitted, system is stalled.
@ -140,6 +147,8 @@ static void heartbeat(struct work_struct *wrk)
"stopped heartbeat on %s",
engine->name);
}
rq->emitted_jiffies = jiffies;
goto out;
}