linux-stable/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
Chris Wilson 27a5dcfe73 drm/i915/gem: Remove disordered per-file request list for throttling
I915_GEM_THROTTLE dates back to the time before contexts where there was
just a single engine, and therefore a single timeline and request list
globally. That request list was in execution/retirement order, and so
walking it to find a particular aged request made sense and could be
split per file.

That is no more. We now have many timelines within a file, as many as the
user wants to construct (essentially per-engine, per-context). Each of
those runs independently, which makes the single list futile. Remove the
disordered list, and iterate over all the timelines to find a request to
wait on in each, to satisfy the criterion that the CPU is no more than 20ms
ahead of its oldest request.

It should go without saying that the I915_GEM_THROTTLE ioctl is no
longer used as the primary means of throttling, so it makes sense to push
the complication into the ioctl where it only impacts upon its few
irregular users, rather than the execbuf/retire where everybody has to
pay the cost. Fortunately, the few users do not create a vast number of
contexts, so the loops over contexts/engines should be concise.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200728152010.30701-1-chris@chris-wilson.co.uk
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2020-09-07 13:13:50 +03:00

100 lines
2.5 KiB
C

/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2014-2016 Intel Corporation
*/
#include <linux/jiffies.h>
#include <drm/drm_file.h>
#include "i915_drv.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
/*
 * 20ms is a fairly arbitrary limit (greater than the average frame time)
 * chosen to prevent the CPU getting more than a frame ahead of the GPU
 * (when using lax throttling for the frontbuffer). We also use it to
 * offer free GPU waitboosts for severely congested workloads.
 *
 * The limit is expressed in jiffies: the macro expands to a
 * msecs_to_jiffies() call at every place it is used.
 */
#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
/*
 * Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * The cutoff (recent_enough) is sampled from jiffies once, on entry. Note
 * that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 *
 * Every context found in the file's context_xa is visited. For each engine of
 * a context that has a timeline, the timeline's request list is walked in
 * reverse: the walk stops at the first completed request, requests emitted
 * after the cutoff are skipped, and the first remaining request is taken as
 * the target of an interruptible wait.
 *
 * @dev: DRM device the ioctl was issued on
 * @data: ioctl argument (not used here)
 * @file: DRM file whose contexts are to be throttled
 *
 * Returns 0 on success, the non-zero result of intel_gt_terminally_wedged()
 * if that check fails, or the negative error code of the first failing
 * i915_request_wait().
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	unsigned long idx;
	long ret;

	/* ABI: return -EIO if already wedged */
	ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
	if (ret)
		return ret;

	rcu_read_lock();
	xa_for_each(&file_priv->context_xa, idx, ctx) {
		struct i915_gem_engines_iter it;
		struct intel_context *ce;

		/* Skip any context whose reference count already hit zero. */
		if (!kref_get_unless_zero(&ctx->ref))
			continue;

		/*
		 * With our own reference on the context, leave the RCU
		 * read-side section: the engine walk below takes mutexes and
		 * waits on requests.
		 */
		rcu_read_unlock();

		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx),
				    it) {
			struct i915_request *rq, *target = NULL;

			if (!ce->timeline)
				continue;

			mutex_lock(&ce->timeline->mutex);
			list_for_each_entry_reverse(rq,
						    &ce->timeline->requests,
						    link) {
				/* Nothing further along is considered. */
				if (i915_request_completed(rq))
					break;

				/* Emitted after the cutoff: too recent. */
				if (time_after(rq->emitted_jiffies,
					       recent_enough))
					continue;

				/* Hold a reference across the unlocked wait. */
				target = i915_request_get(rq);
				break;
			}
			mutex_unlock(&ce->timeline->mutex);
			if (!target)
				continue;

			ret = i915_request_wait(target,
						I915_WAIT_INTERRUPTIBLE,
						MAX_SCHEDULE_TIMEOUT);
			i915_request_put(target);
			if (ret < 0)
				break;
		}
		i915_gem_context_unlock_engines(ctx);
		i915_gem_context_put(ctx);

		/* Back under RCU before the xarray walk advances. */
		rcu_read_lock();

		/*
		 * A failed wait only breaks out of the engine loop above.
		 * Stop walking the remaining contexts as well, otherwise a
		 * later successful wait would overwrite ret and the error
		 * would be silently dropped.
		 */
		if (ret < 0)
			break;
	}
	rcu_read_unlock();

	return ret < 0 ? ret : 0;
}