diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 148be8e1a090..f0556310b851 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -133,3 +133,9 @@ depends on DRM_I915 depends on EXPERT source "drivers/gpu/drm/i915/Kconfig.debug" endmenu + +menu "drm/i915 Profile Guided Optimisation" + visible if EXPERT + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile new file mode 100644 index 000000000000..0e5db98da8f3 --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -0,0 +1,13 @@ +config DRM_I915_SPIN_REQUEST + int + default 5 # microseconds + help + Before sleeping waiting for a request (GPU operation) to complete, + we may spend some time polling for its completion. As the IRQ may + take a non-negligible time to setup, we do a short spin first to + check if the request will complete in the time it would have taken + us to enable the interrupt. + + May be 0 to disable the initial spin. In practice, we estimate + the cost of enabling the interrupt (if currently disabled) to be + a few microseconds. diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b836721d3b13..b1f00b59bb95 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1340,8 +1340,31 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, state, 5)) + /* + * Optimistic spin before touching IRQs. + * + * We may use a rather large value here to offset the penalty of + * switching away from the active task. Frequently, the client will + * wait upon an old swapbuffer to throttle itself to remain within a + * frame of the gpu. If the client is running in lockstep with the gpu, + * then it should not be waiting long at all, and a sleep now will incur + * extra scheduler latency in producing the next frame. To try to + * avoid adding the cost of enabling/disabling the interrupt to the + * short wait, we first spin to see if the request would have completed + * in the time taken to setup the interrupt. + * + * We need upto 5us to enable the irq, and upto 20us to hide the + * scheduler latency of a context switch, ignoring the secondary + * impacts from a context switch such as cache eviction. + * + * The scheme used for low-latency IO is called "hybrid interrupt + * polling". The suggestion there is to sleep until just before you + * expect to be woken by the device interrupt and then poll for its + * completion. That requires having a good predictor for the request + * duration, which we currently lack. + */ + if (CONFIG_DRM_I915_SPIN_REQUEST && + __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) goto out; /*