Merge tag 'drm-intel-gt-next-2021-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- Expose multi-LRC submission interface

  Similar to the bonded submission interface but simplified.
  Comes with a GuC-only implementation for now. See the kerneldoc
  for more details.

  Userspace changes: https://github.com/intel/media-driver/pull/1252

- Expose logical engine instance to user

  Needed by the multi-LRC submission interface for GuC

  Userspace changes: https://github.com/intel/media-driver/pull/1252
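
  As a rough consumer-side sketch, the logical instance can be read back
  through the existing engine-info query. This assumes the uAPI names added
  on the userspace-facing side of this series (the
  I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE flag and the logical_instance field
  of struct drm_i915_engine_info); drm_fd is a placeholder and error
  handling is minimal.

  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  /* Print class/instance and, when reported, the logical instance. */
  static void dump_logical_instances(int drm_fd)
  {
      struct drm_i915_query_item item = {
          .query_id = DRM_I915_QUERY_ENGINE_INFO,
      };
      struct drm_i915_query query = {
          .num_items = 1,
          .items_ptr = (uintptr_t)&item,
      };
      struct drm_i915_query_engine_info *info;
      unsigned int i;

      /* First pass with length == 0 asks the kernel for the buffer size. */
      if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
          return;

      info = calloc(1, item.length);
      if (!info)
          return;
      item.data_ptr = (uintptr_t)info;
      if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query)) {
          free(info);
          return;
      }

      for (i = 0; i < info->num_engines; i++) {
          struct drm_i915_engine_info *e = &info->engines[i];

          printf("class %u instance %u", e->engine.engine_class,
                 e->engine.engine_instance);
          if (e->flags & I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE)
              printf(" logical %u", e->logical_instance);
          printf("\n");
      }
      free(info);
  }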

Driver Changes:

- Fix blank screen booting crashes when CONFIG_CC_OPTIMIZE_FOR_SIZE=y (Hugh)
- Add support for multi-LRC submission in the GuC backend (Matt B)
- Add extra cache flushing before making pages userspace visible (Matt A, Thomas)
- Mark internal GPU object pages dirty so they will be flushed properly (Matt A)

- Move remaining debugfs interfaces i915_wedged/i915_forcewake_user into gt (Andi)
- Replace the unconditional clflushes with drm_clflush_virt_range() (Ville)
- Remove IS_ACTIVE macro completely (Lucas)
- Improve kerneldocs for cache_dirty (Matt A)

- Add missing includes (Lucas)
- Selftest improvements (Matt R, Ran, Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YXFmLKoq8Fg9JxSd@jlahtine-mobl.ger.corp.intel.com
Dave Airlie 2021-10-22 06:30:33 +10:00
commit 6f2f7c8330
52 changed files with 3239 additions and 772 deletions


@ -1,122 +0,0 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2021 Intel Corporation
*/
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
/**
* struct drm_i915_context_engines_parallel_submit - Configure engine for
* parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915 to
* run these BBs. Once a slot is configured for N BBs, only N BBs can be
* submitted in each execbuf IOCTL and this is implicit behavior, e.g. the user
* doesn't tell the execbuf IOCTL there are N BBs; the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects, or the first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than one physical engine (e.g. the
* context is a virtual engine). Also, we only allow contexts of the same engine
* class, and these contexts must be in logically contiguous order. Examples of
* the placement behavior are described below. Lastly, the default is to not
* allow BBs to be preempted mid-BB; rather, coordinated preemption is inserted
* on all hardware contexts between each set of BBs. Flags may be added in the
* future to change both of these default behaviors.
*
* Returns -EINVAL if hardware context placement configuration is invalid or if
* the placement configuration isn't supported on the platform / submission
* interface.
* Returns -ENODEV if extension isn't supported on the platform / submission
* interface.
*
* .. code-block:: none
*
* Example 1 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can also be thought of as 2 virtual engines described by a 2-D array
* in the engines field, with bonds placed between each index of the
* virtual engines, e.g. CS[0] is bonded to CS[1] and CS[2] is bonded to
* CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logically contiguous, returns -EINVAL
*/
struct drm_i915_context_engines_parallel_submit {
/**
* @base: base user extension.
*/
struct i915_user_extension base;
/**
* @engine_index: slot for parallel engine
*/
__u16 engine_index;
/**
* @width: number of contexts per parallel engine
*/
__u16 width;
/**
* @num_siblings: number of siblings per context
*/
__u16 num_siblings;
/**
* @mbz16: reserved for future use; must be zero
*/
__u16 mbz16;
/**
* @flags: all undefined flags must be zero; no flags are currently defined
*/
__u64 flags;
/**
* @mbz64: reserved for future use; must be zero
*/
__u64 mbz64[3];
/**
* @engines: 2-d array of engine instances to configure parallel engine
*
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
struct i915_engine_class_instance engines[0];
} __packed;
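
For illustration, a minimal userspace sketch of Example 1 above (width=2,
num_siblings=1): configure slot 0 of a context's engine map through this
extension, then submit two BBs in one execbuf. The structs below mirror the
uAPI layout shown above rather than using the helper macros from
include/uapi/drm/i915_drm.h; the video engine instances, drm_fd, ctx_id and
batch handles are hypothetical and error handling is omitted.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Local mirrors of the uAPI layouts, arrays sized for this example. */
struct parallel_ext {
	struct i915_user_extension base;
	__u16 engine_index;
	__u16 width;
	__u16 num_siblings;
	__u16 mbz16;
	__u64 flags;
	__u64 mbz64[3];
	struct i915_engine_class_instance engines[2]; /* width * num_siblings */
} __attribute__((packed));

struct engine_map {
	__u64 extensions;
	struct i915_engine_class_instance engines[1]; /* a single slot */
} __attribute__((packed));

static int setup_parallel_slot(int drm_fd, uint32_t ctx_id)
{
	struct parallel_ext parallel = {
		.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT,
		.engine_index = 0,	/* slot 0 of the engine map */
		.width = 2,		/* two BBs per execbuf */
		.num_siblings = 1,	/* one placement per BB */
		.engines = {		/* same class, logically contiguous */
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	struct engine_map map = {
		.extensions = (uintptr_t)&parallel,
		/* The slot is left INVALID; the extension configures it. */
		.engines = {
			{ I915_ENGINE_CLASS_INVALID,
			  I915_ENGINE_CLASS_INVALID_NONE },
		},
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(map),
		.value = (uintptr_t)&map,
	};

	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

/* Two BBs per execbuf on slot 0; the BBs are the last N objects in the list. */
static int submit_pair(int drm_fd, uint32_t ctx_id, uint32_t bb0, uint32_t bb1)
{
	struct drm_i915_gem_exec_object2 obj[2] = {
		{ .handle = bb0 }, { .handle = bb1 },
	};
	struct drm_i915_gem_execbuffer2 eb = {
		.buffers_ptr = (uintptr_t)obj,
		.buffer_count = 2,
		.flags = 0,	/* engine selector 0 == the parallel slot */
		.rsvd1 = ctx_id,
	};

	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb);
}

Note that the execbuf is not told there are two BBs; the kernel infers N from
the slot configuration, exactly as described in the kerneldoc above.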


@ -135,8 +135,8 @@ Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and
drm_i915_context_engines_parallel_submit to the uAPI to implement this
extension.
.. kernel-doc:: Documentation/gpu/rfc/i915_parallel_execbuf.h
:functions: drm_i915_context_engines_parallel_submit
.. kernel-doc:: include/uapi/drm/i915_drm.h
:functions: i915_context_engines_parallel_submit
Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL
-------------------------------------------------------------------


@ -4,6 +4,8 @@
* Copyright © 2014-2016 Intel Corporation
*/
#include <linux/dma-fence-array.h>
#include "gt/intel_engine.h"
#include "i915_gem_ioctls.h"
@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
}
static __always_inline unsigned int
__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
{
const struct i915_request *rq;
@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
* to eventually flush us, but to minimise latency just ask the
* hardware.
*
* Note we only report on the status of native fences.
* Note we only report on the status of native fences and we currently
* have two native fences:
*
* 1. A composite fence (dma_fence_array) constructed of i915 requests
* created during a parallel submission. In this case we deconstruct the
* composite fence into individual i915 requests and check the status of
* each request.
*
* 2. A single i915 request.
*/
if (!dma_fence_is_i915(fence))
return 0;
if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence);
struct dma_fence **child = array->fences;
unsigned int nchild = array->num_fences;
/* opencode to_request() in order to avoid const warnings */
rq = container_of(fence, const struct i915_request, fence);
if (i915_request_completed(rq))
return 0;
do {
struct dma_fence *current_fence = *child++;
/* Beware type-expansion follies! */
BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
return flag(rq->engine->uabi_class);
/* Not an i915 fence, can't be busy per above */
if (!dma_fence_is_i915(current_fence) ||
!test_bit(I915_FENCE_FLAG_COMPOSITE,
&current_fence->flags)) {
return 0;
}
rq = to_request(current_fence);
if (!i915_request_completed(rq))
return flag(rq->engine->uabi_class);
} while (--nchild);
/* All requests in array complete, not busy */
return 0;
} else {
if (!dma_fence_is_i915(fence))
return 0;
rq = to_request(fence);
if (i915_request_completed(rq))
return 0;
/* Beware type-expansion follies! */
BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
return flag(rq->engine->uabi_class);
}
}
static __always_inline unsigned int
busy_check_reader(const struct dma_fence *fence)
busy_check_reader(struct dma_fence *fence)
{
return __busy_set_if_active(fence, __busy_read_flag);
}
static __always_inline unsigned int
busy_check_writer(const struct dma_fence *fence)
busy_check_writer(struct dma_fence *fence)
{
if (!fence)
return 0;


@ -556,9 +556,147 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
return 0;
}
static int
set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
void *data)
{
struct i915_context_engines_parallel_submit __user *ext =
container_of_user(base, typeof(*ext), base);
const struct set_proto_ctx_engines *set = data;
struct drm_i915_private *i915 = set->i915;
u64 flags;
int err = 0, n, i, j;
u16 slot, width, num_siblings;
struct intel_engine_cs **siblings = NULL;
intel_engine_mask_t prev_mask;
/* FIXME: This is NIY for execlists */
if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
return -ENODEV;
if (get_user(slot, &ext->engine_index))
return -EFAULT;
if (get_user(width, &ext->width))
return -EFAULT;
if (get_user(num_siblings, &ext->num_siblings))
return -EFAULT;
if (slot >= set->num_engines) {
drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
slot, set->num_engines);
return -EINVAL;
}
if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
drm_dbg(&i915->drm,
"Invalid placement[%d], already occupied\n", slot);
return -EINVAL;
}
if (get_user(flags, &ext->flags))
return -EFAULT;
if (flags) {
drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
return -EINVAL;
}
for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
err = check_user_mbz(&ext->mbz64[n]);
if (err)
return err;
}
if (width < 2) {
drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
return -EINVAL;
}
if (num_siblings < 1) {
drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
num_siblings);
return -EINVAL;
}
siblings = kmalloc_array(num_siblings * width,
sizeof(*siblings),
GFP_KERNEL);
if (!siblings)
return -ENOMEM;
/* Create contexts / engines */
for (i = 0; i < width; ++i) {
intel_engine_mask_t current_mask = 0;
struct i915_engine_class_instance prev_engine;
for (j = 0; j < num_siblings; ++j) {
struct i915_engine_class_instance ci;
n = i * num_siblings + j;
if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
err = -EFAULT;
goto out_err;
}
siblings[n] =
intel_engine_lookup_user(i915, ci.engine_class,
ci.engine_instance);
if (!siblings[n]) {
drm_dbg(&i915->drm,
"Invalid sibling[%d]: { class:%d, inst:%d }\n",
n, ci.engine_class, ci.engine_instance);
err = -EINVAL;
goto out_err;
}
if (n) {
if (prev_engine.engine_class !=
ci.engine_class) {
drm_dbg(&i915->drm,
"Mismatched class %d, %d\n",
prev_engine.engine_class,
ci.engine_class);
err = -EINVAL;
goto out_err;
}
}
prev_engine = ci;
current_mask |= siblings[n]->logical_mask;
}
if (i > 0) {
if (current_mask != prev_mask << 1) {
drm_dbg(&i915->drm,
"Non contiguous logical mask 0x%x, 0x%x\n",
prev_mask, current_mask);
err = -EINVAL;
goto out_err;
}
}
prev_mask = current_mask;
}
set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL;
set->engines[slot].num_siblings = num_siblings;
set->engines[slot].width = width;
set->engines[slot].siblings = siblings;
return 0;
out_err:
kfree(siblings);
return err;
}
static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
[I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
[I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] =
set_proto_ctx_engines_parallel_submit,
};
static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
@ -794,6 +932,7 @@ static int intel_context_set_gem(struct intel_context *ce,
GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce));
ce->ring_size = SZ_16K;
i915_vm_put(ce->vm);
@ -818,6 +957,25 @@ static int intel_context_set_gem(struct intel_context *ce,
return ret;
}
static void __unpin_engines(struct i915_gem_engines *e, unsigned int count)
{
while (count--) {
struct intel_context *ce = e->engines[count], *child;
if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags))
continue;
for_each_child(ce, child)
intel_context_unpin(child);
intel_context_unpin(ce);
}
}
static void unpin_engines(struct i915_gem_engines *e)
{
__unpin_engines(e, e->num_engines);
}
static void __free_engines(struct i915_gem_engines *e, unsigned int count)
{
while (count--) {
@ -933,6 +1091,40 @@ free_engines:
return err;
}
static int perma_pin_contexts(struct intel_context *ce)
{
struct intel_context *child;
int i = 0, j = 0, ret;
GEM_BUG_ON(!intel_context_is_parent(ce));
ret = intel_context_pin(ce);
if (unlikely(ret))
return ret;
for_each_child(ce, child) {
ret = intel_context_pin(child);
if (unlikely(ret))
goto unwind;
++i;
}
set_bit(CONTEXT_PERMA_PIN, &ce->flags);
return 0;
unwind:
intel_context_unpin(ce);
for_each_child(ce, child) {
if (j++ < i)
intel_context_unpin(child);
else
break;
}
return ret;
}
static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
unsigned int num_engines,
struct i915_gem_proto_engine *pe)
@ -946,7 +1138,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
e->num_engines = num_engines;
for (n = 0; n < num_engines; n++) {
struct intel_context *ce;
struct intel_context *ce, *child;
int ret;
switch (pe[n].type) {
@ -956,7 +1148,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
case I915_GEM_ENGINE_TYPE_BALANCED:
ce = intel_engine_create_virtual(pe[n].siblings,
pe[n].num_siblings);
pe[n].num_siblings, 0);
break;
case I915_GEM_ENGINE_TYPE_PARALLEL:
ce = intel_engine_create_parallel(pe[n].siblings,
pe[n].num_siblings,
pe[n].width);
break;
case I915_GEM_ENGINE_TYPE_INVALID:
@ -977,6 +1175,30 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
err = ERR_PTR(ret);
goto free_engines;
}
for_each_child(ce, child) {
ret = intel_context_set_gem(child, ctx, pe->sseu);
if (ret) {
err = ERR_PTR(ret);
goto free_engines;
}
}
/*
* XXX: Must be done after calling intel_context_set_gem as that
* function changes the ring size. The ring is allocated when
* the context is pinned. If the ring size is changed after
* allocation, the resulting mismatch will cause the context to
* hang. Presumably with a bit of reordering we
* could move the perma-pin step to the backend function
* intel_engine_create_parallel.
*/
if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) {
ret = perma_pin_contexts(ce);
if (ret) {
err = ERR_PTR(ret);
goto free_engines;
}
}
}
return e;
@ -1219,6 +1441,7 @@ static void context_close(struct i915_gem_context *ctx)
/* Flush any concurrent set_engines() */
mutex_lock(&ctx->engines_mutex);
unpin_engines(__context_engines_static(ctx));
engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
i915_gem_context_set_closed(ctx);
mutex_unlock(&ctx->engines_mutex);


@ -78,13 +78,16 @@ enum i915_gem_engine_type {
/** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
I915_GEM_ENGINE_TYPE_BALANCED,
/** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
I915_GEM_ENGINE_TYPE_PARALLEL,
};
/**
* struct i915_gem_proto_engine - prototype engine
*
* This struct describes an engine that a context may contain. Engines
* have three types:
* have four types:
*
* - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
* show up as a NULL in i915_gem_engines::engines[i] and any attempt to
@ -97,6 +100,10 @@ enum i915_gem_engine_type {
*
* - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described
* i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
*
* - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described
* i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and
* i915_gem_proto_engine::siblings.
*/
struct i915_gem_proto_engine {
/** @type: Type of this engine */
@ -105,10 +112,13 @@ struct i915_gem_proto_engine {
/** @engine: Engine, for physical */
struct intel_engine_cs *engine;
/** @num_siblings: Number of balanced siblings */
/** @num_siblings: Number of balanced or parallel siblings */
unsigned int num_siblings;
/** @siblings: Balanced siblings */
/** @width: Width of each sibling */
unsigned int width;
/** @siblings: Balanced siblings or num_siblings * width for parallel */
struct intel_engine_cs **siblings;
/** @sseu: Client-set SSEU parameters */


@ -232,6 +232,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages;
unsigned int sg_page_sizes;
@ -242,8 +243,11 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
if (IS_ERR(pages))
return PTR_ERR(pages);
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
/* XXX: consider doing a vmap flush or something */
if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
wbinvd_on_all_cpus();
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
return 0;
@ -301,7 +305,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
}
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0);
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class,
I915_BO_ALLOC_USER);
obj->base.import_attach = attach;
obj->base.resv = dma_buf->resv;

File diff suppressed because it is too large.


@ -134,6 +134,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
internal_free_pages(pages);
obj->mm.dirty = false;
__start_cpu_write(obj);
}
static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {


@ -128,6 +128,32 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
/*
* This is purely from a security perspective, so we simply don't care
* about non-userspace objects being able to bypass the LLC.
*/
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
/*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
* kernel, as per the given object cache_level. This is troublesome
* since the heavy flush we apply when first gathering the pages is
* skipped if the kernel thinks the object is coherent with the GPU. As
* a result it might be possible to bypass the cache and read the
* contents of the page directly, which could be stale data. If it's
* just a case of userspace shooting themselves in the foot then so be
* it, but since i915 takes the stance of always zeroing memory before
* handing it to userspace, we need to prevent this.
*/
return IS_JSL_EHL(i915);
}
static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem);


@ -514,6 +514,7 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);


@ -427,6 +427,33 @@ struct drm_i915_gem_object {
* can freely bypass the CPU cache when touching the pages with the GPU,
* where the kernel is completely unaware. On such platform we need
* apply the sledgehammer-on-acquire regardless of the @cache_coherent.
*
* Special care is taken on non-LLC platforms, to prevent potential
* information leak. The driver currently ensures:
*
* 1. All userspace objects, by default, have @cache_level set as
* I915_CACHE_NONE. The only exception is userptr objects, where we
* instead force I915_CACHE_LLC, but we also don't allow userspace to
* ever change the @cache_level for such objects. Another special case
* is dma-buf, which doesn't rely on @cache_dirty, but there we
* always do a forced flush when acquiring the pages, if there is a
* chance that the pages can be read directly from main memory with
* the GPU.
*
* 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
*
* 3. All swapped-out objects (i.e. shmem) have @cache_dirty set to
* true.
*
* 4. The @cache_dirty is never freely reset before the initial
* flush, even if userspace adjusts the @cache_level through the
* i915_gem_set_caching_ioctl.
*
* 5. All @cache_dirty objects (including swapped-in) are initially
* flushed with a synchronous call to drm_clflush_sg in
* __i915_gem_object_set_pages. The @cache_dirty can be freely reset
* at this point. All further asynchronous clflushes are never security
* critical, i.e. userspace is free to race against itself.
*/
unsigned int cache_dirty:1;


@ -182,22 +182,7 @@ rebuild_st:
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st);
/*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
* kernel, as per the given object cache_level. This is troublesome
* since the heavy flush we apply when first gathering the pages is
* skipped if the kernel thinks the object is coherent with the GPU. As
* a result it might be possible to bypass the cache and read the
* contents of the page directly, which could be stale data. If it's
* just a case of userspace shooting themselves in the foot then so be
* it, but since i915 takes the stance of always zeroing memory before
* handing it to userspace, we need to prevent this.
*
* By setting cache_dirty here we make the clflush in set_pages
* unconditional on such platforms.
*/
if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
if (i915_gem_object_can_bypass_llc(obj))
obj->cache_dirty = true;
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
@ -301,6 +286,8 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
bool needs_clflush)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
if (obj->mm.madv == I915_MADV_DONTNEED)
@ -312,6 +299,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
drm_clflush_sg(pages);
__start_cpu_write(obj);
/*
* On non-LLC platforms, force the flush-on-acquire if this is ever
* swapped-in. Our async flush path is not trustworthy enough yet (and
* happens in the wrong order), and with some tricks it's conceivable
* for userspace to change the cache-level to I915_CACHE_NONE after the
* pages are swapped-in, and since execbuf binds the object before doing
* the async flush, we have a race window.
*/
if (!HAS_LLC(i915))
obj->cache_dirty = true;
}
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)


@ -165,8 +165,11 @@ alloc_table:
goto err;
}
sg_page_sizes = i915_sg_dma_sizes(st->sgl);
WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE));
if (i915_gem_object_can_bypass_llc(obj))
obj->cache_dirty = true;
sg_page_sizes = i915_sg_dma_sizes(st->sgl);
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@ -546,7 +549,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENOMEM;
drm_gem_private_object_init(dev, &obj->base, args->user_size);
i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0);
i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
I915_BO_ALLOC_USER);
obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;


@ -136,6 +136,8 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
huge_pages_free_pages(pages);
obj->mm.dirty = false;
__start_cpu_write(obj);
}
static const struct drm_i915_gem_object_ops huge_page_ops = {
@ -152,6 +154,7 @@ huge_pages_object(struct drm_i915_private *i915,
{
static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
unsigned int cache_level;
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
@ -173,7 +176,9 @@ huge_pages_object(struct drm_i915_private *i915,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_NONE;
cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->mm.page_mask = page_mask;


@ -17,13 +17,20 @@
#include "huge_gem_object.h"
#include "mock_context.h"
enum client_tiling {
CLIENT_TILING_LINEAR,
CLIENT_TILING_X,
CLIENT_TILING_Y,
CLIENT_NUM_TILING_TYPES
};
#define WIDTH 512
#define HEIGHT 32
struct blit_buffer {
struct i915_vma *vma;
u32 start_val;
u32 tiling;
enum client_tiling tiling;
};
struct tiled_blits {
@ -53,9 +60,9 @@ static int prepare_blit(const struct tiled_blits *t,
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
if (src->tiling == I915_TILING_Y)
if (src->tiling == CLIENT_TILING_Y)
cmd |= BCS_SRC_Y;
if (dst->tiling == I915_TILING_Y)
if (dst->tiling == CLIENT_TILING_Y)
cmd |= BCS_DST_Y;
*cs++ = cmd;
@ -172,7 +179,7 @@ static int tiled_blits_create_buffers(struct tiled_blits *t,
t->buffers[i].vma = vma;
t->buffers[i].tiling =
i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
}
return 0;
@ -197,17 +204,17 @@ static u64 swizzle_bit(unsigned int bit, u64 offset)
static u64 tiled_offset(const struct intel_gt *gt,
u64 v,
unsigned int stride,
unsigned int tiling)
enum client_tiling tiling)
{
unsigned int swizzle;
u64 x, y;
if (tiling == I915_TILING_NONE)
if (tiling == CLIENT_TILING_LINEAR)
return v;
y = div64_u64_rem(v, stride, &x);
if (tiling == I915_TILING_X) {
if (tiling == CLIENT_TILING_X) {
v = div64_u64_rem(y, 8, &y) * stride * 8;
v += y * 512;
v += div64_u64_rem(x, 512, &x) << 12;
@ -244,12 +251,12 @@ static u64 tiled_offset(const struct intel_gt *gt,
return v;
}
static const char *repr_tiling(int tiling)
static const char *repr_tiling(enum client_tiling tiling)
{
switch (tiling) {
case I915_TILING_NONE: return "linear";
case I915_TILING_X: return "X";
case I915_TILING_Y: return "Y";
case CLIENT_TILING_LINEAR: return "linear";
case CLIENT_TILING_X: return "X";
case CLIENT_TILING_Y: return "Y";
default: return "unknown";
}
}


@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
goto err_post_unpin;
intel_engine_pm_might_get(ce->engine);
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto err_unlock;
@ -362,8 +364,8 @@ static int __intel_context_active(struct i915_active *active)
return 0;
}
static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
enum i915_sw_fence_notify state)
static int __i915_sw_fence_call
sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state)
{
return NOTIFY_DONE;
}
@ -399,6 +401,10 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
ce->guc_id.id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(&ce->guc_id.link);
INIT_LIST_HEAD(&ce->destroyed_link);
INIT_LIST_HEAD(&ce->parallel.child_list);
/*
* Initialize fence to be complete as this is expected to be complete
* unless there is a pending schedule disable outstanding.
@ -413,10 +419,17 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
void intel_context_fini(struct intel_context *ce)
{
struct intel_context *child, *next;
if (ce->timeline)
intel_timeline_put(ce->timeline);
i915_vm_put(ce->vm);
/* Need to put the creation ref for the children */
if (intel_context_is_parent(ce))
for_each_child_safe(ce, child, next)
intel_context_put(child);
mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active);
i915_sw_fence_fini(&ce->guc_state.blocked);
@ -515,24 +528,53 @@ retry:
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
{
struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
unsigned long flags;
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
spin_lock_irqsave(&ce->guc_state.lock, flags);
list_for_each_entry_reverse(rq, &ce->guc_state.requests,
/*
* We search the parent list to find an active request on the submitted
* context. The parent list contains the requests for all the contexts
* in the relationship so we have to do a compare of each request's
* context.
*/
spin_lock_irqsave(&parent->guc_state.lock, flags);
list_for_each_entry_reverse(rq, &parent->guc_state.requests,
sched.link) {
if (rq->context != ce)
continue;
if (i915_request_completed(rq))
break;
active = rq;
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
spin_unlock_irqrestore(&parent->guc_state.lock, flags);
return active;
}
void intel_context_bind_parent_child(struct intel_context *parent,
struct intel_context *child)
{
/*
* It is the caller's responsibility to validate that this function is
* used correctly, but we use GEM_BUG_ON here to ensure that they do.
*/
GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
GEM_BUG_ON(intel_context_is_pinned(parent));
GEM_BUG_ON(intel_context_is_child(parent));
GEM_BUG_ON(intel_context_is_pinned(child));
GEM_BUG_ON(intel_context_is_child(child));
GEM_BUG_ON(intel_context_is_parent(child));
parent->parallel.child_index = parent->parallel.number_children++;
list_add_tail(&child->parallel.child_link,
&parent->parallel.child_list);
child->parallel.parent = parent;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif


@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce);
int intel_context_reconfigure_sseu(struct intel_context *ce,
const struct intel_sseu sseu);
#define PARENT_SCRATCH_SIZE PAGE_SIZE
static inline bool intel_context_is_child(struct intel_context *ce)
{
return !!ce->parallel.parent;
}
static inline bool intel_context_is_parent(struct intel_context *ce)
{
return !!ce->parallel.number_children;
}
static inline bool intel_context_is_pinned(struct intel_context *ce);
static inline struct intel_context *
intel_context_to_parent(struct intel_context *ce)
{
if (intel_context_is_child(ce)) {
/*
* The parent holds ref count to the child so it is always safe
* for the parent to access the child, but the child has a
* pointer to the parent without a ref. To ensure this is safe
* the child should only access the parent pointer while the
* parent is pinned.
*/
GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
return ce->parallel.parent;
} else {
return ce;
}
}
static inline bool intel_context_is_parallel(struct intel_context *ce)
{
return intel_context_is_child(ce) || intel_context_is_parent(ce);
}
void intel_context_bind_parent_child(struct intel_context *parent,
struct intel_context *child);
#define for_each_child(parent, ce)\
list_for_each_entry(ce, &(parent)->parallel.child_list,\
parallel.child_link)
#define for_each_child_safe(parent, ce, cn)\
list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\
parallel.child_link)
/**
* intel_context_lock_pinned - Stabilises the 'pinned' status of the HW context
* @ce - the context
@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce)
struct intel_timeline *tl = ce->timeline;
int err;
err = mutex_lock_interruptible(&tl->mutex);
if (intel_context_is_parent(ce))
err = mutex_lock_interruptible_nested(&tl->mutex, 0);
else if (intel_context_is_child(ce))
err = mutex_lock_interruptible_nested(&tl->mutex,
ce->parallel.child_index + 1);
else
err = mutex_lock_interruptible(&tl->mutex);
if (err)
return ERR_PTR(err);


@ -55,9 +55,13 @@ struct intel_context_ops {
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
/* virtual engine/context interface */
/* virtual/parallel engine/context interface */
struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
unsigned int count);
unsigned int count,
unsigned long flags);
struct intel_context *(*create_parallel)(struct intel_engine_cs **engines,
unsigned int num_siblings,
unsigned int width);
struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
unsigned int sibling);
};
@ -113,6 +117,7 @@ struct intel_context {
#define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9
#define CONTEXT_GUC_INIT 10
#define CONTEXT_PERMA_PIN 11
struct {
u64 timeout_us;
@ -197,22 +202,80 @@ struct intel_context {
struct {
/**
* @id: handle which is used to uniquely identify this context
* with the GuC, protected by guc->contexts_lock
* with the GuC, protected by guc->submission_state.lock
*/
u16 id;
/**
* @ref: the number of references to the guc_id, when
* transitioning in and out of zero protected by
* guc->contexts_lock
* guc->submission_state.lock
*/
atomic_t ref;
/**
* @link: in guc->guc_id_list when the guc_id has no refs but is
* still valid, protected by guc->contexts_lock
* still valid, protected by guc->submission_state.lock
*/
struct list_head link;
} guc_id;
/**
* @destroyed_link: link in guc->submission_state.destroyed_contexts, in
* list when context is pending to be destroyed (deregistered with the
* GuC), protected by guc->submission_state.lock
*/
struct list_head destroyed_link;
/** @parallel: sub-structure for parallel submission members */
struct {
union {
/**
* @child_list: parent's list of children
* contexts, no protection as immutable after context
* creation
*/
struct list_head child_list;
/**
* @child_link: child's link into parent's list of
* children
*/
struct list_head child_link;
};
/** @parent: pointer to parent if child */
struct intel_context *parent;
/**
* @last_rq: last request submitted on a parallel context, used
* to insert submit fences between requests in the parallel
* context
*/
struct i915_request *last_rq;
/**
* @fence_context: fence context composite fence when doing
* parallel submission
*/
u64 fence_context;
/**
* @seqno: seqno for composite fence when doing parallel
* submission
*/
u32 seqno;
/** @number_children: number of children if parent */
u8 number_children;
/** @child_index: index into child_list if child */
u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
/** @wqi_head: head pointer in work queue */
u16 wqi_head;
/** @wqi_tail: tail pointer in work queue */
u16 wqi_tail;
/**
* @parent_page: page in context state (ce->state) used
* by parent for work queue, process descriptor
*/
u8 parent_page;
} guc;
} parallel;
#ifdef CONFIG_DRM_I915_SELFTEST
/**
* @drop_schedule_enable: Force drop of schedule enable G2H for selftest


@ -2,6 +2,7 @@
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_
#include <asm/cacheflush.h>
#include <drm/drm_util.h>
#include <linux/hashtable.h>
@ -281,9 +282,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
#define FORCE_VIRTUAL BIT(0)
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
unsigned int count);
unsigned int count, unsigned long flags);
static inline struct intel_context *
intel_engine_create_parallel(struct intel_engine_cs **engines,
unsigned int num_engines,
unsigned int width)
{
GEM_BUG_ON(!engines[0]->cops->create_parallel);
return engines[0]->cops->create_parallel(engines, num_engines, width);
}
static inline bool
intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)


@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
GEM_DEBUG_WARN_ON(iir);
}
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
u8 logical_instance)
{
const struct engine_info *info = &intel_engines[id];
struct drm_i915_private *i915 = gt->i915;
@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->class = info->class;
engine->instance = info->instance;
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
engine->props.heartbeat_interval_ms =
@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
return info->engine_mask;
}
static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
u8 class, const u8 *map, u8 num_instances)
{
int i, j;
u8 current_logical_id = 0;
for (j = 0; j < num_instances; ++j) {
for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
if (!HAS_ENGINE(gt, i) ||
intel_engines[i].class != class)
continue;
if (intel_engines[i].instance == map[j]) {
logical_ids[intel_engines[i].instance] =
current_logical_id++;
break;
}
}
}
}
static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
{
int i;
u8 map[MAX_ENGINE_INSTANCE + 1];
for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
map[i] = i;
populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
}
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
* @gt: pointer to struct intel_gt
@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0;
unsigned int i;
unsigned int i, class;
u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
int err;
drm_WARN_ON(&i915->drm, engine_mask == 0);
@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
if (i915_inject_probe_failure(i915))
return -ENODEV;
for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
if (!HAS_ENGINE(gt, i))
continue;
for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
setup_logical_ids(gt, logical_ids, class);
err = intel_engine_setup(gt, i);
if (err)
goto cleanup;
for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
u8 instance = intel_engines[i].instance;
mask |= BIT(i);
if (intel_engines[i].class != class ||
!HAS_ENGINE(gt, i))
continue;
err = intel_engine_setup(gt, i,
logical_ids[instance]);
if (err)
goto cleanup;
mask |= BIT(i);
}
}
/*
@ -1911,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
unsigned int count)
unsigned int count, unsigned long flags)
{
if (count == 0)
return ERR_PTR(-EINVAL);
if (count == 1)
if (count == 1 && !(flags & FORCE_VIRTUAL))
return intel_context_create(siblings[0]);
GEM_BUG_ON(!siblings[0]->cops->create_virtual);
return siblings[0]->cops->create_virtual(siblings, count);
return siblings[0]->cops->create_virtual(siblings, count, flags);
}
struct i915_request *


@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
unsigned long flags;
bool result = true;
/*
* This is execlist specific behaviour intended to ensure the GPU is
* idle by switching to a known 'safe' context. With GuC submission, the
* same idle guarantee is achieved by other means (disabling
* scheduling). Further, switching to a 'safe' context has no effect
* with GuC submission as the scheduler can just switch back again.
*
* FIXME: Move this backend scheduler specific behaviour into the
* scheduler backend.
*/
if (intel_engine_uses_guc(engine))
return true;
/* GPU is pointing to the void, as good as in the kernel context. */
if (intel_gt_is_wedged(engine->gt))
return true;


@ -6,9 +6,11 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
#include "i915_drv.h"
#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
#include "intel_gt_pm.h"
static inline bool
intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
return intel_wakeref_is_active(&engine->wakeref);
}
static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
{
__intel_wakeref_get(&engine->wakeref);
}
static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
{
intel_wakeref_get(&engine->wakeref);
@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
return intel_wakeref_get_if_active(&engine->wakeref);
}
static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)
{
if (!intel_engine_is_virtual(engine)) {
intel_wakeref_might_get(&engine->wakeref);
} else {
struct intel_gt *gt = engine->gt;
struct intel_engine_cs *tengine;
intel_engine_mask_t tmp, mask = engine->mask;
for_each_engine_masked(tengine, gt, mask, tmp)
intel_wakeref_might_get(&tengine->wakeref);
}
intel_gt_pm_might_get(engine->gt);
}
static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
{
intel_wakeref_put(&engine->wakeref);
@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
}
static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)
{
if (!intel_engine_is_virtual(engine)) {
intel_wakeref_might_put(&engine->wakeref);
} else {
struct intel_gt *gt = engine->gt;
struct intel_engine_cs *tengine;
intel_engine_mask_t tmp, mask = engine->mask;
for_each_engine_masked(tengine, gt, mask, tmp)
intel_wakeref_might_put(&tengine->wakeref);
}
intel_gt_pm_might_put(engine->gt);
}
static inline struct i915_request *
intel_engine_create_kernel_request(struct intel_engine_cs *engine)
{


@ -269,6 +269,13 @@ struct intel_engine_cs {
unsigned int guc_id;
intel_engine_mask_t mask;
/**
* @logical_mask: logical mask of engine, reported to user space via
* query IOCTL and used to communicate with the GuC in logical space.
* The logical instance of a physical engine can change based on product
* and fusing.
*/
intel_engine_mask_t logical_mask;
u8 class;
u8 instance;


@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
}
static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
unsigned long flags);
static struct i915_request *
__active_request(const struct intel_timeline * const tl,
@ -3784,7 +3785,8 @@ unlock:
}
static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
unsigned long flags)
{
struct virtual_engine *ve;
unsigned int n;
@ -3877,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
ve->siblings[ve->num_siblings++] = sibling;
ve->base.mask |= sibling->mask;
ve->base.logical_mask |= sibling->logical_mask;
/*
* All physical engines must be compatible for their emission


@ -13,6 +13,59 @@
#include "pxp/intel_pxp_debugfs.h"
#include "uc/intel_uc_debugfs.h"
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
{
int ret = intel_gt_terminally_wedged(gt);
switch (ret) {
case -EIO:
*val = 1;
return 0;
case 0:
*val = 0;
return 0;
default:
return ret;
}
}
int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
{
/* Flush any previous reset before applying for a new one */
wait_event(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
"Manually reset engine mask to %llx", val);
return 0;
}
/*
* keep the interface clean where the first parameter
* is a 'struct intel_gt *' instead of 'void *'
*/
static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
{
return intel_gt_debugfs_reset_show(data, val);
}
static int __intel_gt_debugfs_reset_store(void *data, u64 val)
{
return intel_gt_debugfs_reset_store(data, val);
}
DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
__intel_gt_debugfs_reset_store, "%llu\n");
static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "reset", &reset_fops, NULL },
};
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
void intel_gt_debugfs_register(struct intel_gt *gt)
{
struct dentry *root;
@ -24,6 +77,8 @@ void intel_gt_debugfs_register(struct intel_gt *gt)
if (IS_ERR(root))
return;
gt_debugfs_register(gt, root);
intel_gt_engines_debugfs_register(gt, root);
intel_gt_pm_debugfs_register(gt, root);
intel_sseu_debugfs_register(gt, root);


@ -35,4 +35,8 @@ void intel_gt_debugfs_register_files(struct dentry *root,
const struct intel_gt_debugfs_file *files,
unsigned long count, void *data);
/* functions that need to be accessed by the upper level non-gt interfaces */
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
#endif /* INTEL_GT_DEBUGFS_H */


@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
return intel_wakeref_get_if_active(&gt->wakeref);
}
static inline void intel_gt_pm_might_get(struct intel_gt *gt)
{
intel_wakeref_might_get(&gt->wakeref);
}
static inline void intel_gt_pm_put(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(&gt->wakeref);
}
static inline void intel_gt_pm_might_put(struct intel_gt *gt)
{
intel_wakeref_might_put(&gt->wakeref);
}
#define with_intel_gt_pm(gt, tmp) \
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
intel_gt_pm_put(gt), tmp = 0)
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);


@ -20,6 +20,46 @@
#include "intel_uncore.h"
#include "vlv_sideband.h"
int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
atomic_inc(&gt->user_wakeref);
intel_gt_pm_get(gt);
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
return 0;
}
int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_put(gt->uncore);
intel_gt_pm_put(gt);
atomic_dec(&gt->user_wakeref);
return 0;
}
static int forcewake_user_open(struct inode *inode, struct file *file)
{
struct intel_gt *gt = inode->i_private;
return intel_gt_pm_debugfs_forcewake_user_open(gt);
}
static int forcewake_user_release(struct inode *inode, struct file *file)
{
struct intel_gt *gt = inode->i_private;
return intel_gt_pm_debugfs_forcewake_user_release(gt);
}
static const struct file_operations forcewake_user_fops = {
.owner = THIS_MODULE,
.open = forcewake_user_open,
.release = forcewake_user_release,
};
static int fw_domains_show(struct seq_file *m, void *data)
{
struct intel_gt *gt = m->private;
@ -628,6 +668,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{ "drpc", &drpc_fops, NULL },
{ "frequency", &frequency_fops, NULL },
{ "forcewake", &fw_domains_fops, NULL },
{ "forcewake_user", &forcewake_user_fops, NULL},
{ "llc", &llc_fops, llc_eval },
{ "rps_boost", &rps_boost_fops, rps_eval },
};


@ -13,4 +13,8 @@ struct drm_printer;
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
/* functions that need to be accessed by the upper level non-gt interfaces */
int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
#endif /* INTEL_GT_PM_DEBUGFS_H */


@ -3,6 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
#include <asm/tsc.h>
#include <linux/cpufreq.h>
#include "i915_drv.h"


@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
context_size += PARENT_SCRATCH_SIZE;
}
obj = i915_gem_object_create_lmem(engine->i915, context_size,
I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))


@ -292,7 +292,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine);
}


@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline)
timeline->hwsp_map = vaddr;
timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
clflush(vaddr + ofs);
drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);
return 0;
}
@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl)
memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
WRITE_ONCE(*hwsp_seqno, tl->seqno);
clflush(hwsp_seqno);
drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}
void intel_timeline_enter(struct intel_timeline *tl)


@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
ve[n] = intel_engine_create_virtual(siblings, nsibling);
ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve[n])) {
err = PTR_ERR(ve[n]);
nctx = n;
@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
* restrict it to our desired engine within the virtual engine.
*/
ve = intel_engine_create_virtual(siblings, nsibling);
ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
i915_request_add(rq);
}
ce = intel_engine_create_virtual(siblings, nsibling);
ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,
/* XXX We do not handle oversubscription and fairness with normal rq */
for (n = 0; n < nsibling; n++) {
ce = intel_engine_create_virtual(siblings, nsibling);
ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
if (err)
goto out_scratch;
ve = intel_engine_create_virtual(siblings, nsibling);
ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_scratch;
@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
ve = intel_engine_create_virtual(siblings, nsibling);
ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_spin;


@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
};


@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
}
}
}
void intel_guc_write_barrier(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
/*
* Ensure intel_uncore_write_fw can be used rather than
* intel_uncore_write.
*/
GEM_BUG_ON(guc->send_regs.fw_domains);
/*
* This register is used by the i915 and GuC for MMIO based
* communication. Once we are in this code CTBs are the only
* method the i915 uses to communicate with the GuC so it is
* safe to write to this register (a value of 0 is NOP for MMIO
* communication). If we ever start mixing CTBs and MMIOs a new
* register will have to be chosen. This function is also used
* to enforce ordering of a work queue item write and an update
* to the process descriptor. When a work queue is being used,
* CTBs are also the only mechanism of communication.
*/
intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
} else {
/* wmb() sufficient for a barrier if in smem */
wmb();
}
}


@ -46,6 +46,15 @@ struct intel_guc {
* submitted until the stalled request is processed.
*/
struct i915_request *stalled_request;
/**
* @submission_stall_reason: reason why submission is stalled
*/
enum {
STALL_NONE,
STALL_REGISTER_CONTEXT,
STALL_MOVE_LRC_TAIL,
STALL_ADD_REQUEST,
} submission_stall_reason;
/* intel_guc_recv interrupt related state */
/** @irq_lock: protects GuC irq state */
@ -71,16 +80,41 @@ struct intel_guc {
} interrupts;
/**
* @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
* ce->guc_id.ref when transitioning in and out of zero
* @submission_state: sub-structure for submission state protected by
* single lock
*/
spinlock_t contexts_lock;
/** @guc_ids: used to allocate unique ce->guc_id.id values */
struct ida guc_ids;
/**
* @guc_id_list: list of intel_context with valid guc_ids but no refs
*/
struct list_head guc_id_list;
struct {
/**
* @lock: protects everything in submission_state,
* ce->guc_id.id, and ce->guc_id.ref when transitioning in and
* out of zero
*/
spinlock_t lock;
/**
* @guc_ids: used to allocate new guc_ids, single-lrc
*/
struct ida guc_ids;
/**
* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
*/
unsigned long *guc_ids_bitmap;
/**
* @guc_id_list: list of intel_context with valid guc_ids but no
* refs
*/
struct list_head guc_id_list;
/**
* @destroyed_contexts: list of contexts waiting to be destroyed
* (deregistered with the GuC)
*/
struct list_head destroyed_contexts;
/**
* @destroyed_worker: worker to deregister contexts; needed as we
* must take a GT PM reference and can't do that from the destroy
* function, as it might be in an atomic context (no sleeping)
*/
struct work_struct destroyed_worker;
} submission_state;
/**
* @submission_supported: tracks whether we support GuC submission on
@ -342,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc);
void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
void intel_guc_write_barrier(struct intel_guc *guc);
#endif


@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
for_each_engine(engine, gt, id) {
u8 guc_class = engine_class_to_guc_class(engine->class);
system_info->mapping_table[guc_class][engine->instance] =
system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
engine->instance;
}
}


@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
return ++ct->requests.last_fence;
}
static void write_barrier(struct intel_guc_ct *ct)
{
struct intel_guc *guc = ct_to_guc(ct);
struct intel_gt *gt = guc_to_gt(guc);
if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
GEM_BUG_ON(guc->send_regs.fw_domains);
/*
* This register is used by the i915 and GuC for MMIO based
* communication. Once we are in this code CTBs are the only
* method the i915 uses to communicate with the GuC so it is
* safe to write to this register (a value of 0 is NOP for MMIO
* communication). If we ever start mixing CTBs and MMIOs a new
* register will have to be chosen.
*/
intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
} else {
/* wmb() sufficient for a barrier if in smem */
wmb();
}
}
static int ct_write(struct intel_guc_ct *ct,
const u32 *action,
u32 len /* in dwords */,
@ -474,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct,
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
*/
write_barrier(ct);
intel_guc_write_barrier(ct_to_guc(ct));
/* update local copies */
ctb->tail = tail;


@ -52,27 +52,27 @@
#define GUC_DOORBELL_INVALID 256
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
/* Work queue item header definitions */
/*
* Work queue item header definitions
*
* The work queue is a circular buffer used to submit complex (multi-LRC)
* workloads to the GuC. A work queue item is an entry in the circular buffer.
*/
#define WQ_STATUS_ACTIVE 1
#define WQ_STATUS_SUSPENDED 2
#define WQ_STATUS_CMD_ERROR 3
#define WQ_STATUS_ENGINE_ID_NOT_USED 4
#define WQ_STATUS_SUSPENDED_FROM_RESET 5
#define WQ_TYPE_SHIFT 0
#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT)
#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT)
#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT)
#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT)
#define WQ_TARGET_SHIFT 10
#define WQ_LEN_SHIFT 16
#define WQ_NO_WCFLUSH_WAIT (1 << 27)
#define WQ_PRESENT_WORKLOAD (1 << 28)
#define WQ_TYPE_BATCH_BUF 0x1
#define WQ_TYPE_PSEUDO 0x2
#define WQ_TYPE_INORDER 0x3
#define WQ_TYPE_NOOP 0x4
#define WQ_TYPE_MULTI_LRC 0x5
#define WQ_TYPE_MASK GENMASK(7, 0)
#define WQ_LEN_MASK GENMASK(26, 16)
#define WQ_RING_TAIL_SHIFT 20
#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */
#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
#define WQ_GUC_ID_MASK GENMASK(15, 0)
#define WQ_RING_TAIL_MASK GENMASK(28, 18)
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
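For illustration, with the new GENMASK-based fields a multi-LRC work queue item header can be assembled with FIELD_PREP() from <linux/bitfield.h>; the variable names and the exact dword split below are assumptions, not taken from this diff:

/* first header dword: item type plus payload length in dwords */
u32 wqi_hdr = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
	      FIELD_PREP(WQ_LEN_MASK, wqi_len_dw);

/* a following dword: target context guc_id and ring tail (in qwords) */
u32 wqi_target = FIELD_PREP(WQ_GUC_ID_MASK, guc_id) |
		 FIELD_PREP(WQ_RING_TAIL_MASK, ring_tail_qw);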
@ -186,7 +186,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
u32 reserved[30];
u32 reserved[36];
} __packed;
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)

File diff suppressed because it is too large


@ -0,0 +1,179 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2019 Intel Corporation
*/
#include "selftests/igt_spinner.h"
#include "selftests/igt_reset.h"
#include "selftests/intel_scheduler_helpers.h"
#include "gt/intel_engine_heartbeat.h"
#include "gem/selftests/mock_context.h"
static void logical_sort(struct intel_engine_cs **engines, int num_engines)
{
struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1];
int i, j;
for (i = 0; i < num_engines; ++i)
for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) {
if (engines[j]->logical_mask & BIT(i)) {
sorted[i] = engines[j];
break;
}
}
memcpy(*engines, *sorted,
sizeof(struct intel_engine_cs *) * num_engines);
}
static struct intel_context *
multi_lrc_create_parent(struct intel_gt *gt, u8 class,
unsigned long flags)
{
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_engine_cs *engine;
enum intel_engine_id id;
int i = 0;
for_each_engine(engine, gt, id) {
if (engine->class != class)
continue;
siblings[i++] = engine;
}
if (i <= 1)
return ERR_PTR(0);
logical_sort(siblings, i);
return intel_engine_create_parallel(siblings, 1, i);
}
static void multi_lrc_context_unpin(struct intel_context *ce)
{
struct intel_context *child;
GEM_BUG_ON(!intel_context_is_parent(ce));
for_each_child(ce, child)
intel_context_unpin(child);
intel_context_unpin(ce);
}
static void multi_lrc_context_put(struct intel_context *ce)
{
GEM_BUG_ON(!intel_context_is_parent(ce));
/*
* Only the parent gets the creation ref put in the uAPI, the parent
* itself is responsible for creation ref put on the children.
*/
intel_context_put(ce);
}
static struct i915_request *
multi_lrc_nop_request(struct intel_context *ce)
{
struct intel_context *child;
struct i915_request *rq, *child_rq;
int i = 0;
GEM_BUG_ON(!intel_context_is_parent(ce));
rq = intel_context_create_request(ce);
if (IS_ERR(rq))
return rq;
i915_request_get(rq);
i915_request_add(rq);
for_each_child(ce, child) {
child_rq = intel_context_create_request(child);
if (IS_ERR(child_rq))
goto child_error;
if (++i == ce->parallel.number_children)
set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
&child_rq->fence.flags);
i915_request_add(child_rq);
}
return rq;
child_error:
i915_request_put(rq);
return ERR_PTR(-ENOMEM);
}
static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
{
struct intel_context *parent;
struct i915_request *rq;
int ret;
parent = multi_lrc_create_parent(gt, class, 0);
if (IS_ERR(parent)) {
pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
return PTR_ERR(parent);
} else if (!parent) {
pr_debug("Not enough engines in class: %d", class);
return 0;
}
rq = multi_lrc_nop_request(parent);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
pr_err("Failed creating requests: %d", ret);
goto out;
}
ret = intel_selftest_wait_for_rq(rq);
if (ret)
pr_err("Failed waiting on request: %d", ret);
i915_request_put(rq);
if (ret >= 0) {
ret = intel_gt_wait_for_idle(gt, HZ * 5);
if (ret < 0)
pr_err("GT failed to idle: %d\n", ret);
}
out:
multi_lrc_context_unpin(parent);
multi_lrc_context_put(parent);
return ret;
}
static int intel_guc_multi_lrc_basic(void *arg)
{
struct intel_gt *gt = arg;
unsigned int class;
int ret;
for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
ret = __intel_guc_multi_lrc_basic(gt, class);
if (ret)
return ret;
}
return 0;
}
int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_multi_lrc_basic),
};
struct intel_gt *gt = &i915->gt;
if (intel_gt_is_wedged(gt))
return 0;
if (!intel_uc_uses_guc_submission(&gt->uc))
return 0;
return intel_gt_live_subtests(tests, gt);
}


@ -35,6 +35,7 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_debugfs.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_pm_debugfs.h"
#include "gt/intel_gt_requests.h"
@ -553,36 +554,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
return 0;
}
static int
i915_wedged_get(void *data, u64 *val)
static int i915_wedged_get(void *data, u64 *val)
{
struct drm_i915_private *i915 = data;
int ret = intel_gt_terminally_wedged(&i915->gt);
switch (ret) {
case -EIO:
*val = 1;
return 0;
case 0:
*val = 0;
return 0;
default:
return ret;
}
return intel_gt_debugfs_reset_show(&i915->gt, val);
}
static int
i915_wedged_set(void *data, u64 val)
static int i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
/* Flush any previous reset before applying for a new one */
wait_event(i915->gt.reset.queue,
!test_bit(I915_RESET_BACKOFF, &i915->gt.reset.flags));
intel_gt_handle_error(&i915->gt, val, I915_ERROR_CAPTURE,
"Manually set wedged engine mask = %llx", val);
return 0;
return intel_gt_debugfs_reset_store(&i915->gt, val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
@ -727,27 +710,15 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
struct intel_gt *gt = &i915->gt;
atomic_inc(&gt->user_wakeref);
intel_gt_pm_get(gt);
if (GRAPHICS_VER(i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
return 0;
return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt);
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
struct intel_gt *gt = &i915->gt;
if (GRAPHICS_VER(i915) >= 6)
intel_uncore_forcewake_user_put(&i915->uncore);
intel_gt_pm_put(gt);
atomic_dec(&gt->user_wakeref);
return 0;
return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt);
}
static const struct file_operations i915_forcewake_fops = {
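The two gt-level helpers now called here presumably carry the code that was removed above, taking the gt as the argument; a sketch based on that removed code (the exact upstream bodies may differ):

int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
	atomic_inc(&gt->user_wakeref);
	intel_gt_pm_get(gt);
	if (GRAPHICS_VER(gt->i915) >= 6)
		intel_uncore_forcewake_user_get(gt->uncore);

	return 0;
}

int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
	if (GRAPHICS_VER(gt->i915) >= 6)
		intel_uncore_forcewake_user_put(gt->uncore);
	intel_gt_pm_put(gt);
	atomic_dec(&gt->user_wakeref);

	return 0;
}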


@ -124,7 +124,9 @@ query_engine_info(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
info.engine.engine_class = engine->uabi_class;
info.engine.engine_instance = engine->uabi_instance;
info.flags = I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE;
info.capabilities = engine->uabi_capabilities;
info.logical_instance = ilog2(engine->logical_mask);
if (copy_to_user(info_ptr, &info, sizeof(info)))
return -EFAULT;


@ -1335,6 +1335,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
return err;
}
static inline bool is_parallel_rq(struct i915_request *rq)
{
return intel_context_is_parallel(rq->context);
}
static inline struct intel_context *request_to_parent(struct i915_request *rq)
{
return intel_context_to_parent(rq->context);
}
static bool is_same_parallel_context(struct i915_request *to,
struct i915_request *from)
{
if (is_parallel_rq(to))
return request_to_parent(to) == request_to_parent(from);
return false;
}
int
i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence)
@ -1366,11 +1385,14 @@ i915_request_await_execution(struct i915_request *rq,
* want to run our callback in all cases.
*/
if (dma_fence_is_i915(fence))
if (dma_fence_is_i915(fence)) {
if (is_same_parallel_context(rq, to_request(fence)))
continue;
ret = __i915_request_await_execution(rq,
to_request(fence));
else
} else {
ret = i915_request_await_external(rq, fence);
}
if (ret < 0)
return ret;
} while (--nchild);
@ -1471,10 +1493,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
fence))
continue;
if (dma_fence_is_i915(fence))
if (dma_fence_is_i915(fence)) {
if (is_same_parallel_context(rq, to_request(fence)))
continue;
ret = i915_request_await_request(rq, to_request(fence));
else
} else {
ret = i915_request_await_external(rq, fence);
}
if (ret < 0)
return ret;
@ -1549,6 +1574,81 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
static struct i915_request *
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
{
struct i915_request *prev;
GEM_BUG_ON(!is_parallel_rq(rq));
prev = request_to_parent(rq)->parallel.last_rq;
if (prev) {
if (!__i915_request_is_complete(prev)) {
i915_sw_fence_await_sw_fence(&rq->submit,
&prev->submit,
&rq->submitq);
if (rq->engine->sched_engine->schedule)
__i915_sched_node_add_dependency(&rq->sched,
&prev->sched,
&rq->dep,
0);
}
i915_request_put(prev);
}
request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
return to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
}
static struct i915_request *
__i915_request_ensure_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
{
struct i915_request *prev;
GEM_BUG_ON(is_parallel_rq(rq));
prev = to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
if (prev && !__i915_request_is_complete(prev)) {
bool uses_guc = intel_engine_uses_guc(rq->engine);
bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask |
rq->engine->mask);
bool same_context = prev->context == rq->context;
/*
* The requests are supposed to be kept in order. However,
* we need to be wary in case the timeline->last_request
* is used as a barrier for external modification to this
* context.
*/
GEM_BUG_ON(same_context &&
i915_seqno_passed(prev->fence.seqno,
rq->fence.seqno));
if ((same_context && uses_guc) || (!uses_guc && pow2))
i915_sw_fence_await_sw_fence(&rq->submit,
&prev->submit,
&rq->submitq);
else
__i915_sw_fence_await_dma_fence(&rq->submit,
&prev->fence,
&rq->dmaq);
if (rq->engine->sched_engine->schedule)
__i915_sched_node_add_dependency(&rq->sched,
&prev->sched,
&rq->dep,
0);
}
return prev;
}
static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
@ -1574,38 +1674,21 @@ __i915_request_add_to_timeline(struct i915_request *rq)
* complete (to maximise our greedy late load balancing) and this
* precludes optimising to use semaphores serialisation of a single
* timeline across engines.
*
* We do not order parallel submission requests on the timeline as each
* parallel submission context has its own timeline and the ordering
* rules for parallel requests are that they must be submitted in the
* order received from the execbuf IOCTL. So rather than using the
* timeline, we store a pointer to the last request submitted in the
* relationship in the gem context, and insert a submission fence
* between that request and the request passed into this function; or,
* alternatively, we use a completion fence if the gem context has a
* single timeline and this is the first submission of an execbuf IOCTL.
*/
prev = to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
if (prev && !__i915_request_is_complete(prev)) {
bool uses_guc = intel_engine_uses_guc(rq->engine);
/*
* The requests are supposed to be kept in order. However,
* we need to be wary in case the timeline->last_request
* is used as a barrier for external modification to this
* context.
*/
GEM_BUG_ON(prev->context == rq->context &&
i915_seqno_passed(prev->fence.seqno,
rq->fence.seqno));
if ((!uses_guc &&
is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) ||
(uses_guc && prev->context == rq->context))
i915_sw_fence_await_sw_fence(&rq->submit,
&prev->submit,
&rq->submitq);
else
__i915_sw_fence_await_dma_fence(&rq->submit,
&prev->fence,
&rq->dmaq);
if (rq->engine->sched_engine->schedule)
__i915_sched_node_add_dependency(&rq->sched,
&prev->sched,
&rq->dep,
0);
}
if (likely(!is_parallel_rq(rq)))
prev = __i915_request_ensure_ordering(rq, timeline);
else
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
/*
* Make sure that no request gazumped us - if it was allocated after


@ -139,6 +139,29 @@ enum {
* the GPU. Here we track such boost requests on a per-request basis.
*/
I915_FENCE_FLAG_BOOST,
/*
* I915_FENCE_FLAG_SUBMIT_PARALLEL - request with a context in a
* parent-child relationship (parallel submission, multi-lrc) should
* trigger a submission to the GuC rather than just moving the context
* tail.
*/
I915_FENCE_FLAG_SUBMIT_PARALLEL,
/*
* I915_FENCE_FLAG_SKIP_PARALLEL - request with a context in a
* parent-child relationship (parallel submission, multi-lrc) that
* hit an error while generating requests in the execbuf IOCTL.
* Indicates this request should be skipped as another request in
* submission / relationship encountered an error.
*/
I915_FENCE_FLAG_SKIP_PARALLEL,
/*
* I915_FENCE_FLAG_COMPOSITE - Indicates fence is part of a composite
* fence (dma_fence_array) that i915 generated for parallel submission.
*/
I915_FENCE_FLAG_COMPOSITE,
};
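A hypothetical helper (not part of this diff) showing how a submission backend consumes the first flag: only the request carrying I915_FENCE_FLAG_SUBMIT_PARALLEL, i.e. the last child generated for the set as seen in the selftest above, triggers a real GuC submission for the whole parent-child group.

static inline bool i915_request_needs_parallel_submit(const struct i915_request *rq)
{
	/* set on the last child request of a multi-lrc set */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags);
}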
/**


@ -1234,9 +1234,10 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
return i915_active_add_request(&vma->active, rq);
}
int i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
unsigned int flags)
int _i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
struct dma_fence *fence,
unsigned int flags)
{
struct drm_i915_gem_object *obj = vma->obj;
int err;
@ -1257,9 +1258,11 @@ int i915_vma_move_to_active(struct i915_vma *vma,
intel_frontbuffer_put(front);
}
dma_resv_add_excl_fence(vma->resv, &rq->fence);
obj->write_domain = I915_GEM_DOMAIN_RENDER;
obj->read_domains = 0;
if (fence) {
dma_resv_add_excl_fence(vma->resv, fence);
obj->write_domain = I915_GEM_DOMAIN_RENDER;
obj->read_domains = 0;
}
} else {
if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
err = dma_resv_reserve_shared(vma->resv, 1);
@ -1267,8 +1270,10 @@ int i915_vma_move_to_active(struct i915_vma *vma,
return err;
}
dma_resv_add_shared_fence(vma->resv, &rq->fence);
obj->write_domain = 0;
if (fence) {
dma_resv_add_shared_fence(vma->resv, fence);
obj->write_domain = 0;
}
}
if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)


@ -57,9 +57,16 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq);
int __must_check i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
unsigned int flags);
int __must_check _i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
struct dma_fence *fence,
unsigned int flags);
static inline int __must_check
i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq,
unsigned int flags)
{
return _i915_vma_move_to_active(vma, rq, &rq->fence, flags);
}
#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter)
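A hypothetical call site for the NULL-fence case (not from this diff): track the vma against the request but defer fence installation, e.g. when a single composite fence covering a whole parallel set is installed separately.

err = _i915_vma_move_to_active(vma, rq, NULL, flags);
if (err)
	return err;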


@ -123,6 +123,12 @@ enum {
__INTEL_WAKEREF_PUT_LAST_BIT__
};
static inline void
intel_wakeref_might_get(struct intel_wakeref *wf)
{
might_lock(&wf->mutex);
}
/**
* intel_wakeref_put_flags: Release the wakeref
* @wf: the wakeref
@ -170,6 +176,12 @@ intel_wakeref_put_delay(struct intel_wakeref *wf, unsigned long delay)
FIELD_PREP(INTEL_WAKEREF_PUT_DELAY, delay));
}
static inline void
intel_wakeref_might_put(struct intel_wakeref *wf)
{
might_lock(&wf->mutex);
}
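Both helpers are lockdep-only annotations; a wrapper of roughly this shape (name and placement assumed) lets GT power-management paths assert that a wakeref might be acquired without actually acquiring it:

static inline void intel_gt_pm_might_get(struct intel_gt *gt)
{
	/* tells lockdep the wakeref mutex may be taken here; no side effects */
	intel_wakeref_might_get(&gt->wakeref);
}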
/**
* intel_wakeref_lock: Lock the wakeref (mutex)
* @wf: the wakeref


@ -48,5 +48,6 @@ selftest(ring_submission, intel_ring_submission_live_selftests)
selftest(perf, i915_perf_live_selftests)
selftest(slpc, intel_slpc_live_selftests)
selftest(guc, intel_guc_live_selftests)
selftest(guc_multi_lrc, intel_guc_multi_lrc_live_selftests)
/* Here be dragons: keep last to run last! */
selftest(late_gt_pm, intel_gt_pm_late_selftests)


@ -6,8 +6,6 @@
#include <drm/ttm/ttm_placement.h>
#include <linux/scatterlist.h>
#include <drm/ttm/ttm_placement.h>
#include "gem/i915_gem_region.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"


@ -1830,6 +1830,7 @@ struct drm_i915_gem_context_param {
* Extensions:
* i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
* i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
*/
#define I915_CONTEXT_PARAM_ENGINES 0xa
@ -2104,6 +2105,135 @@ struct i915_context_engines_bond {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* struct i915_context_engines_parallel_submit - Configure engine for
* parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915 to
* run these BBs. Once a slot is configured for N BBs only N BBs can be
* submitted in each execbuf IOCTL and this is implicit behavior, i.e. the user
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than 1 physical engine (e.g. context is
* a virtual engine). Also we only allow contexts of same engine class and these
* contexts must be in logically contiguous order. Examples of the placement
* behavior are described below. Lastly, the default is to not allow BBs to be
* preempted mid-batch. Instead, coordinated preemption points are inserted
* on all hardware contexts between each set of BBs. Flags could be added in the future
* to change both of these default behaviors.
*
* Returns -EINVAL if hardware context placement configuration is invalid or if
* the placement configuration isn't supported on the platform / submission
* interface.
* Returns -ENODEV if extension isn't supported on the platform / submission
* interface.
*
* .. code-block:: none
*
* Examples syntax:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
*
* Example 1 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can be thought of as two virtual engines, each containing two
* engines thereby making a 2D array. However, there are bonds tying the
* entries together and placing restrictions on how they can be scheduled.
* Specifically, the scheduler can choose only vertical columns from the 2D
* array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
* scheduler wants to submit to CS[0], it must also choose CS[1] and vice
* versa. Same for CS[2] requires also using CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logically contiguous, return -EINVAL
*/
struct i915_context_engines_parallel_submit {
/**
* @base: base user extension.
*/
struct i915_user_extension base;
/**
* @engine_index: slot for parallel engine
*/
__u16 engine_index;
/**
* @width: number of contexts per parallel engine or in other words the
* number of batches in each submission
*/
__u16 width;
/**
* @num_siblings: number of siblings per context or in other words the
* number of possible placements for each submission
*/
__u16 num_siblings;
/**
* @mbz16: reserved for future use; must be zero
*/
__u16 mbz16;
/**
* @flags: all undefined flags must be zero; no flags are currently defined
*/
__u64 flags;
/**
* @mbz64: reserved for future use; must be zero
*/
__u64 mbz64[3];
/**
* @engines: 2-d array of engine instances to configure parallel engine
*
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
struct i915_engine_class_instance engines[0];
} __packed;
#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
struct i915_user_extension base; \
__u16 engine_index; \
__u16 width; \
__u16 num_siblings; \
__u16 mbz16; \
__u64 flags; \
__u64 mbz64[3]; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
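A hedged userspace sketch of "Example 1" above, using the macro just defined (the DRM fd, context id and the choice of the video class are assumptions for illustration): declare the extension, chain it from I915_CONTEXT_PARAM_ENGINES and set it on the context.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* libdrm's copy of this header */

static int setup_parallel_slot(int fd, uint32_t ctx_id)
{
	/* width=2, num_siblings=1: two batches, one fixed placement each */
	I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(parallel, 2) = {
		.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT,
		.engine_index = 0,
		.width = 2,
		.num_siblings = 1,
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },	/* CS[0]; class is an assumption */
			{ I915_ENGINE_CLASS_VIDEO, 1 },	/* CS[1] */
		},
	};
	/* slot 0 holds the parallel engine, so the base map entry stays invalid */
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
		.extensions = (uintptr_t)&parallel,
		.engines = {
			{ I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE },
		},
	};
	struct drm_i915_gem_context_param param = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};

	return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);
}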
/**
* DOC: Context Engine Map uAPI
*
@ -2163,6 +2293,7 @@ struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
struct i915_engine_class_instance engines[0];
} __attribute__((packed));
@ -2781,14 +2912,20 @@ struct drm_i915_engine_info {
/** @flags: Engine flags. */
__u64 flags;
#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0)
/** @capabilities: Capabilities of this engine. */
__u64 capabilities;
#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
/** @logical_instance: Logical instance of engine */
__u16 logical_instance;
/** @rsvd1: Reserved fields. */
__u64 rsvd1[4];
__u16 rsvd1[3];
/** @rsvd2: Reserved fields. */
__u64 rsvd2[3];
};
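On the userspace side the new field is only meaningful when the flag above is set; a short sketch (assumes the buffer was already filled via DRM_IOCTL_I915_QUERY with a DRM_I915_QUERY_ENGINE_INFO item):

#include <stdio.h>
#include <drm/i915_drm.h>

static void dump_logical_instances(const struct drm_i915_query_engine_info *info)
{
	for (unsigned int i = 0; i < info->num_engines; i++) {
		const struct drm_i915_engine_info *e = &info->engines[i];

		if (!(e->flags & I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE))
			continue;	/* older kernel: field not populated */

		printf("class %u physical %u -> logical %u\n",
		       e->engine.engine_class, e->engine.engine_instance,
		       e->logical_instance);
	}
}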
/**