drm/i915/gem: Disallow bonding of virtual engines (v3)
This adds a bunch of complexity which the media driver has never actually used. The media driver does technically bond a balanced engine to another engine but the balanced engine only has one engine in the sibling set. This doesn't actually result in a virtual engine. This functionality was originally added to handle cases where we may have more than two video engines and media might want to load-balance their bonded submits by, for instance, submitting to a balanced vcs0-1 as the primary and then vcs2-3 as the secondary. However, no such hardware has shipped thus far and, if we ever want to enable such use-cases in the future, we'll use the up-and-coming parallel submit API which targets GuC submission. This makes I915_CONTEXT_ENGINES_EXT_BOND a total no-op. We leave the validation code in place in case we ever decide we want to do something interesting with the bonding information. v2 (Jason Ekstrand): - Don't delete quite as much code. v3 (Tvrtko Ursulin): - Add some history to the commit message Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-10-jason@jlekstrand.net
This commit is contained in:
parent
c7a71fc8ee
commit
521695c6f1
|
@ -1552,6 +1552,12 @@ set_engines__bond(struct i915_user_extension __user *base, void *data)
|
|||
}
|
||||
virtual = set->engines->engines[idx]->engine;
|
||||
|
||||
if (intel_engine_is_virtual(virtual)) {
|
||||
drm_dbg(&i915->drm,
|
||||
"Bonding with virtual engines not allowed\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = check_user_mbz(&ext->flags);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -1592,18 +1598,6 @@ set_engines__bond(struct i915_user_extension __user *base, void *data)
|
|||
n, ci.engine_class, ci.engine_instance);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* A non-virtual engine has no siblings to choose between; and
|
||||
* a submit fence will always be directed to the one engine.
|
||||
*/
|
||||
if (intel_engine_is_virtual(virtual)) {
|
||||
err = intel_virtual_engine_attach_bond(virtual,
|
||||
master,
|
||||
bond);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -182,18 +182,6 @@ struct virtual_engine {
|
|||
int prio;
|
||||
} nodes[I915_NUM_ENGINES];
|
||||
|
||||
/*
|
||||
* Keep track of bonded pairs -- restrictions upon on our selection
|
||||
* of physical engines any particular request may be submitted to.
|
||||
* If we receive a submit-fence from a master engine, we will only
|
||||
* use one of sibling_mask physical engines.
|
||||
*/
|
||||
struct ve_bond {
|
||||
const struct intel_engine_cs *master;
|
||||
intel_engine_mask_t sibling_mask;
|
||||
} *bonds;
|
||||
unsigned int num_bonds;
|
||||
|
||||
/* And finally, which physical engines this virtual engine maps onto. */
|
||||
unsigned int num_siblings;
|
||||
struct intel_engine_cs *siblings[];
|
||||
|
@ -3413,7 +3401,6 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk)
|
|||
i915_sched_engine_put(ve->base.sched_engine);
|
||||
intel_engine_free_request_pool(&ve->base);
|
||||
|
||||
kfree(ve->bonds);
|
||||
kfree(ve);
|
||||
}
|
||||
|
||||
|
@ -3668,33 +3655,13 @@ unlock:
|
|||
spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
|
||||
}
|
||||
|
||||
static struct ve_bond *
|
||||
virtual_find_bond(struct virtual_engine *ve,
|
||||
const struct intel_engine_cs *master)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ve->num_bonds; i++) {
|
||||
if (ve->bonds[i].master == master)
|
||||
return &ve->bonds[i];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
|
||||
{
|
||||
struct virtual_engine *ve = to_virtual_engine(rq->engine);
|
||||
intel_engine_mask_t allowed, exec;
|
||||
struct ve_bond *bond;
|
||||
|
||||
allowed = ~to_request(signal)->engine->mask;
|
||||
|
||||
bond = virtual_find_bond(ve, to_request(signal)->engine);
|
||||
if (bond)
|
||||
allowed &= bond->sibling_mask;
|
||||
|
||||
/* Restrict the bonded request to run on only the available engines */
|
||||
exec = READ_ONCE(rq->execution_mask);
|
||||
while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
|
||||
|
@ -3850,42 +3817,6 @@ err_put:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
|
||||
const struct intel_engine_cs *master,
|
||||
const struct intel_engine_cs *sibling)
|
||||
{
|
||||
struct virtual_engine *ve = to_virtual_engine(engine);
|
||||
struct ve_bond *bond;
|
||||
int n;
|
||||
|
||||
/* Sanity check the sibling is part of the virtual engine */
|
||||
for (n = 0; n < ve->num_siblings; n++)
|
||||
if (sibling == ve->siblings[n])
|
||||
break;
|
||||
if (n == ve->num_siblings)
|
||||
return -EINVAL;
|
||||
|
||||
bond = virtual_find_bond(ve, master);
|
||||
if (bond) {
|
||||
bond->sibling_mask |= sibling->mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bond = krealloc(ve->bonds,
|
||||
sizeof(*bond) * (ve->num_bonds + 1),
|
||||
GFP_KERNEL);
|
||||
if (!bond)
|
||||
return -ENOMEM;
|
||||
|
||||
bond[ve->num_bonds].master = master;
|
||||
bond[ve->num_bonds].sibling_mask = sibling->mask;
|
||||
|
||||
ve->bonds = bond;
|
||||
ve->num_bonds++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void intel_execlists_show_requests(struct intel_engine_cs *engine,
|
||||
struct drm_printer *m,
|
||||
void (*show_request)(struct drm_printer *m,
|
||||
|
|
|
@ -36,8 +36,7 @@ struct intel_context *
|
|||
intel_execlists_create_virtual(struct intel_engine_cs **siblings,
|
||||
unsigned int count);
|
||||
|
||||
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
|
||||
const struct intel_engine_cs *master,
|
||||
const struct intel_engine_cs *sibling);
|
||||
bool
|
||||
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
|
||||
|
||||
#endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */
|
||||
|
|
|
@ -4328,234 +4328,6 @@ static int live_virtual_preserved(void *arg)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int bond_virtual_engine(struct intel_gt *gt,
|
||||
unsigned int class,
|
||||
struct intel_engine_cs **siblings,
|
||||
unsigned int nsibling,
|
||||
unsigned int flags)
|
||||
#define BOND_SCHEDULE BIT(0)
|
||||
{
|
||||
struct intel_engine_cs *master;
|
||||
struct i915_request *rq[16];
|
||||
enum intel_engine_id id;
|
||||
struct igt_spinner spin;
|
||||
unsigned long n;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* A set of bonded requests is intended to be run concurrently
|
||||
* across a number of engines. We use one request per-engine
|
||||
* and a magic fence to schedule each of the bonded requests
|
||||
* at the same time. A consequence of our current scheduler is that
|
||||
* we only move requests to the HW ready queue when the request
|
||||
* becomes ready, that is when all of its prerequisite fences have
|
||||
* been signaled. As one of those fences is the master submit fence,
|
||||
* there is a delay on all secondary fences as the HW may be
|
||||
* currently busy. Equally, as all the requests are independent,
|
||||
* they may have other fences that delay individual request
|
||||
* submission to HW. Ergo, we do not guarantee that all requests are
|
||||
* immediately submitted to HW at the same time, just that if the
|
||||
* rules are abided by, they are ready at the same time as the
|
||||
* first is submitted. Userspace can embed semaphores in its batch
|
||||
* to ensure parallel execution of its phases as it requires.
|
||||
* Though naturally it gets requested that perhaps the scheduler should
|
||||
* take care of parallel execution, even across preemption events on
|
||||
* different HW. (The proper answer is of course "lalalala".)
|
||||
*
|
||||
* With the submit-fence, we have identified three possible phases
|
||||
* of synchronisation depending on the master fence: queued (not
|
||||
* ready), executing, and signaled. The first two are quite simple
|
||||
* and checked below. However, the signaled master fence handling is
|
||||
* contentious. Currently we do not distinguish between a signaled
|
||||
* fence and an expired fence, as once signaled it does not convey
|
||||
* any information about the previous execution. It may even be freed
|
||||
* and hence checking later it may not exist at all. Ergo we currently
|
||||
* do not apply the bonding constraint for an already signaled fence,
|
||||
* as our expectation is that it should not constrain the secondaries
|
||||
* and is outside of the scope of the bonded request API (i.e. all
|
||||
* userspace requests are meant to be running in parallel). As
|
||||
* it imposes no constraint, and is effectively a no-op, we do not
|
||||
* check below as normal execution flows are checked extensively above.
|
||||
*
|
||||
* XXX Is the degenerate handling of signaled submit fences the
|
||||
* expected behaviour for userpace?
|
||||
*/
|
||||
|
||||
GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
|
||||
|
||||
if (igt_spinner_init(&spin, gt))
|
||||
return -ENOMEM;
|
||||
|
||||
err = 0;
|
||||
rq[0] = ERR_PTR(-ENOMEM);
|
||||
for_each_engine(master, gt, id) {
|
||||
struct i915_sw_fence fence = {};
|
||||
struct intel_context *ce;
|
||||
|
||||
if (master->class == class)
|
||||
continue;
|
||||
|
||||
ce = intel_context_create(master);
|
||||
if (IS_ERR(ce)) {
|
||||
err = PTR_ERR(ce);
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
|
||||
|
||||
rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
|
||||
intel_context_put(ce);
|
||||
if (IS_ERR(rq[0])) {
|
||||
err = PTR_ERR(rq[0]);
|
||||
goto out;
|
||||
}
|
||||
i915_request_get(rq[0]);
|
||||
|
||||
if (flags & BOND_SCHEDULE) {
|
||||
onstack_fence_init(&fence);
|
||||
err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
|
||||
&fence,
|
||||
GFP_KERNEL);
|
||||
}
|
||||
|
||||
i915_request_add(rq[0]);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
if (!(flags & BOND_SCHEDULE) &&
|
||||
!igt_wait_for_spinner(&spin, rq[0])) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (n = 0; n < nsibling; n++) {
|
||||
struct intel_context *ve;
|
||||
|
||||
ve = intel_execlists_create_virtual(siblings, nsibling);
|
||||
if (IS_ERR(ve)) {
|
||||
err = PTR_ERR(ve);
|
||||
onstack_fence_fini(&fence);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = intel_virtual_engine_attach_bond(ve->engine,
|
||||
master,
|
||||
siblings[n]);
|
||||
if (err) {
|
||||
intel_context_put(ve);
|
||||
onstack_fence_fini(&fence);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = intel_context_pin(ve);
|
||||
intel_context_put(ve);
|
||||
if (err) {
|
||||
onstack_fence_fini(&fence);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rq[n + 1] = i915_request_create(ve);
|
||||
intel_context_unpin(ve);
|
||||
if (IS_ERR(rq[n + 1])) {
|
||||
err = PTR_ERR(rq[n + 1]);
|
||||
onstack_fence_fini(&fence);
|
||||
goto out;
|
||||
}
|
||||
i915_request_get(rq[n + 1]);
|
||||
|
||||
err = i915_request_await_execution(rq[n + 1],
|
||||
&rq[0]->fence,
|
||||
ve->engine->bond_execute);
|
||||
i915_request_add(rq[n + 1]);
|
||||
if (err < 0) {
|
||||
onstack_fence_fini(&fence);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
onstack_fence_fini(&fence);
|
||||
intel_engine_flush_submission(master);
|
||||
igt_spinner_end(&spin);
|
||||
|
||||
if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
|
||||
pr_err("Master request did not execute (on %s)!\n",
|
||||
rq[0]->engine->name);
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (n = 0; n < nsibling; n++) {
|
||||
if (i915_request_wait(rq[n + 1], 0,
|
||||
MAX_SCHEDULE_TIMEOUT) < 0) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rq[n + 1]->engine != siblings[n]) {
|
||||
pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
|
||||
siblings[n]->name,
|
||||
rq[n + 1]->engine->name,
|
||||
rq[0]->engine->name);
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
for (n = 0; !IS_ERR(rq[n]); n++)
|
||||
i915_request_put(rq[n]);
|
||||
rq[0] = ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
out:
|
||||
for (n = 0; !IS_ERR(rq[n]); n++)
|
||||
i915_request_put(rq[n]);
|
||||
if (igt_flush_test(gt->i915))
|
||||
err = -EIO;
|
||||
|
||||
igt_spinner_fini(&spin);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int live_virtual_bond(void *arg)
|
||||
{
|
||||
static const struct phase {
|
||||
const char *name;
|
||||
unsigned int flags;
|
||||
} phases[] = {
|
||||
{ "", 0 },
|
||||
{ "schedule", BOND_SCHEDULE },
|
||||
{ },
|
||||
};
|
||||
struct intel_gt *gt = arg;
|
||||
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
|
||||
unsigned int class;
|
||||
int err;
|
||||
|
||||
if (intel_uc_uses_guc_submission(>->uc))
|
||||
return 0;
|
||||
|
||||
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
|
||||
const struct phase *p;
|
||||
int nsibling;
|
||||
|
||||
nsibling = select_siblings(gt, class, siblings);
|
||||
if (nsibling < 2)
|
||||
continue;
|
||||
|
||||
for (p = phases; p->name; p++) {
|
||||
err = bond_virtual_engine(gt,
|
||||
class, siblings, nsibling,
|
||||
p->flags);
|
||||
if (err) {
|
||||
pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
|
||||
__func__, p->name, class, nsibling, err);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int reset_virtual_engine(struct intel_gt *gt,
|
||||
struct intel_engine_cs **siblings,
|
||||
unsigned int nsibling)
|
||||
|
@ -4721,7 +4493,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
|
|||
SUBTEST(live_virtual_mask),
|
||||
SUBTEST(live_virtual_preserved),
|
||||
SUBTEST(live_virtual_slice),
|
||||
SUBTEST(live_virtual_bond),
|
||||
SUBTEST(live_virtual_reset),
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue