linux-stable/drivers/gpu/drm/xe/xe_engine.c

// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include "xe_engine.h"
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>
#include <linux/nospec.h>
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_vm.h"
static struct xe_engine *__xe_engine_create(struct xe_device *xe,
struct xe_vm *vm,
u32 logical_mask,
u16 width, struct xe_hw_engine *hwe,
u32 flags)
{
struct xe_engine *e;
struct xe_gt *gt = hwe->gt;
int err;
int i;
e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
if (!e)
return ERR_PTR(-ENOMEM);
kref_init(&e->refcount);
e->flags = flags;
e->hwe = hwe;
e->gt = gt;
if (vm)
e->vm = xe_vm_get(vm);
e->class = hwe->class;
e->width = width;
e->logical_mask = logical_mask;
e->fence_irq = &gt->fence_irq[hwe->class];
e->ring_ops = gt->ring_ops[hwe->class];
e->ops = gt->engine_ops;
INIT_LIST_HEAD(&e->persitent.link);
INIT_LIST_HEAD(&e->compute.link);
INIT_LIST_HEAD(&e->multi_gt_link);
/* FIXME: Wire up to configurable default value */
e->sched_props.timeslice_us = 1 * 1000;
e->sched_props.preempt_timeout_us = 640 * 1000;
if (xe_engine_is_parallel(e)) {
e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
e->parallel.composite_fence_seqno = 1;
}
if (e->flags & ENGINE_FLAG_VM) {
e->bind.fence_ctx = dma_fence_context_alloc(1);
e->bind.fence_seqno = 1;
}
for (i = 0; i < width; ++i) {
err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K);
if (err)
goto err_lrc;
}
err = e->ops->init(e);
if (err)
goto err_lrc;
return e;
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_finish(e->lrc + i);
kfree(e);
return ERR_PTR(err);
}
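/* Create an engine, holding the VM's lock (if any) across the LRC setup. */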
struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hwe, u32 flags)
{
struct ww_acquire_ctx ww;
struct xe_engine *e;
int err;
if (vm) {
err = xe_vm_lock(vm, &ww, 0, true);
if (err)
return ERR_PTR(err);
}
e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags);
if (vm)
xe_vm_unlock(vm, &ww);
return e;
}
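/*
 * Create a width-1 engine that may be placed on any non-reserved hardware
 * engine of @class on @gt; the logical mask is the union of all matching
 * logical instances.
 */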
struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
struct xe_vm *vm,
enum xe_engine_class class, u32 flags)
{
struct xe_hw_engine *hwe, *hwe0 = NULL;
enum xe_hw_engine_id id;
u32 logical_mask = 0;
for_each_hw_engine(hwe, gt, id) {
if (xe_hw_engine_is_reserved(hwe))
continue;
if (hwe->class == class) {
logical_mask |= BIT(hwe->logical_instance);
if (!hwe0)
hwe0 = hwe;
}
}
if (!logical_mask)
return ERR_PTR(-ENODEV);
return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags);
}
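/*
 * Kref release callback: drop the references held on any child bind engines
 * on other GTs, then hand the engine to the submission backend for teardown;
 * final freeing happens in xe_engine_fini(), which the backend is expected
 * to call once its teardown completes.
 */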
void xe_engine_destroy(struct kref *ref)
{
struct xe_engine *e = container_of(ref, struct xe_engine, refcount);
struct xe_engine *engine, *next;
if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) {
list_for_each_entry_safe(engine, next, &e->multi_gt_list,
multi_gt_link)
xe_engine_put(engine);
}
e->ops->fini(e);
}
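/* Final cleanup: finish the LRCs, drop the VM reference, free the engine. */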
void xe_engine_fini(struct xe_engine *e)
{
int i;
for (i = 0; i < e->width; ++i)
xe_lrc_finish(e->lrc + i);
if (e->vm)
xe_vm_put(e->vm);
kfree(e);
}
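/*
 * Look up an engine by per-file handle, taking a reference under the xarray
 * lock so the engine cannot be freed between lookup and get.
 */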
struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
{
struct xe_engine *e;
	mutex_lock(&xef->engine.lock);
	e = xa_load(&xef->engine.xa, id);
	if (e)
		xe_engine_get(e);
	mutex_unlock(&xef->engine.lock);

	return e;
}
static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
return -EINVAL;
if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH &&
!capable(CAP_SYS_NICE)))
return -EPERM;
return e->ops->set_priority(e, value);
}
static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (!capable(CAP_SYS_NICE))
return -EPERM;
return e->ops->set_timeslice(e, value);
}
static int engine_set_preemption_timeout(struct xe_device *xe,
struct xe_engine *e, u64 value,
bool create)
{
if (!capable(CAP_SYS_NICE))
return -EPERM;
return e->ops->set_preempt_timeout(e, value);
}
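/*
 * Compute (long-running) mode can only be set at create time and requires a
 * width-1 engine on a compute-mode VM; compute engines are never persistent.
 */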
static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, !create))
return -EINVAL;
if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
return -EINVAL;
if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM))
return -EINVAL;
if (value) {
struct xe_vm *vm = e->vm;
int err;
if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm)))
return -EOPNOTSUPP;
if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm)))
return -EOPNOTSUPP;
if (XE_IOCTL_ERR(xe, e->width != 1))
return -EINVAL;
e->compute.context = dma_fence_context_alloc(1);
spin_lock_init(&e->compute.lock);
err = xe_vm_add_compute_engine(vm, e);
if (XE_IOCTL_ERR(xe, err))
return err;
e->flags |= ENGINE_FLAG_COMPUTE_MODE;
e->flags &= ~ENGINE_FLAG_PERSISTENT;
}
return 0;
}
static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, !create))
return -EINVAL;
if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
return -EINVAL;
if (value)
e->flags |= ENGINE_FLAG_PERSISTENT;
else
e->flags &= ~ENGINE_FLAG_PERSISTENT;
return 0;
}
static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, !create))
return -EINVAL;
if (!capable(CAP_SYS_NICE))
return -EPERM;
return e->ops->set_job_timeout(e, value);
}
static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, !create))
return -EINVAL;
if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
return -EINVAL;
e->usm.acc_trigger = value;
return 0;
}
static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, !create))
return -EINVAL;
if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
return -EINVAL;
e->usm.acc_notify = value;
return 0;
}
static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
u64 value, bool create)
{
if (XE_IOCTL_ERR(xe, !create))
return -EINVAL;
if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
return -EINVAL;
e->usm.acc_granularity = value;
return 0;
}
typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
struct xe_engine *e,
u64 value, bool create);
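/*
 * Property setters indexed by the uapi XE_ENGINE_PROPERTY_* value; the index
 * is bounds-checked and sanitized with array_index_nospec() before use.
 */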
static const xe_engine_set_property_fn engine_set_property_funcs[] = {
[XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority,
[XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice,
[XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
[XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
[XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence,
[XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
[XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
[XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
[XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
};
static int engine_user_ext_set_property(struct xe_device *xe,
struct xe_engine *e,
u64 extension,
bool create)
{
u64 __user *address = u64_to_user_ptr(extension);
struct drm_xe_ext_engine_set_property ext;
int err;
u32 idx;
	err = copy_from_user(&ext, address, sizeof(ext));
if (XE_IOCTL_ERR(xe, err))
return -EFAULT;
if (XE_IOCTL_ERR(xe, ext.property >=
ARRAY_SIZE(engine_set_property_funcs)))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
return engine_set_property_funcs[idx](xe, e, ext.value, create);
}
typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe,
struct xe_engine *e,
u64 extension,
bool create);
static const xe_engine_user_extension_fn engine_user_extension_funcs[] = {
[XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property,
};
#define MAX_USER_EXTENSIONS 16
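/*
 * Walk the user extension chain recursively; ext_number bounds the depth at
 * MAX_USER_EXTENSIONS so userspace cannot construct an unbounded or circular
 * chain.
 */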
static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
u64 extensions, int ext_number, bool create)
{
u64 __user *address = u64_to_user_ptr(extensions);
struct xe_user_extension ext;
int err;
u32 idx;
if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
return -E2BIG;
	err = copy_from_user(&ext, address, sizeof(ext));
if (XE_IOCTL_ERR(xe, err))
return -EFAULT;
if (XE_IOCTL_ERR(xe, ext.name >=
ARRAY_SIZE(engine_user_extension_funcs)))
return -EINVAL;
idx = array_index_nospec(ext.name,
ARRAY_SIZE(engine_user_extension_funcs));
err = engine_user_extension_funcs[idx](xe, e, extensions, create);
if (XE_IOCTL_ERR(xe, err))
return err;
if (ext.next_extension)
return engine_user_extensions(xe, e, ext.next_extension,
++ext_number, create);
return 0;
}
static const enum xe_engine_class user_to_xe_engine_class[] = {
[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};
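/*
 * Translate a uapi class/GT/instance triple into a hardware engine, or NULL
 * if any of the indices is out of range.
 */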
static struct xe_hw_engine *
find_hw_engine(struct xe_device *xe,
struct drm_xe_engine_class_instance eci)
{
u32 idx;
	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
return NULL;
if (eci.gt_id >= xe->info.tile_count)
return NULL;
idx = array_index_nospec(eci.engine_class,
ARRAY_SIZE(user_to_xe_engine_class));
return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
user_to_xe_engine_class[idx],
eci.engine_instance, true);
}
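/*
 * VM bind engines are restricted to a single placement of a single instance;
 * the class is forced to copy and the returned mask covers every
 * non-reserved copy engine on the GT.
 */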
static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
struct drm_xe_engine_class_instance *eci,
u16 width, u16 num_placements)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
u32 logical_mask = 0;
if (XE_IOCTL_ERR(xe, width != 1))
return 0;
if (XE_IOCTL_ERR(xe, num_placements != 1))
return 0;
if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0))
return 0;
eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
for_each_hw_engine(hwe, gt, id) {
if (xe_hw_engine_is_reserved(hwe))
continue;
if (hwe->class ==
user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
logical_mask |= BIT(hwe->logical_instance);
}
return logical_mask;
}
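/*
 * Validate a user-supplied placement matrix laid out as
 * eci[placement * width + slot]: every entry must name a valid, non-reserved
 * engine of the same class on the same GT, and for parallel submission each
 * successive slot must use the logically contiguous next instance.
 */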
static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
struct drm_xe_engine_class_instance *eci,
u16 width, u16 num_placements)
{
int len = width * num_placements;
int i, j, n;
u16 class;
u16 gt_id;
u32 return_mask = 0, prev_mask;
if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) &&
len > 1))
return 0;
for (i = 0; i < width; ++i) {
u32 current_mask = 0;
for (j = 0; j < num_placements; ++j) {
struct xe_hw_engine *hwe;
n = j * width + i;
hwe = find_hw_engine(xe, eci[n]);
if (XE_IOCTL_ERR(xe, !hwe))
return 0;
if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe)))
return 0;
if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) ||
XE_IOCTL_ERR(xe, n && eci[n].engine_class != class))
return 0;
class = eci[n].engine_class;
gt_id = eci[n].gt_id;
if (width == 1 || !i)
return_mask |= BIT(eci[n].engine_instance);
current_mask |= BIT(eci[n].engine_instance);
}
/* Parallel submissions must be logically contiguous */
if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1))
return 0;
prev_mask = current_mask;
}
return return_mask;
}
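/*
 * Engine create ioctl: VM bind engines get one engine per non-media GT, with
 * the extra engines linked to the first through multi_gt_link; everything
 * else gets a single engine on the GT and VM named by userspace. A runtime
 * PM reference is held for the lifetime of the engine.
 */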
int xe_engine_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_engine_create *args = data;
struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
struct drm_xe_engine_class_instance __user *user_eci =
u64_to_user_ptr(args->instances);
struct xe_hw_engine *hwe;
struct xe_vm *vm, *migrate_vm;
struct xe_gt *gt;
struct xe_engine *e = NULL;
u32 logical_mask;
u32 id;
int len;
int err;
if (XE_IOCTL_ERR(xe, args->flags))
return -EINVAL;
len = args->width * args->num_placements;
if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
return -EINVAL;
	err = copy_from_user(eci, user_eci,
			     sizeof(struct drm_xe_engine_class_instance) *
			     len);
if (XE_IOCTL_ERR(xe, err))
return -EFAULT;
if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
return -EINVAL;
xe_pm_runtime_get(xe);
if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
for_each_gt(gt, xe, id) {
struct xe_engine *new;
if (xe_gt_is_media_type(gt))
continue;
eci[0].gt_id = gt->info.id;
logical_mask = bind_engine_logical_mask(xe, gt, eci,
args->width,
args->num_placements);
if (XE_IOCTL_ERR(xe, !logical_mask)) {
err = -EINVAL;
goto put_rpm;
}
hwe = find_hw_engine(xe, eci[0]);
if (XE_IOCTL_ERR(xe, !hwe)) {
err = -EINVAL;
goto put_rpm;
}
migrate_vm = xe_migrate_get_vm(gt->migrate);
new = xe_engine_create(xe, migrate_vm, logical_mask,
args->width, hwe,
ENGINE_FLAG_PERSISTENT |
ENGINE_FLAG_VM |
(id ?
ENGINE_FLAG_BIND_ENGINE_CHILD :
0));
xe_vm_put(migrate_vm);
if (IS_ERR(new)) {
err = PTR_ERR(new);
if (e)
goto put_engine;
goto put_rpm;
}
if (id == 0)
e = new;
else
list_add_tail(&new->multi_gt_list,
&e->multi_gt_link);
}
} else {
gt = xe_device_get_gt(xe, eci[0].gt_id);
logical_mask = calc_validate_logical_mask(xe, gt, eci,
args->width,
args->num_placements);
if (XE_IOCTL_ERR(xe, !logical_mask)) {
err = -EINVAL;
goto put_rpm;
}
hwe = find_hw_engine(xe, eci[0]);
if (XE_IOCTL_ERR(xe, !hwe)) {
err = -EINVAL;
goto put_rpm;
}
vm = xe_vm_lookup(xef, args->vm_id);
if (XE_IOCTL_ERR(xe, !vm)) {
err = -ENOENT;
goto put_rpm;
}
e = xe_engine_create(xe, vm, logical_mask,
args->width, hwe, ENGINE_FLAG_PERSISTENT);
xe_vm_put(vm);
if (IS_ERR(e)) {
err = PTR_ERR(e);
goto put_rpm;
}
}
if (args->extensions) {
err = engine_user_extensions(xe, e, args->extensions, 0, true);
if (XE_IOCTL_ERR(xe, err))
goto put_engine;
}
if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
!!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
		err = -EOPNOTSUPP;
goto put_engine;
}
e->persitent.xef = xef;
mutex_lock(&xef->engine.lock);
err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
mutex_unlock(&xef->engine.lock);
if (err)
goto put_engine;
args->engine_id = id;
return 0;
put_engine:
xe_engine_kill(e);
xe_engine_put(e);
put_rpm:
xe_pm_runtime_put(xe);
return err;
}
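/*
 * Remove a compute-mode engine from its VM's preempt list and enable
 * software signaling on any pending preempt fence before dropping it.
 */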
static void engine_kill_compute(struct xe_engine *e)
{
if (!xe_vm_in_compute_mode(e->vm))
return;
down_write(&e->vm->lock);
list_del(&e->compute.link);
--e->vm->preempt.num_engines;
if (e->compute.pfence) {
dma_fence_enable_sw_signaling(e->compute.pfence);
dma_fence_put(e->compute.pfence);
e->compute.pfence = NULL;
}
up_write(&e->vm->lock);
}
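/* Kill an engine and any child bind engines on its multi-GT list. */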
void xe_engine_kill(struct xe_engine *e)
{
	struct xe_engine *engine, *next;

	list_for_each_entry_safe(engine, next, &e->multi_gt_list,
				 multi_gt_link) {
e->ops->kill(engine);
engine_kill_compute(engine);
}
e->ops->kill(e);
engine_kill_compute(e);
}
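/*
 * Engine destroy ioctl: non-persistent engines are killed immediately, while
 * persistent ones are handed to the device so outstanding jobs can complete;
 * either way the create-time runtime PM reference is dropped here.
 */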
int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_engine_destroy *args = data;
struct xe_engine *e;
if (XE_IOCTL_ERR(xe, args->pad))
return -EINVAL;
mutex_lock(&xef->engine.lock);
e = xa_erase(&xef->engine.xa, args->engine_id);
mutex_unlock(&xef->engine.lock);
if (XE_IOCTL_ERR(xe, !e))
return -ENOENT;
if (!(e->flags & ENGINE_FLAG_PERSISTENT))
xe_engine_kill(e);
else
xe_device_add_persitent_engines(xe, e);
trace_xe_engine_close(e);
xe_engine_put(e);
xe_pm_runtime_put(xe);
return 0;
}
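/*
 * Engine set-property ioctl: apply a single property, then any chained
 * extensions, with create == false so create-only properties are rejected.
 */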
int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_engine_set_property *args = data;
struct xe_engine *e;
int ret;
u32 idx;
e = xe_engine_lookup(xef, args->engine_id);
if (XE_IOCTL_ERR(xe, !e))
return -ENOENT;
if (XE_IOCTL_ERR(xe, args->property >=
ARRAY_SIZE(engine_set_property_funcs))) {
ret = -EINVAL;
goto out;
}
idx = array_index_nospec(args->property,
ARRAY_SIZE(engine_set_property_funcs));
ret = engine_set_property_funcs[idx](xe, e, args->value, false);
if (XE_IOCTL_ERR(xe, ret))
goto out;
if (args->extensions)
ret = engine_user_extensions(xe, e, args->extensions, 0,
false);
out:
xe_engine_put(e);
return ret;
}