From 84a1ed5e67565b09b8fd22a26754d2897de55ce0 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 22 Feb 2024 18:23:56 -0500 Subject: [PATCH 0001/1477] drm/xe/uapi: Remove unused flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those cases missed in previous uAPI cleanups were mostly accidentally brought in from i915 or created to exercise the possibilities of gpuvm but they are not used by userspace yet, so let's remove them. They can still be brought back later if needed. v2: - Fix XE_VM_FLAG_FAULT_MODE support in xe_lrc.c (Brian Welty) - Leave DRM_XE_VM_BIND_OP_UNMAP_ALL (José Roberto de Souza) - Ensure invalid flag values are rejected (Rodrigo Vivi) v3: Rebase after removal of persistent exec_queues (Francois Dugast) v4: Rodrigo: Rebase after the new dumpable flag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232356.175431-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 94 +----------------------- drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 --- drivers/gpu/drm/xe/xe_lrc.c | 10 +-- drivers/gpu/drm/xe/xe_vm.c | 12 +-- drivers/gpu/drm/xe/xe_vm_types.h | 4 - include/uapi/drm/xe_drm.h | 19 ----- 6 files changed, 6 insertions(+), 143 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 4bb8f897bf15..11e150f4c0c1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -354,91 +354,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return 0; } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, 
&min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - if (!create) - return q->ops->set_preempt_timeout(q, value); - - q->sched_props.preempt_timeout_us = value; - return 0; -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - q->sched_props.job_timeout_ms = value; - - return 0; -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -446,11 +361,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, 
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -469,7 +379,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index c40240e88068..62b3d9d1d7cd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -141,16 +141,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @usm.acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @usm.acc_notify: access counter notify */ - u32 acc_notify; - /** @usm.acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 8c85e90220de..7ad853b0788a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -706,8 +706,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define 
PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -778,13 +776,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e3bde897f6e8..db3f049a47dc 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2131,10 +2131,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2329,8 +2325,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ? 
@@ -2475,7 +2469,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2750,9 +2744,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7d4f810f9c04..3fce50b91256 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -294,10 +294,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2fefec9c0e94..538a3ac95c54 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -862,10 +862,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. 
In the future, the NULL flags @@ -958,8 +954,6 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ @@ -1076,19 +1070,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; From ddadc7120d4be7a40a9745924339c472c5850d14 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:19 -0800 Subject: [PATCH 0002/1477] drm/xe: Fix execlist splat Although execlist submission is not supported it should be kept in a basic working state as it can be used for very early hardware bring up. Fix the below splat. 
WARNING: CPU: 3 PID: 11 at drivers/gpu/drm/xe/xe_execlist.c:217 execlist_run_job+0x1c2/0x220 [xe] Modules linked in: xe drm_kunit_helpers drm_gpuvm drm_ttm_helper ttm drm_exec drm_suballoc_helper drm_buddy gpu_sched mei_pxp mei_hdcp wmi_bmof x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core video snd_pcm mei_me mei wmi fuse e1000e i2c_i801 ptp i2c_smbus pps_core intel_lpss_pci CPU: 3 PID: 11 Comm: kworker/u16:0 Tainted: G U 6.8.0-rc3-guc+ #1046 Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 Workqueue: rcs0 drm_sched_run_job_work [gpu_sched] RIP: 0010:execlist_run_job+0x1c2/0x220 [xe] Code: 8b f8 03 00 00 4c 89 39 e9 e2 fe ff ff 49 8d 7d 20 be ff ff ff ff e8 ed fd a6 e1 85 c0 0f 85 e1 fe ff ff 0f 0b e9 da fe ff ff <0f> 0b 0f 0b 41 83 fc 03 0f 86 8a fe ff ff 0f 0b e9 83 fe ff ff be RSP: 0018:ffffc9000013bdb8 EFLAGS: 00010246 RAX: ffff888105021a00 RBX: ffff888105078400 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffc9000013bd14 RDI: ffffc90001609090 RBP: ffff88811e3f0040 R08: 0000000000000088 R09: 00000000ffffff81 R10: 0000000000000001 R11: ffff88810c10c000 R12: 00000000fffffffe R13: ffff888109b72c28 R14: ffff8881050784a0 R15: ffff888105078408 FS: 0000000000000000(0000) GS:ffff88849f980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563459d130f8 CR3: 000000000563a001 CR4: 0000000000f70ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x7f/0x170 ? execlist_run_job+0x1c2/0x220 [xe] ? report_bug+0x1c7/0x1d0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? execlist_run_job+0x1c2/0x220 [xe] ? execlist_run_job+0x2c/0x220 [xe] drm_sched_run_job_work+0x246/0x3f0 [gpu_sched] ? 
process_one_work+0x18d/0x4e0 process_one_work+0x1f7/0x4e0 worker_thread+0x1da/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0xfe/0x130 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 9b9529ce379a ("drm/xe: Rename engine to exec_queue") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_execlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 1788e78caf5c..dece2785933c 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); From 3121fed0c51beb8ea7b18ab2ceff1ac9e358ac53 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:20 -0800 Subject: [PATCH 0003/1477] drm/xe: Cleanup some layering in GGTT xe_ggtt.c touched GuC layers which is incorrect. Call into xe_gt_tlb_invalidation layer instead. 
Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 7 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 37 +++---------------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 41 ++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 2 +- 4 files changed, 54 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 92320bbc9d3d..4e7f809d2b00 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -110,6 +110,13 @@ #define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4) #define MED_VF_SW_FLAG_COUNT 4 +#define GUC_TLB_INV_CR XE_REG(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) +#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) + /* GuC Interrupt Vector */ #define GUC_INTR_GUC2HOST REG_BIT(15) #define GUC_INTR_EXEC_ERROR REG_BIT(14) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ab96edb058d6..5d46958e3144 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -19,7 +19,6 @@ #include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" -#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_wopcm.h" @@ -249,15 +248,11 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) return err; } -#define GUC_TLB_INV_CR XE_REG(0xcee8) -#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) -#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) -#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) - static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + int err; + if (!gt) return; @@ -267,29 
+262,9 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) * and GuC are accessible. */ xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - - /* TODO: vfunc for GuC vs. non-GuC */ - - if (gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - xe_gt_assert(gt, seqno > 0); - if (seqno > 0) - xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_uc_enabled(gt_to_xe(gt))) { - struct xe_device *xe = gt_to_xe(gt); - - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else - xe_mmio_write32(gt, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - + err = xe_gt_tlb_invalidation_ggtt(gt); + if (err) + drm_warn(&xe->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index e3a4131ebb58..a7b1f7cfec87 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -11,7 +11,9 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_mmio.h" #include "xe_trace.h" +#include "regs/xe_guc_regs.h" #define TLB_TIMEOUT (HZ / 4) @@ -210,7 +212,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -222,6 +224,43 @@ int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) ARRAY_SIZE(action)); } +/** + * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT + * @gt: graphics tile + * + * Issue a TLB invalidation for the GGTT. 
Completion of TLB invalidation is + * synchronous. + * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_guc_ct_enabled(&gt->uc.guc.ct) && + gt->uc.guc.submission_state.enabled) { + int seqno; + + seqno = xe_gt_tlb_invalidation_guc(gt); + if (seqno <= 0) + return seqno; + + xe_gt_tlb_invalidation_wait(gt, seqno); + } else if (xe_device_uc_enabled(xe)) { + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else { + xe_mmio_write32(gt, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + } + + return 0; +} + /** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: graphics tile diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index b333c1709397..fbb743d80d2c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -16,7 +16,7 @@ struct xe_vma; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_guc(struct xe_gt *gt); +int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); From a9e483dda3efa5b9aae5d9eef94d2c3a878d9bea Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:21 -0800 Subject: [PATCH 0004/1477] drm/xe: Don't support execlists in xe_gt_tlb_invalidation layer The xe_gt_tlb_invalidation layer implements TLB invalidations for a GuC backend. Simply return if in execlists mode. A follow up may properly implement the xe_gt_tlb_invalidation layer for both GuC and execlists.
Fixes: a9351846d945 ("drm/xe: Break of TLB invalidation into its own file") Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a7b1f7cfec87..f29ee1ccfa71 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -287,6 +287,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -355,6 +363,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct xe_guc *guc = &gt->uc.guc; int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order From 30c399529f4c64e9671cba832630629d9cd08bf3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:24 -0500 Subject: [PATCH 0005/1477] drm/xe: Document Xe PM component Replace outdated information with a proper PM documentation. Already establish the rules for the runtime PM get and put that Xe needs to follow. Also add missing function documentation to all the "exported" functions. v2: updated after Francois' feedback.
s/grater/greater (Matt) v3: detach D3 from runtime_pm remove opportunistic S0iX (Anshuman) Cc: Matthew Auld Cc: Anshuman Gupta Acked-by: Francois Dugast Reviewed-by: Francois Dugast #v2 Signed-off-by: Rodrigo Vivi Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_pm.c | 109 +++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ab283e9a8b4e..ae220f21acb1 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -25,21 +25,47 @@ /** * DOC: Xe Power Management * - * Xe PM shall be guided by the simplicity. - * Use the simplest hook options whenever possible. - * Let's not reinvent the runtime_pm references and hooks. - * Shall have a clear separation of display and gt underneath this component. + * Xe PM implements the main routines for both system level suspend states and + * for the opportunistic runtime suspend states. * - * What's next: + * System Level Suspend (S-States) - In general this is OS initiated suspend + * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram), + * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They + * are the main point for the suspend to and resume from these states. * - * For now s2idle and s3 are only working in integrated devices. The next step - * is to iterate through all VRAM's BO backing them up into the system memory - * before allowing the system suspend. + * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power + * state D3, controlled by the PCI subsystem and ACPI with the help from the + * runtime_pm infrastructure. + * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory + * alive and quicker low latency resume or D3Cold where Vcc power is off for + * better power savings. 
+ * The Vcc control of PCI hierarchy can only be controlled at the PCI root port + * level, while the device driver can be behind multiple bridges/switches and + * paired with other devices. For this reason, the PCI subsystem cannot perform + * the transition towards D3Cold. The lowest runtime PM possible from the PCI + * subsystem is D3hot. Then, if all these paired devices in the same root port + * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF) + * to perform the transition from D3hot to D3cold. Xe may disallow this + * transition by calling pci_d3cold_disable(root_pdev) before going to runtime + * suspend. It will be based on runtime conditions such as VRAM usage for a + * quick and low latency resume for instance. * - * Also runtime_pm needs to be here from the beginning. + * Runtime PM - This infrastructure provided by the Linux kernel allows the + * device drivers to indicate when they can be runtime suspended, so the device + * could be put at D3 (if supported), or allow deeper package sleep states + * (PC-states), and/or other low level power states. Xe PM component provides + * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI + * subsystem will call before transition to/from runtime suspend. * - * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC - * and no wait boost. Frequency optimizations should come on a next stage. + * Also, Xe PM provides get and put functions that Xe driver will use to + * indicate activity. In order to avoid locking complications with the memory + * management, whenever possible, these get and put functions need to be called + * from the higher/outer levels. + * The main cases that need to be protected from the outer levels are: IOCTL, + * sysfs, debugfs, dma-buf sharing, GPU execution. + * + * This component is not responsible for GT idleness (RC6) nor GT frequency + * management (RPS).
*/ /** @@ -178,6 +204,12 @@ void xe_pm_init_early(struct xe_device *xe) drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); } +/** + * xe_pm_init - Initialize Xe Power Management + * @xe: xe device instance + * + * This component is responsible for System and Device sleep states. + */ void xe_pm_init(struct xe_device *xe) { /* For now suspend/resume is only allowed with GuC */ @@ -196,6 +228,10 @@ void xe_pm_init(struct xe_device *xe) xe_pm_runtime_init(xe); } +/** + * xe_pm_runtime_fini - Finalize Runtime PM + * @xe: xe device instance + */ void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -225,6 +261,12 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) return READ_ONCE(xe->pm_callback_task); } +/** + * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold + * @xe: xe device instance + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_runtime_suspend(struct xe_device *xe) { struct xe_bo *bo, *on; @@ -290,6 +332,12 @@ int xe_pm_runtime_suspend(struct xe_device *xe) return err; } +/** + * xe_pm_runtime_resume - Waking up from D3hot/D3Cold + * @xe: xe device instance + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_runtime_resume(struct xe_device *xe) { struct xe_gt *gt; @@ -341,22 +389,47 @@ int xe_pm_runtime_resume(struct xe_device *xe) return err; } +/** + * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ int xe_pm_runtime_get(struct xe_device *xe) { return pm_runtime_get_sync(xe->drm.dev); } +/** + * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. 
+ */ int xe_pm_runtime_put(struct xe_device *xe) { pm_runtime_mark_last_busy(xe->drm.dev); return pm_runtime_put(xe->drm.dev); } +/** + * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ int xe_pm_runtime_get_if_active(struct xe_device *xe) { return pm_runtime_get_if_active(xe->drm.dev, true); } +/** + * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge + * @xe: xe device instance + */ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -371,6 +444,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } } +/** + * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * @xe: xe device instance + * @threshold: VRAM size in bites for the D3cold threshold + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { struct ttm_resource_manager *man; @@ -395,6 +475,13 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) return 0; } +/** + * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed + * @xe: xe device instance + * + * To be called during runtime_pm idle callback. + * Check for all the D3Cold conditions ahead of runtime suspend. + */ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) { struct ttm_resource_manager *man; From 0f9d886f0bf93394b09b1dfe5397f7842c0f48b9 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:25 -0500 Subject: [PATCH 0006/1477] drm/xe: Convert mem_access assertion towards the runtime_pm state The mem_access helpers are going away and getting replaced by direct calls of the xe_pm_runtime_{get,put} functions. 
However, an assertion with a warning splat is desired when we hit the worst case of a memory access with the device really in the 'suspended' state. Also, this needs to be the first step. Otherwise, the upcoming conversion would be really noisy with warn splats of missing mem_access gets. v2: Minor doc changes as suggested by Matt Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_device.c | 13 ++++++++++++- drivers/gpu/drm/xe/xe_pm.c | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca85e81fdb44..049bbca6ed56 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -620,9 +620,20 @@ bool xe_device_mem_access_ongoing(struct xe_device *xe) return atomic_read(&xe->mem_access.ref); } +/** + * xe_device_assert_mem_access - Inspect the current runtime_pm state. + * @xe: xe device instance + * + * To be used before any kind of memory access. It will splat a debug warning + * if the device is currently sleeping. But it doesn't guarantee in any way + * that the device is going to remain awake. Xe PM runtime get and put + * functions might be added to the outer bound of the memory access, while + * this check is intended for inner usage to splat some warning if the worst + * case has just happened.
+ */ void xe_device_assert_mem_access(struct xe_device *xe) { - XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); + XE_WARN_ON(xe_pm_runtime_suspended(xe)); } bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ae220f21acb1..5289d8c0c3b1 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -261,6 +261,22 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) return READ_ONCE(xe->pm_callback_task); } +/** + * xe_pm_runtime_suspended - Check if runtime_pm state is suspended + * @xe: xe device instance + * + * This does not provide any guarantee that the device is going to remain + * suspended as it might be racing with the runtime state transitions. + * It can be used only as a non-reliable assertion, to ensure that we are not in + * the sleep state while trying to access some memory for instance. + * + * Returns true if PCI device is suspended, false otherwise. + */ +bool xe_pm_runtime_suspended(struct xe_device *xe) +{ + return pm_runtime_suspended(xe->drm.dev); +} + /** * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 64a97c6726a7..75919eba1746 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -23,6 +23,7 @@ int xe_pm_resume(struct xe_device *xe); void xe_pm_init_early(struct xe_device *xe); void xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); +bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); int xe_pm_runtime_get(struct xe_device *xe); From 23cf006beac3db89f946a52c962cd16c82066c5c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:26 -0500 Subject: [PATCH 0007/1477] drm/xe: Runtime PM wake on every IOCTL Let's ensure our PCI device is awaken on every 
IOCTL entry. Let's increase the runtime_pm protection and start moving that to the outer bounds. v2: minor typo fix and renaming function to make it clear that is intended to be used by ioctl only. (Matt) v3: Make it NULL if CONFIG_COMPAT is not selected. Cc: Matthew Auld Reviewed-by: Francois Dugast Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-3-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_device.c | 37 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_pm.c | 15 ++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 049bbca6ed56..919ad88f0495 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -136,15 +136,48 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), }; +static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct drm_file *file_priv = file->private_data; + struct xe_device *xe = to_xe_device(file_priv->minor->dev); + long ret; + + ret = xe_pm_runtime_get_ioctl(xe); + if (ret >= 0) + ret = drm_ioctl(file, cmd, arg); + xe_pm_runtime_put(xe); + + return ret; +} + +#ifdef CONFIG_COMPAT +static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct drm_file *file_priv = file->private_data; + struct xe_device *xe = to_xe_device(file_priv->minor->dev); + long ret; + + ret = xe_pm_runtime_get_ioctl(xe); + if (ret >= 0) + ret = drm_compat_ioctl(file, cmd, arg); + xe_pm_runtime_put(xe); + + return ret; +} +#else +/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */ +#define xe_drm_compat_ioctl NULL +#endif + static const struct file_operations xe_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release_noglobal, - .unlocked_ioctl = drm_ioctl, + .unlocked_ioctl = xe_drm_ioctl, .mmap = 
drm_gem_mmap, .poll = drm_poll, .read = drm_read, - .compat_ioctl = drm_compat_ioctl, + .compat_ioctl = xe_drm_compat_ioctl, .llseek = noop_llseek, #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 5289d8c0c3b1..b5511e3c3153 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -430,6 +430,21 @@ int xe_pm_runtime_put(struct xe_device *xe) return pm_runtime_put(xe->drm.dev); } +/** + * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ +int xe_pm_runtime_get_ioctl(struct xe_device *xe) +{ + if (WARN_ON(xe_pm_read_callback_task(xe) == current)) + return -ELOOP; + + return pm_runtime_get_sync(xe->drm.dev); +} + /** * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 75919eba1746..7f5884babb29 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -27,6 +27,7 @@ bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); int xe_pm_runtime_get(struct xe_device *xe); +int xe_pm_runtime_get_ioctl(struct xe_device *xe); int xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); From 6a0784e6001f42b87af21ab63b3935ef4d6c6407 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:27 -0500 Subject: [PATCH 0008/1477] drm/xe: Convert kunit tests from mem_access to xe_pm_runtime Let's convert the kunit tests that are currently relying on xe_device_mem_access_{get,put} towards the direct xe_pm_runtime_{get,put}. 
While doing this we need to move the get/put calls towards the outer bounds of the tests to ensure consistency with the other usages of pm_runtime on the regular paths. v2: include xe_pm.h in tests/xe_mocs.c and sort the include block while at it. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-4-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/tests/xe_bo.c | 8 ++++---- drivers/gpu/drm/xe/tests/xe_migrate.c | 7 +++++-- drivers/gpu/drm/xe/tests/xe_mocs.c | 19 +++++++++++++------ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 3436fd9cf2b2..0926a1c2eb86 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -163,7 +163,7 @@ static int ccs_test_run_device(struct xe_device *xe) return 0; } - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); for_each_tile(tile, xe, id) { /* For igfx run only for primary tile */ @@ -172,7 +172,7 @@ static int ccs_test_run_device(struct xe_device *xe) ccs_test_run_tile(xe, tile, test); } - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -335,12 +335,12 @@ static int evict_test_run_device(struct xe_device *xe) return 0; } - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); for_each_tile(tile, xe, id) evict_test_run_tile(xe, tile, test); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index a6523df0f1d3..ce531498f57f 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -10,6 +10,7 @@ #include "tests/xe_pci_test.h" #include "xe_pci.h" +#include "xe_pm.h" static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, const char *str, struct kunit *test) @@ -423,17 +424,19 @@ static int migrate_test_run_device(struct xe_device *xe) struct 
xe_tile *tile; int id; + xe_pm_runtime_get(xe); + for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->q->vm, true); - xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); - xe_device_mem_access_put(xe); xe_vm_unlock(m->q->vm); } + xe_pm_runtime_put(xe); + return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index df5c36b70ab4..df0cbb2ddcb5 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -10,10 +10,11 @@ #include "tests/xe_pci_test.h" #include "tests/xe_test.h" -#include "xe_pci.h" +#include "xe_device.h" #include "xe_gt.h" #include "xe_mocs.h" -#include "xe_device.h" +#include "xe_pci.h" +#include "xe_pm.h" struct live_mocs { struct xe_mocs_info table; @@ -45,7 +46,6 @@ static void read_l3cc_table(struct xe_gt *gt, struct kunit *test = xe_cur_kunit(); - xe_device_mem_access_get(gt_to_xe(gt)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); @@ -65,7 +65,6 @@ static void read_l3cc_table(struct xe_gt *gt, XELP_LNCFCMOCS(i).addr); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_device_mem_access_put(gt_to_xe(gt)); } static void read_mocs_table(struct xe_gt *gt, @@ -80,7 +79,6 @@ static void read_mocs_table(struct xe_gt *gt, struct kunit *test = xe_cur_kunit(); - xe_device_mem_access_get(gt_to_xe(gt)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); @@ -100,7 +98,6 @@ static void read_mocs_table(struct xe_gt *gt, XELP_GLOBAL_MOCS(i).addr); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_device_mem_access_put(gt_to_xe(gt)); } static int mocs_kernel_test_run_device(struct xe_device *xe) @@ -113,6 +110,8 @@ static int 
mocs_kernel_test_run_device(struct xe_device *xe) unsigned int flags; int id; + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); if (flags & HAS_GLOBAL_MOCS) @@ -120,6 +119,9 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); } + + xe_pm_runtime_put(xe); + return 0; } @@ -139,6 +141,8 @@ static int mocs_reset_test_run_device(struct xe_device *xe) int id; struct kunit *test = xe_cur_kunit(); + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); kunit_info(test, "mocs_reset_test before reset\n"); @@ -156,6 +160,9 @@ static int mocs_reset_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); } + + xe_pm_runtime_put(xe); + return 0; } From 3f0e14651ab0ca340c4b6f77b2ea615605fcf8f8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:28 -0500 Subject: [PATCH 0009/1477] drm/xe: Runtime PM wake on every sysfs call Let's ensure our PCI device is awaken on every sysfs call. Let's increase the runtime_pm protection and start moving that to the outer bounds. For now, for the files with small number of attr functions, let's only call the runtime pm functions directly. For the hw_engines entries with many files, let's add the sysfs_ops wrapper. 
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-5-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_device_sysfs.c | 4 ++ drivers/gpu/drm/xe/xe_gt_freq.c | 38 +++++++++++- drivers/gpu/drm/xe/xe_gt_idle.c | 23 +++++++- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c | 3 + drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 58 ++++++++++++++++++- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 7 +++ drivers/gpu/drm/xe/xe_tile_sysfs.c | 1 + 7 files changed, 129 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 99113a5a2b84..e47c8ad1bb17 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -35,7 +35,9 @@ vram_d3cold_threshold_show(struct device *dev, if (!xe) return -EINVAL; + xe_pm_runtime_get(xe); ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); + xe_pm_runtime_put(xe); return ret; } @@ -58,7 +60,9 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold); + xe_pm_runtime_get(xe); ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold); + xe_pm_runtime_put(xe); return ret ?: count; } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index e5b0f4ecdbe8..32b9a743629c 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -15,6 +15,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_throttle_sysfs.h" #include "xe_guc_pc.h" +#include "xe_pm.h" /** * DOC: Xe GT Frequency Management @@ -49,12 +50,23 @@ dev_to_pc(struct device *dev) return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc; } +static struct xe_device * +dev_to_xe(struct device *dev) +{ + return gt_to_xe(kobj_to_gt(dev->kobj.parent)); +} + static ssize_t act_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 
freq; - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc)); + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_act_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(act_freq); @@ -65,7 +77,9 @@ static ssize_t cur_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_cur_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -77,8 +91,13 @@ static ssize_t rp0_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rp0_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(rp0_freq); @@ -86,8 +105,13 @@ static ssize_t rpe_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc)); + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rpe_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(rpe_freq); @@ -107,7 +131,9 @@ static ssize_t min_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_min_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -125,7 +151,9 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_set_min_freq(pc, freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -140,7 +168,9 @@ static ssize_t max_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_max_freq(pc, &freq); + 
xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -158,7 +188,9 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_set_max_freq(pc, freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 9fcae65b6469..2984680de3f9 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -12,6 +12,7 @@ #include "xe_guc_pc.h" #include "regs/xe_gt_regs.h" #include "xe_mmio.h" +#include "xe_pm.h" /** * DOC: Xe GT Idle @@ -40,6 +41,15 @@ static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle) return &gtidle_to_gt(gtidle)->uc.guc.pc; } +static struct xe_device * +pc_to_xe(struct xe_guc_pc *pc) +{ + struct xe_guc *guc = container_of(pc, struct xe_guc, pc); + struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + + return gt_to_xe(gt); +} + static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) { switch (state) { @@ -86,8 +96,14 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buff) { struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + ssize_t ret; - return sysfs_emit(buff, "%s\n", gtidle->name); + xe_pm_runtime_get(pc_to_xe(pc)); + ret = sysfs_emit(buff, "%s\n", gtidle->name); + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; } static DEVICE_ATTR_RO(name); @@ -98,7 +114,9 @@ static ssize_t idle_status_show(struct device *dev, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; + xe_pm_runtime_get(pc_to_xe(pc)); state = gtidle->idle_status(pc); + xe_pm_runtime_put(pc_to_xe(pc)); return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } @@ -111,7 +129,10 @@ static ssize_t idle_residency_ms_show(struct device *dev, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; + xe_pm_runtime_get(pc_to_xe(pc)); 
residency = gtidle->idle_residency(pc); + xe_pm_runtime_put(pc_to_xe(pc)); + return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); } static DEVICE_ATTR_RO(idle_residency_ms); diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c index 63d640591a52..9c33045ff1ef 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c @@ -11,6 +11,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_throttle_sysfs.h" #include "xe_mmio.h" +#include "xe_pm.h" /** * DOC: Xe GT Throttle @@ -38,10 +39,12 @@ static u32 read_perf_limit_reasons(struct xe_gt *gt) { u32 reg; + xe_pm_runtime_get(gt_to_xe(gt)); if (xe_gt_is_media_type(gt)) reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS); else reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS); + xe_pm_runtime_put(gt_to_xe(gt)); return reg; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 2345fb42fa39..9e23ca7f45ad 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -9,6 +9,7 @@ #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_pm.h" #define MAX_ENGINE_CLASS_NAME_LEN 16 static int xe_add_hw_engine_class_defaults(struct xe_device *xe, @@ -513,6 +514,7 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name kobject_put(&keclass->base); return NULL; } + keclass->xe = xe; err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, &keclass->base); @@ -567,9 +569,63 @@ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) kfree(kobj); } +#include "xe_pm.h" + +static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +static inline struct xe_device *to_xe_device(const struct drm_device *dev) +{ + return container_of(dev, struct xe_device, drm); +} + +static ssize_t 
xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &kobj_sysfs_ops, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, }; static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index ec5ba673b314..28a0d7c909c0 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -26,6 +26,8 @@ struct kobj_eclass { struct kobject base; /** @eclass: A pointer to the hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @xe: A pointer to the xe device */ + struct xe_device *xe; }; static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj) @@ -33,4 +35,9 @@ static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kob return container_of(kobj, struct kobj_eclass, base)->eclass; } +static inline 
struct xe_device *kobj_to_xe(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_eclass, base)->xe; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 0662968d7bcb..237a0761d3ad 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -7,6 +7,7 @@ #include #include +#include "xe_pm.h" #include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" From 1e941c9881ec20f6d0173bcd344a605bb89cb121 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:29 -0500 Subject: [PATCH 0010/1477] drm/xe: Remove mem_access from guc_pc calls We are now protected by init, sysfs, or removal and don't need these mem_access protections around GuC_PC anymore. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-6-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 64 ++++++---------------------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2839d685631b..f4b031b8d9de 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -381,8 +381,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) struct xe_device *xe = gt_to_xe(gt); u32 freq; - xe_device_mem_access_get(gt_to_xe(gt)); - /* When in RC6, actual frequency reported will be 0. */ if (GRAPHICS_VERx100(xe) >= 1270) { freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); @@ -394,8 +392,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) freq = decode_freq(freq); - xe_device_mem_access_put(gt_to_xe(gt)); - return freq; } @@ -412,14 +408,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_mem_access_get(gt_to_xe(gt)); /* * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. 
*/ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out; + return ret; *freq = xe_mmio_read32(gt, RPNSWREQ); @@ -427,9 +422,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) *freq = decode_freq(*freq); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(gt_to_xe(gt)); - return ret; + return 0; } /** @@ -451,12 +444,7 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - - xe_device_mem_access_get(xe); pc_update_rp_values(pc); - xe_device_mem_access_put(xe); return pc->rpe_freq; } @@ -485,7 +473,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -511,7 +498,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -528,7 +514,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -544,8 +529,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; } @@ -561,7 +544,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -577,7 +559,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -594,7 
+575,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -610,7 +590,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -623,8 +602,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg, gt_c_state; - xe_device_mem_access_get(gt_to_xe(gt)); - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg); @@ -633,8 +610,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) gt_c_state = REG_FIELD_GET(RCN_MASK, reg); } - xe_device_mem_access_put(gt_to_xe(gt)); - switch (gt_c_state) { case GT_C6: return GT_IDLE_C6; @@ -654,9 +629,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg; - xe_device_mem_access_get(gt_to_xe(gt)); reg = xe_mmio_read32(gt, GT_GFX_RC6); - xe_device_mem_access_put(gt_to_xe(gt)); return reg; } @@ -670,9 +643,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u64 reg; - xe_device_mem_access_get(gt_to_xe(gt)); reg = xe_mmio_read32(gt, MTL_MEDIA_MC6); - xe_device_mem_access_put(gt_to_xe(gt)); return reg; } @@ -801,23 +772,19 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (xe->info.skip_guc_pc) return 0; - xe_device_mem_access_get(pc_to_xe(pc)); - ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); if (ret) - goto out; + return ret; ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out; + return ret; xe_gt_idle_disable_c6(gt); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return 0; } static void pc_init_pcode_freq(struct xe_guc_pc *pc) @@ -870,11 
+837,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_gt_assert(gt, xe_device_uc_enabled(xe)); - xe_device_mem_access_get(pc_to_xe(pc)); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out_fail_force_wake; + return ret; if (xe->info.skip_guc_pc) { if (xe->info.platform != XE_PVC) @@ -914,8 +879,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out_fail_force_wake: - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -928,12 +891,9 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); int ret; - xe_device_mem_access_get(pc_to_xe(pc)); - if (xe->info.skip_guc_pc) { xe_gt_idle_disable_c6(pc_to_gt(pc)); - ret = 0; - goto out; + return 0; } mutex_lock(&pc->freq_lock); @@ -942,16 +902,14 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) ret = pc_action_shutdown(pc); if (ret) - goto out; + return ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); - ret = -EIO; + return -EIO; } -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return 0; } /** @@ -965,9 +923,7 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) struct xe_device *xe = pc_to_xe(pc); if (xe->info.skip_guc_pc) { - xe_device_mem_access_get(xe); xe_gt_idle_disable_c6(pc_to_gt(pc)); - xe_device_mem_access_put(xe); return; } From 5b2b3a0fbb287d862cb39469a0f4826d8073b30e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:30 -0500 Subject: [PATCH 0011/1477] drm/xe: Runtime PM wake on every debugfs call Let's ensure our PCI device is awaken on every debugfs call. Let's increase the runtime_pm protection and start moving that to the outer bounds. Also let's remove the mem_access_{get,put} from where they are not needed anymore. 
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-7-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_debugfs.c | 10 +++--- drivers/gpu/drm/xe/xe_gt_debugfs.c | 53 ++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_guc_debugfs.c | 9 ++--- drivers/gpu/drm/xe/xe_huc_debugfs.c | 5 +-- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 5 ++- 5 files changed, 66 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 01db5b27bec5..8abdf3c17e1d 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -12,6 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_gt_debugfs.h" +#include "xe_pm.h" #include "xe_step.h" #ifdef CONFIG_DRM_XE_DEBUG @@ -37,6 +38,8 @@ static int info(struct seq_file *m, void *data) struct xe_gt *gt; u8 id; + xe_pm_runtime_get(xe); + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", @@ -63,6 +66,7 @@ static int info(struct seq_file *m, void *data) gt->info.engine_mask); } + xe_pm_runtime_put(xe); return 0; } @@ -76,8 +80,7 @@ static int forcewake_open(struct inode *inode, struct file *file) struct xe_gt *gt; u8 id; - xe_device_mem_access_get(xe); - + xe_pm_runtime_get(xe); for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -92,8 +95,7 @@ static int forcewake_release(struct inode *inode, struct file *file) for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c4b67cf09f8f..6b4dc2927727 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -18,6 +18,7 @@ #include "xe_lrc.h" #include "xe_macros.h" 
#include "xe_pat.h" +#include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_uc_debugfs.h" @@ -37,10 +38,10 @@ static int hw_engines(struct seq_file *m, void *data) enum xe_hw_engine_id id; int err; - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) { - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return err; } @@ -48,7 +49,7 @@ static int hw_engines(struct seq_file *m, void *data) xe_hw_engine_print(hwe, &p); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); if (err) return err; @@ -59,18 +60,23 @@ static int force_reset(struct seq_file *m, void *data) { struct xe_gt *gt = node_to_gt(m->private); + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_reset_async(gt); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } static int sa_info(struct seq_file *m, void *data) { - struct xe_tile *tile = gt_to_tile(node_to_gt(m->private)); + struct xe_gt *gt = node_to_gt(m->private); + struct xe_tile *tile = gt_to_tile(gt); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p, tile->mem.kernel_bb_pool->gpu_addr); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -80,7 +86,9 @@ static int topology(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_topology_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -90,7 +98,9 @@ static int steering(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_mcr_steering_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -99,8 +109,13 @@ static int ggtt(struct seq_file *m, void *data) { struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p 
= drm_seq_file_printer(m); + int ret; - return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); + xe_pm_runtime_get(gt_to_xe(gt)); + ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return ret; } static int register_save_restore(struct seq_file *m, void *data) @@ -110,6 +125,8 @@ static int register_save_restore(struct seq_file *m, void *data) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; + xe_pm_runtime_get(gt_to_xe(gt)); + xe_reg_sr_dump(&gt->reg_sr, &p); drm_printf(&p, "\n"); @@ -127,6 +144,8 @@ static int register_save_restore(struct seq_file *m, void *data) for_each_hw_engine(hwe, gt, id) xe_reg_whitelist_dump(&hwe->reg_whitelist, &p); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } @@ -135,7 +154,9 @@ static int workarounds(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_wa_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } @@ -145,48 +166,70 @@ static int pat(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_pat_dump(gt, &p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } static int rcs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int ccs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int bcs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); 
struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int vcs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } static int vecs_default_lrc(struct seq_file *m, void *data) { + struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE); + xe_pm_runtime_put(gt_to_xe(gt)); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index ffd7d53bcc42..d3822cbea273 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -14,6 +14,7 @@ #include "xe_guc_ct.h" #include "xe_guc_log.h" #include "xe_macros.h" +#include "xe_pm.h" static struct xe_guc *node_to_guc(struct drm_info_node *node) { @@ -26,9 +27,9 @@ static int guc_info(struct seq_file *m, void *data) struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_guc_print_info(guc, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -39,9 +40,9 @@ static int guc_log(struct seq_file *m, void *data) struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_guc_log_print(&guc->log, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index 18585a7eeb9d..3a888a40188b 100644 --- 
a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -12,6 +12,7 @@ #include "xe_gt.h" #include "xe_huc.h" #include "xe_macros.h" +#include "xe_pm.h" static struct xe_gt * huc_to_gt(struct xe_huc *huc) @@ -36,9 +37,9 @@ static int huc_info(struct seq_file *m, void *data) struct xe_device *xe = huc_to_xe(huc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_huc_print_info(huc, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index 3e1fa0c832ca..9844a8edbfe1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -73,7 +73,10 @@ static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man, static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { - + /* + * This function is called by debugfs entry and would require + * pm_runtime_{get,put} wrappers around any operation. + */ } static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { From db5a5a8338347ef43bb53ccab730cde326d030e8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:31 -0500 Subject: [PATCH 0012/1477] drm/xe: Replace dma_buf mem_access per direct xe_pm_runtime calls Continue on the path to entirely remove mem_access helpers in favour of the direct xe_pm_runtime calls. This item is one of the direct outer bounds of the protection. 
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-8-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_dma_buf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index da2627ed6ae7..5b26af21e029 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -16,6 +16,7 @@ #include "tests/xe_test.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_pm.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" @@ -33,7 +34,7 @@ static int xe_dma_buf_attach(struct dma_buf *dmabuf, if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) return -EOPNOTSUPP; - xe_device_mem_access_get(to_xe_device(obj->dev)); + xe_pm_runtime_get(to_xe_device(obj->dev)); return 0; } @@ -42,7 +43,7 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf, { struct drm_gem_object *obj = attach->dmabuf->priv; - xe_device_mem_access_put(to_xe_device(obj->dev)); + xe_pm_runtime_put(to_xe_device(obj->dev)); } static int xe_dma_buf_pin(struct dma_buf_attachment *attach) From 5a2a90847450f193f681886db56c3ace5a945785 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:32 -0500 Subject: [PATCH 0013/1477] drm/xe: Convert hwmon from mem_access to xe_pm_runtime calls Continue the work to kill the mem_access in favor of a pure runtime pm. 
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-9-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_hwmon.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index b82233a41606..a256af8c2012 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -18,6 +18,7 @@ #include "xe_pcode.h" #include "xe_pcode_api.h" #include "xe_sriov.h" +#include "xe_pm.h" enum xe_hwmon_reg { REG_PKG_RAPL_LIMIT, @@ -266,7 +267,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); @@ -275,7 +276,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a mutex_unlock(&hwmon->hwmon_lock); - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); @@ -354,7 +355,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); @@ -363,7 +364,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * mutex_unlock(&hwmon->hwmon_lock); - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return count; } @@ -384,12 +385,12 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); if (attr == 
&sensor_dev_attr_power1_max_interval.dev_attr.attr) ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0; - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -610,7 +611,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: @@ -630,7 +631,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -642,7 +643,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: @@ -662,7 +663,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -674,7 +675,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: @@ -688,7 +689,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } From f9d9f94cfb25c9be40a6f51fbfdd1131dfc55ded Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:33 -0500 Subject: [PATCH 0014/1477] drm/xe: Remove useless mem_access protection for query ioctls Every IOCTL is already protected on its outer bounds by xe_pm_runtime_{get,put} calls, so we can now 
remove these. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-10-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_query.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 92bb06c0586e..f1876b556ab4 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -147,7 +147,6 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - xe_device_mem_access_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); __read_timestamps(gt, @@ -159,7 +158,6 @@ query_engine_cycles(struct xe_device *xe, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); resp.width = 36; /* Only write to the output fields of user query */ @@ -433,9 +431,7 @@ static int query_hwconfig(struct xe_device *xe, if (!hwconfig) return -ENOMEM; - xe_device_mem_access_get(xe); xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig); - xe_device_mem_access_put(xe); if (copy_to_user(query_ptr, hwconfig, size)) { kfree(hwconfig); From 48fef288070b31a6a04ed13c929f1d15ebf52fe5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:34 -0500 Subject: [PATCH 0015/1477] drm/xe: Convert gsc_work from mem_access to xe_pm_runtime Let's directly use xe_pm_runtime_{get,put} instead of the mem_access helpers that are going away soon.
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-11-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_gsc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index a61994292c43..d9aa815a5bc2 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -21,6 +21,7 @@ #include "xe_huc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sched_job.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -285,7 +286,7 @@ static void gsc_work(struct work_struct *work) gsc->work_actions = 0; spin_unlock_irq(&gsc->lock); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); if (actions & GSC_ACTION_FW_LOAD) { @@ -300,7 +301,7 @@ static void gsc_work(struct work_struct *work) xe_gsc_proxy_request_handler(gsc); xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); } int xe_gsc_init(struct xe_gsc *gsc) From 5d118681d341fbf470d06bf1938d996d3bed6a1d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:35 -0500 Subject: [PATCH 0016/1477] drm/xe: Remove mem_access from suspend and resume functions At these points, we are sure that device is awake in D0. Likely in the middle of the transition, but awake. So, these extra protections are useless. Let's remove it and continue with the killing of xe_device_mem_access. 
Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-12-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index b75f0bf0a9a1..e0c79351a131 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -711,13 +711,11 @@ void xe_gt_reset_async(struct xe_gt *gt) void xe_gt_suspend_prepare(struct xe_gt *gt) { - xe_device_mem_access_get(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_uc_stop_prepare(&gt->uc); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); } int xe_gt_suspend(struct xe_gt *gt) @@ -726,7 +724,6 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_sanitize(gt); - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -736,7 +733,6 @@ int xe_gt_suspend(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_info(gt, "suspended\n"); return 0; @@ -744,7 +740,6 @@ int xe_gt_suspend(struct xe_gt *gt) err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; @@ -754,7 +749,6 @@ int xe_gt_resume(struct xe_gt *gt) { int err; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -764,7 +758,6 @@ int xe_gt_resume(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_info(gt, "resumed\n"); return 0; @@ -772,7 +765,6 @@ int xe_gt_resume(struct xe_gt *gt) err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt),
XE_FORCEWAKE_ALL)); err_msg: - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; From 23be0e6e8316aaab6acfe15f035960914f820a2a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:36 -0500 Subject: [PATCH 0017/1477] drm/xe: Convert gt_reset from mem_access to xe_pm_runtime We need to ensure that device is in D0 on any kind of GT reset. We are likely already protected by outer bounds like exec, but if exec/sched ref gets dropped on a hang, we might transition to D3 before we are able to perform the gt_reset and recover. Suggested-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-13-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e0c79351a131..45646d3aea2d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -43,6 +43,7 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" +#include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" #include "xe_ring_ops.h" @@ -644,9 +645,9 @@ static int gt_reset(struct xe_gt *gt) goto err_fail; } + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -670,8 +671,8 @@ static int gt_reset(struct xe_gt *gt) goto err_out; err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(err); + xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); @@ -681,7 +682,7 @@ static int gt_reset(struct xe_gt *gt) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: XE_WARN_ON(xe_uc_start(&gt->uc)); - xe_device_mem_access_put(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); err_fail: xe_gt_err(gt, "reset
failed (%pe)\n", ERR_PTR(err)); From 1732391763634b9e877670ecf2e2268d7323ee15 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2024 11:39:37 -0500 Subject: [PATCH 0018/1477] drm/xe: Remove useless mem_access on PAT dumps PAT dumps are already protected by the xe_pm_runtime_{get,put} around the debugfs call. So, these can be removed. Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222163937.138342-14-rodrigo.vivi@intel.com --- drivers/gpu/drm/xe/xe_pat.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index e148934d554b..66d8e3dd8237 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -174,7 +174,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -192,7 +191,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xelp_pat_ops = { @@ -205,7 +203,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -225,7 +222,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xehp_pat_ops = { @@ -238,7 +234,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -256,7 +251,6 @@ static void 
xehpc_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xehpc_pat_ops = { @@ -269,7 +263,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -292,7 +285,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } /* @@ -325,7 +317,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) int i, err; u32 pat; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -370,7 +361,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xe2_pat_ops = { From b2121f2bd2232cd0556b2182078d159d81497885 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 8 Feb 2024 10:35:39 -0800 Subject: [PATCH 0019/1477] drm/xe: Extend uAPI to query HuC micro-controller firmware version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The infrastructure to query GuC firmware version is already in place. It is extended with a new micro-controller type to query the HuC firmware version. It can be used from user space to know if HuC is running.
Cc: John Harrison Cc: Francois Dugast Cc: Lucas De Marchi Signed-off-by: Francois Dugast Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240208183539.185095-2-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_query.c | 38 +++++++++++++++++++++++++++++++---- include/uapi/drm/xe_drm.h | 1 + 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index f1876b556ab4..a6a20a6dd360 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -540,14 +540,44 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; break; } + case XE_QUERY_UC_TYPE_HUC: { + struct xe_gt *media_gt = NULL; + struct xe_huc *huc; + + if (MEDIA_VER(xe) >= 13) { + struct xe_tile *tile; + u8 gt_id; + + for_each_tile(tile, xe, gt_id) { + if (tile->media_gt) { + media_gt = tile->media_gt; + break; + } + } + } else { + media_gt = xe->tiles[0].primary_gt; + } + + if (!media_gt) + break; + + huc = &media_gt->uc.huc; + if (huc->fw.status == XE_UC_FIRMWARE_RUNNING) + version = &huc->fw.versions.found[XE_UC_FW_VER_RELEASE]; + break; + } default: return -EINVAL; } - resp.branch_ver = 0; - resp.major_ver = version->major; - resp.minor_ver = version->minor; - resp.patch_ver = version->patch; + if (version) { + resp.branch_ver = 0; + resp.major_ver = version->major; + resp.minor_ver = version->minor; + resp.patch_ver = version->patch; + } else { + return -ENODEV; + } if (copy_to_user(query_ptr, &resp, size)) return -EFAULT; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 538a3ac95c54..2fc19177d2b0 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -583,6 +583,7 @@ struct drm_xe_query_engine_cycles { struct drm_xe_query_uc_fw_version { /** @uc_type: The micro-controller type to query firmware 
version */ #define XE_QUERY_UC_TYPE_GUC_SUBMISSION 0 +#define XE_QUERY_UC_TYPE_HUC 1 __u16 uc_type; /** @pad: MBZ */ From 35ed1d2bfff7b1969e7f99f3641a83ea54f037e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Feb 2024 07:55:54 -0800 Subject: [PATCH 0020/1477] drm/xe: Use vmalloc for array of bind allocation in bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use vmalloc in an effort to allow a user to pass in a large number of binds in an IOCTL (mesa use case). Also use array allocations rather than open coding the size calculation. v2: Use __GFP_ACCOUNT for allocations (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240226155554.103384-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index db3f049a47dc..4154ef75e369 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2770,8 +2770,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2861,7 +2862,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2949,13 +2950,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err =
-ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3096,10 +3099,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3123,10 +3126,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } From ba6bbdc6eaef92998ec7f323c9e1211d344d2556 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Feb 2024 16:53:53 -0800 Subject: [PATCH 0021/1477] drm/xe: get rid of MAX_BINDS Mesa has been issuing a single bind operation per ioctl since xe.ko changed to GPUVA due to xe.ko bug #746. If I change Mesa to try again to issue every single bind operation it can in the same ioctl, it hits the MAX_BINDS assertion when running Vulkan conformance tests. Test dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 issues 960 bind operations in a single ioctl, it's the most I could find in the conformance suite. I don't see a reason to keep the MAX_BINDS restriction: it doesn't seem to be preventing any specific issue. If the number is too big for the memory allocations, then those will fail. Nothing related to num_binds seems to be using the stack. Let's just get rid of it.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Testcase: dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/746 Cc: Matthew Brost Signed-off-by: Paulo Zanoni Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240215005353.1295420-1-paulo.r.zanoni@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4154ef75e369..4e9ceb1817f9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2749,8 +2749,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2762,8 +2760,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { From e275d61c5f3ffc250b2a9601d36fbd11b4db774b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 23 Feb 2024 12:46:59 -0800 Subject: [PATCH 0022/1477] drm/xe/guc: Handle timing out of signaled jobs gracefully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timing out of signaled jobs can happen during regular operations (e.g. an exec queue closed immediately after last fence signaled). The TDR can pass the worker which frees jobs. Rather than running through the TDR if a signaled job is found, simply free it without any debug messages.
Cc: Thomas Hellström Reported-by: José Roberto de Souza Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1271 Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Tested-by: José Roberto de Souza Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240223204659.40750-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ff77bc8da1b2..29748e40555f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -929,20 +929,26 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; int i = 0; - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); + /* + * TDR has fired before free job worker. Common if exec queue + * immediately closed after last fence signaled. 
+ */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { + guc_exec_queue_free_job(drm_job); - simple_error_capture(q); - xe_devcoredump(job); - } else { - drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); + return DRM_GPU_SCHED_STAT_NOMINAL; } + + drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), q->guc->id, q->flags); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + + simple_error_capture(q); + xe_devcoredump(job); + trace_xe_sched_job_timedout(job); /* Kill the run_job entry point */ From 2e56e34d0d9cbce595e288676bc7eaadcebcd6e1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Feb 2024 09:20:42 +0000 Subject: [PATCH 0023/1477] drm/i915/dp: Fix spelling mistake "redect" -> "reject" There is a spelling mistake in a drm_dbg_kms message. Fix it. 
Signed-off-by: Colin Ian King Link: https://patchwork.freedesktop.org/patch/msgid/20240228092042.4125617-1-colin.i.king@gmail.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c index 75d76f91ecbd..6503abdc2b98 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c @@ -348,7 +348,7 @@ void intel_dp_tunnel_resume(struct intel_dp *intel_dp, out_err: drm_dbg_kms(&i915->drm, - "[DPTUN %s][CONNECTOR:%d:%s][ENCODER:%d:%s] Tunnel can't be resumed, will drop and redect it (err %pe)\n", + "[DPTUN %s][CONNECTOR:%d:%s][ENCODER:%d:%s] Tunnel can't be resumed, will drop and reject it (err %pe)\n", drm_dp_tunnel_name(intel_dp->tunnel), connector->base.base.id, connector->base.name, encoder->base.base.id, encoder->base.name, From 977e5b82e0901480bc201342d39f855fc0a2ef47 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:51 +0200 Subject: [PATCH 0024/1477] drm/xe: Expose user fence from xe_sync_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing getting reference to user fence, we can control the lifetime outside of sync entries. This is needed to allow vma to track the associated user fence that was provided with bind ioctl. 
v2: xe_user_fence can be kept opaque (Jani, Matt) v3: indent fix (Matt) Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_sync.c | 58 ++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_sync.h | 4 +++ drivers/gpu/drm/xe/xe_sync_types.h | 2 +- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index aab92bee1d7c..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct 
work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -340,3 +342,39 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. 
+ * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index f43cdcaca6c5..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; From 158900ade92cce5ab85a06d618eb51e6c7ffb28a Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:52 +0200 Subject: [PATCH 0025/1477] drm/xe: Deny unbinds if uapi ufence pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If user fence was provided for MAP in vm_bind_ioctl and it has still not been signalled, deny UNMAP of said vma with EBUSY as long as 
unsignalled fence exists. This guarantees that MAP vs UNMAP sequences won't escape under the radar if we ever want to track the client's state wrt to completed and accessible MAPs. By means of intercepting the ufence release signalling. v2: find ufence with num_fences > 1 (Matt) v3: careful on clearing vma ufence (Matt) Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1159 Cc: Thomas Hellström Cc: Matthew Brost Cc: Joonas Lahtinen Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-3-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 37 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 7 ++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4e9ceb1817f9..d28260351af2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -903,6 +903,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; @@ -1622,6 +1627,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1755,6 +1770,21 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i < num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + 
return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1762,9 +1792,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 3fce50b91256..4c4a569bde3d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,6 +19,7 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS @@ -105,6 +106,12 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; + + /** + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. + */ + struct xe_user_fence *ufence; }; /** From 0e6fec6da25167a568fbaeb8401d8172069124ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:36 +0100 Subject: [PATCH 0026/1477] drm/xe/kunit: fix link failure with built-in xe When the driver is built-in but the tests are in loadable modules, the helpers don't actually get put into the driver: ERROR: modpost: "xe_kunit_helper_alloc_xe_device" [drivers/gpu/drm/xe/tests/xe_test.ko] undefined! Change the Makefile to ensure they are always part of the driver even when the rest of the kunit tests are in loadable modules. 
Fixes: 5095d13d758b ("drm/xe/kunit: Define helper functions to allocate fake xe device") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-1-arnd@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/Kconfig.debug | 1 - drivers/gpu/drm/xe/Makefile | 6 ++++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 0e31dfb8989e..1a556d087e63 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -10,6 +10,7 @@ config DRM_XE select DRM_BUDDY select DRM_EXEC select DRM_KMS_HELPER + select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER select DRM_DISPLAY_DP_HELPER diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 549065f57a78..df02e5d17d26 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -76,7 +76,6 @@ config DRM_XE_KUNIT_TEST depends on DRM_XE && KUNIT && DEBUG_FS default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m - select DRM_KUNIT_TEST_HELPERS help Choose this option to allow the driver to perform selftests under the kunit framework diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index c531210695db..1a59c15f4d66 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -158,8 +158,10 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o -xe-$(CONFIG_DRM_XE_KUNIT_TEST) += \ - tests/xe_kunit_helpers.o +# include helpers for tests even when XE is built-in +ifdef CONFIG_DRM_XE_KUNIT_TEST +xe-y += tests/xe_kunit_helpers.o +endif # i915 Display compat #defines and #includes subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ From f5d3983366c0b88ec388b3407b29c1c0862ee2b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:37 +0100 Subject: [PATCH 0027/1477] drm/xe/mmio: fix 
build warning for BAR resize on 32-bit clang complains about a nonsensical test on builds with a 32-bit phys_addr_t, which means resizing will always fail: drivers/gpu/drm/xe/xe_mmio.c:109:23: error: result of comparison of constant 4294967296 with expression of type 'resource_size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 109 | root_res->start > 0x100000000ull) | ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Previously, BAR resize was always disallowed on 32-bit kernels, but this apparently changed recently. Since 32-bit machines can in theory support PAE/LPAE for large address spaces, this may end up useful, so change the driver to shut up the warning but still work when phys_addr_t/resource_size_t is 64 bit wide. Fixes: 9a6e6c14bfde ("drm/xe/mmio: Use non-atomic writeq/readq variant for 32b") Fixes: 237412e45390 ("drm/xe: Enable 32bits build") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-2-arnd@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index e3db3a178760..7ba2477452d7 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -106,7 +106,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } From 1408784b599927d2f361bac6dc5170d2ee275f17 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:38 +0100 Subject: [PATCH 0028/1477] drm/xe/xe2: fix 64-bit division in pte_update_size This function does not build on 32-bit targets when the compiler fails to reduce DIV_ROUND_UP() into a shift: ld.lld: error: undefined 
symbol: __aeabi_uldivmod >>> referenced by xe_migrate.c >>> drivers/gpu/drm/xe/xe_migrate.o:(pte_update_size) in archive vmlinux.a There are two instances in this function. Change the first to use an open-coded shift with the same behavior, and the second one to a 32-bit calculation, which is sufficient here as the size is never more than 2^32 pages (16TB). Fixes: 237412e45390 ("drm/xe: Enable 32bits build") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-3-arnd@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a66fdf2d2991..ee1bb938c493 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -462,7 +462,7 @@ static u32 pte_update_size(struct xe_migrate *m, } else { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); - u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT; *L0 = size; *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); From 669cf07d83a95310b565dfd59347013013107a73 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 14 Feb 2024 17:27:20 -0300 Subject: [PATCH 0029/1477] drm/i915/cdclk: Rename intel_cdclk_needs_modeset to intel_cdclk_clock_changed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like the name and description of intel_cdclk_needs_modeset() became inaccurate as of commit 59f9e9cab3a1 ("drm/i915: Skip modeset for cdclk changes if possible"), when it became possible to update the cdclk without requiring disabling the pipes when only changing the cd2x divider was enough. 
Later on we also added the same type of support with squash and crawling with commit 25e0e5ae5610 ("drm/i915/display: Do both crawl and squash when changing cdclk"), commit d4a23930490d ("drm/i915: Allow cdclk squasher to be reconfigured live") and commit d62686ba3b54 ("drm/i915/adl_p: CDCLK crawl support for ADL"). As such, update that function's name and documentation to something more appropriate, since the real checks for requiring modeset are done elsewhere. v2: - Rename to intel_cdclk_clock_changed instead of intel_cdclk_params_changed. (Ville) Cc: Ville Syrjälä Signed-off-by: Gustavo Sousa Reviewed-by: Ville Syrjälä Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240214202719.298407-2-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 13 ++++++------- drivers/gpu/drm/i915/display/intel_cdclk.h | 2 +- .../gpu/drm/i915/display/intel_display_power_well.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ed89b86ea625..77a25caacc16 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2260,16 +2260,15 @@ static bool intel_cdclk_can_squash(struct drm_i915_private *dev_priv, } /** - * intel_cdclk_needs_modeset - Determine if changong between the CDCLK - * configurations requires a modeset on all pipes + * intel_cdclk_clock_changed - Check whether the clock changed * @a: first CDCLK configuration * @b: second CDCLK configuration * * Returns: - * True if changing between the two CDCLK configurations - * requires all pipes to be off, false if not. + * True if CDCLK changed in a way that requires re-programming and + * False otherwise. 
*/ -bool intel_cdclk_needs_modeset(const struct intel_cdclk_config *a, +bool intel_cdclk_clock_changed(const struct intel_cdclk_config *a, const struct intel_cdclk_config *b) { return a->cdclk != b->cdclk || @@ -2322,7 +2321,7 @@ static bool intel_cdclk_can_cd2x_update(struct drm_i915_private *dev_priv, static bool intel_cdclk_changed(const struct intel_cdclk_config *a, const struct intel_cdclk_config *b) { - return intel_cdclk_needs_modeset(a, b) || + return intel_cdclk_clock_changed(a, b) || a->voltage_level != b->voltage_level; } @@ -3229,7 +3228,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) drm_dbg_kms(&dev_priv->drm, "Can change cdclk cd2x divider with pipe %c active\n", pipe_name(pipe)); - } else if (intel_cdclk_needs_modeset(&old_cdclk_state->actual, + } else if (intel_cdclk_clock_changed(&old_cdclk_state->actual, &new_cdclk_state->actual)) { /* All pipes must be switched off while we change the cdclk. */ ret = intel_modeset_all_pipes_late(state, "CDCLK change"); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 48fd7d39e0cd..fa301495e7f1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -60,7 +60,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); u32 intel_read_rawclk(struct drm_i915_private *dev_priv); -bool intel_cdclk_needs_modeset(const struct intel_cdclk_config *a, +bool intel_cdclk_clock_changed(const struct intel_cdclk_config *a, const struct intel_cdclk_config *b); void intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state); void intel_set_cdclk_post_plane_update(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 47cd6bb04366..c4d48498e977 100644 
--- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -968,7 +968,7 @@ void gen9_disable_dc_states(struct drm_i915_private *dev_priv) intel_cdclk_get_cdclk(dev_priv, &cdclk_config); /* Can't read out voltage_level so can't use intel_cdclk_changed() */ drm_WARN_ON(&dev_priv->drm, - intel_cdclk_needs_modeset(&dev_priv->display.cdclk.hw, + intel_cdclk_clock_changed(&dev_priv->display.cdclk.hw, &cdclk_config)); gen9_assert_dbuf_enabled(dev_priv); From f6e4fe152dc65c6c8eb72b40cdfa33c62b8a53a6 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 21 Feb 2024 15:51:32 -0300 Subject: [PATCH 0030/1477] drm/i915/cdclk: Document CDCLK components MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve documentation by giving an overview of the components involved in the generation of the CDCLK. v2: Fix htmldoc error because of missing blank line at the start of bulleted list. Reviewed-by: Ville Syrjälä Signed-off-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240221185131.287302-2-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 77a25caacc16..22473c55b899 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -63,6 +63,32 @@ * DMC will not change the active CDCLK frequency however, so that part * will still be performed by the driver directly. * + * There are multiple components involved in the generation of the CDCLK + * frequency: + * + * - We have the CDCLK PLL, which generates an output clock based on a + * reference clock and a ratio parameter. + * - The CD2X Divider, which divides the output of the PLL based on a + * divisor selected from a set of pre-defined choices. 
+ * - The CD2X Squasher, which further divides the output based on a + * waveform represented as a sequence of bits where each zero + * "squashes out" a clock cycle. + * - And, finally, a fixed divider that divides the output frequency by 2. + * + * As such, the resulting CDCLK frequency can be calculated with the + * following formula: + * + * cdclk = vco / cd2x_div / (sq_len / sq_div) / 2 + * + * , where vco is the frequency generated by the PLL; cd2x_div + * represents the CD2X Divider; sq_len and sq_div are the bit length + * and the number of high bits for the CD2X Squasher waveform, respectively; + * and 2 represents the fixed divider. + * + * Note that some older platforms do not contain the CD2X Divider + * and/or CD2X Squasher, in which case we can ignore their respective + * factors in the formula above. + * * Several methods exist to change the CDCLK frequency, which ones are * supported depends on the platform: * From 4c47049d93b7a7fc2230cded84a6aec6bbd3d61e Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 27 Feb 2024 08:49:22 -0800 Subject: [PATCH 0031/1477] drm/xe/guc: Fix missing topology init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit init_steering_dss needs the topology dss mask to be initialized first.
Fixed by moving xe_gt_topology_init ahead of xe_gt_mcr_init Fixes: bf8ec3c3e82c ("drm/xe: Initialize GuC earlier during probe") Cc: Michał Winiarski Signed-off-by: Zhanjun Dong Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240227164922.281346-2-zhanjun.dong@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 45646d3aea2d..85408e7a932b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -315,8 +315,6 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_gt_topology_init(gt); - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); if (err) return err; @@ -503,6 +501,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) if (err) goto out; + xe_gt_topology_init(gt); xe_gt_mcr_init(gt); xe_pat_init(gt); From 2689b33b88641a3b9a8cc411a0c8094cbed7e871 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 8 Feb 2024 19:16:42 -0500 Subject: [PATCH 0032/1477] dt-bindings: display: panel-simple-dsi: add s6e3fa7 ams559nk06 compat The Samsung S6E3FA7 display controller and AMS559NK06 panel are used for the display in Pixel 3a devices. Add the compatible for it. 
Signed-off-by: Richard Acayan Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240209001639.387374-7-mailingradian@gmail.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240209001639.387374-7-mailingradian@gmail.com --- .../devicetree/bindings/display/panel/panel-simple-dsi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml index f9160d7bac3c..d3abd7f4ebcd 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml @@ -50,6 +50,8 @@ properties: - panasonic,vvx10f004b00 # Panasonic 10" WUXGA TFT LCD panel - panasonic,vvx10f034n00 + # Samsung s6e3fa7 1080x2220 based AMS559NK06 AMOLED panel + - samsung,s6e3fa7-ams559nk06 # Samsung s6e3fc2x01 1080x2340 AMOLED panel - samsung,s6e3fc2x01 # Samsung sofef00 1080x2280 AMOLED panel From bf0390e2c95bf630b22dddc7cde5f83762b658e5 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 8 Feb 2024 19:16:43 -0500 Subject: [PATCH 0033/1477] drm/panel: add samsung s6e3fa7 panel driver The S6E3FA7 display controller is enabled in every Pixel 3a (non-XL) variant. Add the driver for it, generated by linux-mdss-dsi-panel-driver-generator. There are other panels connected to the same S6E3FA7 display controller, such as the AMS604NL01 panel, which are incompatible with this driver. Name the device tree compatible after the panel model according to iFixit. 
Link: https://github.com/msm8916-mainline/linux-mdss-dsi-panel-driver-generator Link: https://android.googlesource.com/kernel/msm/+/7fda1cd7b64710dafac5f34899611c6d35eb4cd2/arch/arm64/boot/dts/google/dsi-panel-s6e3fa7-1080p-cmd.dtsi Link: https://github.com/msm8953-mainline/linux/blob/v6.6.12-r0/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c Link: https://www.ifixit.com/Guide/Image/meta/muyjtLQTHu6MDkhK Signed-off-by: Richard Acayan Reviewed-by: Jessica Zhang Link: https://lore.kernel.org/r/20240209001639.387374-8-mailingradian@gmail.com Signed-off-by: Neil Armstrong --- drivers/gpu/drm/panel/Kconfig | 9 + drivers/gpu/drm/panel/Makefile | 1 + drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c | 285 ++++++++++++++++++ 3 files changed, 295 insertions(+) create mode 100644 drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d037b3b8b999..6dc451f58a3e 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -586,6 +586,15 @@ config DRM_PANEL_SAMSUNG_LD9040 depends on BACKLIGHT_CLASS_DEVICE select VIDEOMODE_HELPERS +config DRM_PANEL_SAMSUNG_S6E3FA7 + tristate "Samsung S6E3FA7 panel driver" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for the Samsung S6E3FA7 + 1080x2220 panel.
+ config DRM_PANEL_SAMSUNG_S6D16D0 tristate "Samsung S6D16D0 DSI video mode panel" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index f156d7fa0bcc..24a02655d726 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D27A1) += panel-samsung-s6d27a1.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0) += panel-samsung-s6d7aa0.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3FA7) += panel-samsung-s6e3fa7.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03) += panel-samsung-s6e63j0x03.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0) += panel-samsung-s6e63m0.o diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c b/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c new file mode 100644 index 000000000000..10bc8fb5f1f9 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the Samsung S6E3FA7 panel. + * + * Copyright (c) 2022-2024, The Linux Foundation. All rights reserved. + * Generated with linux-mdss-dsi-panel-driver-generator from vendor device tree: + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include