From 74a49415144035f171751d55b11ba04c9f348f9f Mon Sep 17 00:00:00 2001 From: lyndonli Date: Sun, 23 Apr 2023 16:46:30 +0800 Subject: [PATCH 01/11] drm/amdgpu: Fix mode2 reset for sienna cichlid Before this change, sienna_cichlid_get_reset_handler will always return NULL, although the module parameter reset_method is 3 when loading amdgpu driver. Signed-off-by: lyndonli Signed-off-by: Yunxiang Li Reviewed-by: Feifei Xu Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 81a6d5b94987..8b8086d5c864 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -40,7 +40,7 @@ static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_c adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev)) return true; #endif - return false; + return amdgpu_reset_method == AMD_RESET_METHOD_MODE2; } static struct amdgpu_reset_handler * From 4eea7fb980dc44545a32eec92e2662053b34cd9d Mon Sep 17 00:00:00 2001 From: lyndonli Date: Sun, 23 Apr 2023 17:05:15 +0800 Subject: [PATCH 02/11] drm/amdgpu: Use the default reset when loading or reloading the driver Below call trace and errors are observed when reloading amdgpu driver with the module parameter reset_method=3. It should do a default reset when loading or reloading the driver, regardless of the module parameter reset_method. v2: add comments inside and modify commit messages. [ +2.180243] [drm] psp gfx command ID_LOAD_TOC(0x20) failed and response status is (0x0) [ +0.000011] [drm:psp_hw_start [amdgpu]] *ERROR* Failed to load toc [ +0.000890] [drm:psp_hw_start [amdgpu]] *ERROR* PSP tmr init failed! [ +0.020683] [drm:amdgpu_fill_buffer [amdgpu]] *ERROR* Trying to clear memory with ring turned off. [ +0.000003] RIP: 0010:amdgpu_bo_release_notify+0x1ef/0x210 [amdgpu] [ +0.000004] Call Trace: [ +0.000003] [ +0.000008] ttm_bo_release+0x2c4/0x330 [amdttm] [ +0.000026] amdttm_bo_put+0x3c/0x70 [amdttm] [ +0.000020] amdgpu_bo_free_kernel+0xe6/0x140 [amdgpu] [ +0.000728] psp_v11_0_ring_destroy+0x34/0x60 [amdgpu] [ +0.000826] psp_hw_init+0xe7/0x2f0 [amdgpu] [ +0.000813] amdgpu_device_fw_loading+0x1ad/0x2d0 [amdgpu] [ +0.000731] amdgpu_device_init.cold+0x108e/0x2002 [amdgpu] [ +0.001071] ? do_pci_enable_device+0xe1/0x110 [ +0.000011] amdgpu_driver_load_kms+0x1a/0x160 [amdgpu] [ +0.000729] amdgpu_pci_probe+0x179/0x3a0 [amdgpu] Signed-off-by: lyndonli Signed-off-by: Yunxiang Li Reviewed-by: Feifei Xu Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9b1eaba85bbd..981a9cfb63b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3578,6 +3578,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, int r, i; bool px = false; u32 max_MBps; + int tmp; adev->shutdown = false; adev->flags = flags; @@ -3799,7 +3800,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; From b03f38b9bd90d9eb29951e56f5a4375984c8dffb Mon Sep 17 00:00:00 2001 From: Shane Xiao Date: Tue, 25 Apr 2023 22:39:08 +0800 Subject: [PATCH 03/11] drm/amdgpu: Enable doorbell selfring after resize FB BAR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] The selfring doorbell aperture will change when resize FB BAR successfully during gmc sw init, we should reorder the sequence of enabling doorbell selfring aperture. [How] Move enable_doorbell_selfring_aperture from *_common_hw_init to *_common_late_init. This fixes the potential issue that GPU ring its own doorbell when this device is in translated mode when iommu is on. v2: Remove *_enable_doorbell_aperture functions (Christian) v3: Add comments to note that why we need enable doorbell selfring late (Christian) Signed-off-by: Shane Xiao Signed-off-by: Aaron Liu Tested-by: Xiaomeng Hou Reviewed-by: Christian K�nig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 23 +++++++++++++---------- drivers/gpu/drm/amd/amdgpu/soc15.c | 25 +++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/soc21.c | 23 +++++++++++++---------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 47420b403871..98c826f1f89b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -531,13 +531,6 @@ static void nv_program_aspm(struct amdgpu_device *adev) } -static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version nv_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, @@ -999,6 +992,11 @@ static int nv_common_late_init(void *handle) } } + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -1038,7 +1036,7 @@ static int nv_common_hw_init(void *handle) if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ - nv_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } @@ -1047,8 +1045,13 @@ static int nv_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - nv_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because nv_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index bc5dd80f10c1..6d15d5cd9e07 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -619,13 +619,6 @@ static void soc15_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version vega10_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, @@ -1125,6 +1118,11 @@ static int soc15_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -1182,7 +1180,8 @@ static int soc15_common_hw_init(void *handle) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ - soc15_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); + /* HW doorbell routing policy: doorbell writing not * in SDMA/IH/MM/ACV range will be routed to CP. So * we need to init SDMA doorbell range prior @@ -1198,8 +1197,14 @@ static int soc15_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - soc15_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc15_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); + if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 514bfc705d5a..744be2a05623 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -450,13 +450,6 @@ static void soc21_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version soc21_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, @@ -764,6 +757,11 @@ static int soc21_common_late_init(void *handle) amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); } + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. + */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } @@ -797,7 +795,7 @@ static int soc21_common_hw_init(void *handle) if (adev->nbio.funcs->remap_hdp_registers) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ - soc21_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } @@ -806,8 +804,13 @@ static int soc21_common_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* disable the doorbell aperture */ - soc21_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc21_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_put_irq(adev); From 13af556104fa93b1945c70bbf8a0a62cd2c92879 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Tue, 25 Apr 2023 13:16:32 +0800 Subject: [PATCH 04/11] drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v11_0_hw_fini The gmc.ecc_irq is enabled by firmware per IFWI setting, and the host driver is not privileged to enable/disable the interrupt. So, it is meaningless to use the amdgpu_irq_put function in gmc_v11_0_hw_fini, which also leads to the call trace. [ 102.980303] Call Trace: [ 102.980303] [ 102.980304] gmc_v11_0_hw_fini+0x54/0x90 [amdgpu] [ 102.980357] gmc_v11_0_suspend+0xe/0x20 [amdgpu] [ 102.980409] amdgpu_device_ip_suspend_phase2+0x240/0x460 [amdgpu] [ 102.980459] amdgpu_device_ip_suspend+0x3d/0x80 [amdgpu] [ 102.980520] amdgpu_device_pre_asic_reset+0xd9/0x490 [amdgpu] [ 102.980573] amdgpu_device_gpu_recover.cold+0x548/0xce6 [amdgpu] [ 102.980687] amdgpu_debugfs_reset_work+0x4c/0x70 [amdgpu] [ 102.980740] process_one_work+0x21f/0x3f0 [ 102.980741] worker_thread+0x200/0x3e0 [ 102.980742] ? process_one_work+0x3f0/0x3f0 [ 102.980743] kthread+0xfd/0x130 [ 102.980743] ? kthread_complete_and_exit+0x20/0x20 [ 102.980744] ret_from_fork+0x22/0x30 Signed-off-by: Horatio Zhang Reviewed-by: Hawking Zhang Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Fixes: c8b5a95b5709 ("drm/amdgpu: Fix desktop freezed after gpu-reset") Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index d809f2ed5600..d95f9fe8f1c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -951,7 +951,6 @@ static int gmc_v11_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v11_0_gart_disable(adev); From 08c677cb0b436a96a836792bb35a8ec5de4999c2 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Tue, 25 Apr 2023 10:52:28 +0800 Subject: [PATCH 05/11] drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v10_0_hw_fini The gmc.ecc_irq is enabled by firmware per IFWI setting, and the host driver is not privileged to enable/disable the interrupt. So, it is meaningless to use the amdgpu_irq_put function in gmc_v10_0_hw_fini, which also leads to the call trace. [ 82.340264] Call Trace: [ 82.340265] [ 82.340269] gmc_v10_0_hw_fini+0x83/0xa0 [amdgpu] [ 82.340447] gmc_v10_0_suspend+0xe/0x20 [amdgpu] [ 82.340623] amdgpu_device_ip_suspend_phase2+0x127/0x1c0 [amdgpu] [ 82.340789] amdgpu_device_ip_suspend+0x3d/0x80 [amdgpu] [ 82.340955] amdgpu_device_pre_asic_reset+0xdd/0x2b0 [amdgpu] [ 82.341122] amdgpu_device_gpu_recover.cold+0x4dd/0xbb2 [amdgpu] [ 82.341359] amdgpu_debugfs_reset_work+0x4c/0x70 [amdgpu] [ 82.341529] process_one_work+0x21d/0x3f0 [ 82.341535] worker_thread+0x1fa/0x3c0 [ 82.341538] ? process_one_work+0x3f0/0x3f0 [ 82.341540] kthread+0xff/0x130 [ 82.341544] ? kthread_complete_and_exit+0x20/0x20 [ 82.341547] ret_from_fork+0x22/0x30 Signed-off-by: Horatio Zhang Reviewed-by: Hawking Zhang Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Fixes: c8b5a95b5709 ("drm/amdgpu: Fix desktop freezed after gpu-reset") Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 7d6f4a68f416..b213dcf8ca06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1143,7 +1143,6 @@ static int gmc_v10_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; From 922a76ba31adf84e72bc947267385be420c689ee Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 2 May 2023 11:59:08 -0400 Subject: [PATCH 06/11] drm/amdgpu: fix an amdgpu_irq_put() issue in gmc_v9_0_hw_fini() As made mention of in commit 08c677cb0b43 ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v10_0_hw_fini") and commit 13af556104fa ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v11_0_hw_fini"). It is meaningless to call amdgpu_irq_put() for gmc.ecc_irq. So, remove it from gmc_v9_0_hw_fini(). Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Fixes: 3029c855d79f ("drm/amdgpu: Fix desktop freezed after gpu-reset") Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 64ab1a306dfe..2fe21cefd772 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1999,7 +1999,6 @@ static int gmc_v9_0_hw_fini(void *handle) if (adev->mmhub.funcs->update_power_gating) adev->mmhub.funcs->update_power_gating(adev, false); - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; From 2397e3d8d2e120355201a8310b61929f5a8bd2c0 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 26 Apr 2023 15:54:55 -0700 Subject: [PATCH 07/11] drm/amdgpu: add a missing lock for AMDGPU_SCHED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mgr->ctx_handles should be protected by mgr->lock. v2: improve commit message v3: add a Fixes tag Signed-off-by: Chia-I Wu Reviewed-by: Christian König Fixes: 52c6a62c64fa ("drm/amdgpu: add interface for editing a foreign process's priority v3") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index e9b45089a28a..863b2a34b2d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -38,6 +38,7 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, { struct fd f = fdget(fd); struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx_mgr *mgr; struct amdgpu_ctx *ctx; uint32_t id; int r; @@ -51,8 +52,11 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, return r; } - idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + idr_for_each_entry(&mgr->ctx_handles, ctx, id) amdgpu_ctx_priority_override(ctx, priority); + mutex_unlock(&mgr->lock); fdput(f); return 0; From 100bd00881f8553d0ccfc99a575966d990c455eb Mon Sep 17 00:00:00 2001 From: Horace Chen Date: Tue, 25 Apr 2023 13:15:32 +0800 Subject: [PATCH 08/11] drm/amdgpu: disable SDMA WPTR_POLL_ENABLE for SR-IOV [Why] This WPTR_POLL_ENABLE is a hardware contigious polling which will cause FCLK and UCLK to keep on a high level. Mostly its case can be covered by F32_WPTR_POLL_ENABLE which polls by firmware. So to save power, SR-IOV also needs to disable this bit Signed-off-by: Horace Chen Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index eb722830531f..3d9a80511a45 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -510,10 +510,7 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); - else - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); From 8f586cc16c1fc3c2202c9d54563db8c7ed365f82 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 13 Apr 2023 17:34:24 -0400 Subject: [PATCH 09/11] drm/amd/display: Change default Z8 watermark values [Why & How] Previous Z8 watermark values were causing flickering and OTC underflow. Updating Z8 watermark values based on the measurement. Reviewed-by: Nicholas Kazlauskas Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Alan Liu Signed-off-by: Leo Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 44082f65de1f..9e54e3d0eb78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -149,8 +149,8 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .num_states = 5, .sr_exit_time_us = 16.5, .sr_enter_plus_exit_time_us = 18.5, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 310.0, + .sr_exit_z8_time_us = 268.0, + .sr_enter_plus_exit_z8_time_us = 393.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, From 682439fffad9fa9a38d37dd1b1318e9374232213 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Wed, 19 Apr 2023 18:17:14 -0400 Subject: [PATCH 10/11] drm/amd/display: filter out invalid bits in pipe_fuses [Why] Reading pipe_fuses from register may have invalid bits set, which may affect the num_pipes erroneously. [How] Add read_pipes_fuses() call and filter bits based on expected number of pipes. Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Samson Tam Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 10 +++++++++- .../gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index a876e6eb6cd8..22dd1ebea618 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2079,6 +2079,14 @@ static struct resource_funcs dcn32_res_pool_funcs = { .restore_mall_state = dcn32_restore_mall_state, }; +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN32 support max 4 pipes */ + value = value & 0xf; + return value; +} + static bool dcn32_resource_construct( uint8_t num_virtual_links, @@ -2122,7 +2130,7 @@ static bool dcn32_resource_construct( pool->base.res_cap = &res_cap_dcn32; /* max number of pipes for ASIC before checking for pipe fuses */ num_pipes = pool->base.res_cap->num_timing_generator; - pipe_fuses = REG_READ(CC_DC_PIPE_DIS); + pipe_fuses = read_pipe_fuses(ctx); for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) if (pipe_fuses & 1 << i) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index e5ab7f3077c4..a60ddb343d13 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1632,6 +1632,14 @@ static struct resource_funcs dcn321_res_pool_funcs = { .restore_mall_state = dcn32_restore_mall_state, }; +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN321 support max 4 pipes */ + value = value & 0xf; + return value; +} + static bool dcn321_resource_construct( uint8_t num_virtual_links, @@ -1674,7 +1682,7 @@ static bool dcn321_resource_construct( pool->base.res_cap = &res_cap_dcn321; /* max number of pipes for ASIC before checking for pipe fuses */ num_pipes = pool->base.res_cap->num_timing_generator; - pipe_fuses = REG_READ(CC_DC_PIPE_DIS); + pipe_fuses = read_pipe_fuses(ctx); for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) if (pipe_fuses & 1 << i) From 1253685f0d3eb3eab0bfc4bf15ab341a5f3da0c8 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 26 Apr 2023 09:46:54 +0800 Subject: [PATCH 11/11] drm/amdgpu: drop redundant sched job cleanup when cs is aborted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once command submission failed due to userptr invalidation in amdgpu_cs_submit, legacy code will perform cleanup of scheduler job. However, it's not needed at all, as former commit has integrated job cleanup stuff into amdgpu_job_free. Otherwise, because of double free, a NULL pointer dereference will occur in such scenario. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2457 Fixes: f7d66fb2ea43 ("drm/amdgpu: cleanup scheduler job initialization v2") Signed-off-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 08eced097bd8..2eb2c66843a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1276,7 +1276,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, r = drm_sched_job_add_dependency(&leader->base, fence); if (r) { dma_fence_put(fence); - goto error_cleanup; + return r; } } @@ -1303,7 +1303,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } if (r) { r = -EAGAIN; - goto error_unlock; + mutex_unlock(&p->adev->notifier_lock); + return r; } p->fence = dma_fence_get(&leader->base.s_fence->finished); @@ -1350,14 +1351,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, mutex_unlock(&p->adev->notifier_lock); mutex_unlock(&p->bo_list->bo_list_mutex); return 0; - -error_unlock: - mutex_unlock(&p->adev->notifier_lock); - -error_cleanup: - for (i = 0; i < p->gang_size; ++i) - drm_sched_job_cleanup(&p->jobs[i]->base); - return r; } /* Cleanup the parser structure */