A suspend/resume error fix for ivpu, a couple of scheduler fixes for

nouveau, a patch to support large page arrays in prime, a uninitialized variable fix in crtc, a locking fix in rockchip/vop2 and a buddy allocator error reporting fix. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRcEzekXsqa64kGDp7j7w1vZxhRxQUCZc3QIQAKCRDj7w1vZxhR xcKEAPwL2+B0A5ospkDPQanuxjAqxD9oj7f6rPL21douxs1RiAEA8QjLka/2VeDo R4hHMXh8ZrhHPiv3R2WmPSD1+Uw7rQg= =2keo -----END PGP SIGNATURE----- Merge tag 'drm-misc-fixes-2024-02-15' of git://anongit.freedesktop.org/drm/drm-misc into drm-fixes A suspend/resume error fix for ivpu, a couple of scheduler fixes for nouveau, a patch to support large page arrays in prime, a uninitialized variable fix in crtc, a locking fix in rockchip/vop2 and a buddy allocator error reporting fix. Signed-off-by: Dave Airlie <airlied@redhat.com> From: Maxime Ripard <mripard@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/b4ffqzigtfh6cgzdpwuk6jlrv3dnk4hu6etiizgvibysqgtl2p@42n2gdfdd5eu
2024-02-16 14:33:09 +10:00 · 2024-02-16 14:33:09 +10:00 · 38df7e5e6c
parent 841c351693 a64056bb5a
commit 38df7e5e6c
16 changed files with 216 additions and 50 deletions
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@ -510,16 +510,6 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
 	return ret;
 }

-static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
-{
-	ivpu_boot_dpu_active_drive(vdev, false);
-	ivpu_boot_pwr_island_isolation_drive(vdev, true);
-	ivpu_boot_pwr_island_trickle_drive(vdev, false);
-	ivpu_boot_pwr_island_drive(vdev, false);
-
-	return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
-}
-
 static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
 {
 	u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
@ -616,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
 	return 0;
 }

+static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev)
+{
+	int ret;
+	u32 val;
+
+	if (IVPU_WA(punit_disabled))
+		return 0;
+
+	ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+	if (ret) {
+		ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
+		return ret;
+	}
+
+	val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
+	val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
+	REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
+
+	ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+	if (ret)
+		ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+	return ret;
+}
+
 static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
 {
 	int ret = 0;

-	if (ivpu_boot_pwr_domain_disable(vdev)) {
-		ivpu_err(vdev, "Failed to disable power domain\n");
+	if (ivpu_hw_37xx_ip_reset(vdev)) {
+		ivpu_err(vdev, "Failed to reset NPU\n");
 		ret = -EIO;
 	}

@ -661,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
 {
 	int ret;

+	/* PLL requests may fail when powering down, so issue WP 0 here */
+	ret = ivpu_pll_disable(vdev);
+	if (ret)
+		ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret);
+
 	ret = ivpu_hw_37xx_d0i3_disable(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@ -58,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev)
 {
 	int ret;

+	/* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+	pci_save_state(to_pci_dev(vdev->drm.dev));
+
 	ret = ivpu_shutdown(vdev);
-	if (ret) {
+	if (ret)
 		ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-		return ret;
-	}
+
+	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);

 	return ret;
 }
@ -71,6 +74,9 @@ static int ivpu_resume(struct ivpu_device *vdev)
 {
 	int ret;

+	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+	pci_restore_state(to_pci_dev(vdev->drm.dev));
+
 retry:
 	ret = ivpu_hw_power_up(vdev);
 	if (ret) {
@ -120,15 +126,20 @@ static void ivpu_pm_recovery_work(struct work_struct *work)

 	ivpu_fw_log_dump(vdev);

-retry:
-	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
-	if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
-		cond_resched();
-		goto retry;
-	}
+	atomic_inc(&vdev->pm->reset_counter);
+	atomic_set(&vdev->pm->reset_pending, 1);
+	down_write(&vdev->pm->reset_lock);

-	if (ret && ret != -EAGAIN)
-		ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
+	ivpu_suspend(vdev);
+	ivpu_pm_prepare_cold_boot(vdev);
+	ivpu_jobs_abort_all(vdev);
+
+	ret = ivpu_resume(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
+
+	up_write(&vdev->pm->reset_lock);
+	atomic_set(&vdev->pm->reset_pending, 0);

 	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
 	pm_runtime_mark_last_busy(vdev->drm.dev);
@ -200,9 +211,6 @@ int ivpu_pm_suspend_cb(struct device *dev)
 	ivpu_suspend(vdev);
 	ivpu_pm_prepare_warm_boot(vdev);

-	pci_save_state(to_pci_dev(dev));
-	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
-
 	ivpu_dbg(vdev, PM, "Suspend done.\n");

 	return 0;
@ -216,9 +224,6 @@ int ivpu_pm_resume_cb(struct device *dev)

 	ivpu_dbg(vdev, PM, "Resume..\n");

-	pci_set_power_state(to_pci_dev(dev), PCI_D0);
-	pci_restore_state(to_pci_dev(dev));
-
 	ret = ivpu_resume(vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to resume: %d\n", ret);
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
 	} while (1);

 	list_splice_tail(&allocated, blocks);
+
+	if (total_allocated < size) {
+		err = -ENOSPC;
+		goto err_free;
+	}
+
 	return 0;

 err_undo:
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@ -904,6 +904,7 @@ out:
 	connector_set = NULL;
 	fb = NULL;
 	mode = NULL;
+	num_connectors = 0;

 	DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);

--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
 	if (max_segment == 0)
 		max_segment = UINT_MAX;
 	err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
-						nr_pages << PAGE_SHIFT,
+						(unsigned long)nr_pages << PAGE_SHIFT,
 						max_segment, GFP_KERNEL);
 	if (err) {
 		kfree(sg);
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 	struct nouveau_abi16_ntfy *ntfy, *temp;

 	/* Cancel all jobs from the entity's queue. */
-	drm_sched_entity_fini(&chan->sched.entity);
+	if (chan->sched)
+		drm_sched_entity_fini(&chan->sched->entity);

 	if (chan->chan)
 		nouveau_channel_idle(chan->chan);

-	nouveau_sched_fini(&chan->sched);
+	if (chan->sched)
+		nouveau_sched_destroy(&chan->sched);

 	/* cleanup notifier state */
 	list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
@ -337,10 +339,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	if (ret)
 		goto done;

-	ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
-				 chan->chan->dma.ib_max);
-	if (ret)
-		goto done;
+	/* If we're not using the VM_BIND uAPI, we don't need a scheduler.
+	 *
+	 * The client lock is already acquired by nouveau_abi16_get().
+	 */
+	if (nouveau_cli_uvmm(cli)) {
+		ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
+					   chan->chan->dma.ib_max);
+		if (ret)
+			goto done;
+	}

 	init->channel = chan->chan->chid;

--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
 	struct nouveau_bo *ntfy;
 	struct nouveau_vma *ntfy_vma;
 	struct nvkm_mm  heap;
-	struct nouveau_sched sched;
+	struct nouveau_sched *sched;
 };

 struct nouveau_abi16 {
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
 	WARN_ON(!list_empty(&cli->worker));

 	usif_client_fini(cli);
-	nouveau_sched_fini(&cli->sched);
+	if (cli->sched)
+		nouveau_sched_destroy(&cli->sched);
 	if (uvmm)
 		nouveau_uvmm_fini(uvmm);
 	nouveau_vmm_fini(&cli->svm);
@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 	cli->mem = &mems[ret];

 	/* Don't pass in the (shared) sched_wq in order to let
-	 * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+	 * nouveau_sched_create() create a dedicated one for VM_BIND jobs.
 	 *
 	 * This is required to ensure that for VM_BIND jobs free_job() work and
 	 * run_job() work can always run concurrently and hence, free_job() work
@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 	 * locks which indirectly or directly are held for allocations
 	 * elsewhere.
 	 */
-	ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
+	ret = nouveau_sched_create(&cli->sched, drm, NULL, 1);
 	if (ret)
 		goto done;

--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@ -98,7 +98,7 @@ struct nouveau_cli {
 		bool disabled;
 	} uvmm;

-	struct nouveau_sched sched;
+	struct nouveau_sched *sched;

 	const struct nvif_mclass *mem;

--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
 	if (ret)
 		goto out;

-	args.sched = &chan16->sched;
+	args.sched = chan16->sched;
 	args.file_priv = file_priv;
 	args.chan = chan;

--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = {
 	.free_job = nouveau_sched_free_job,
 };

-int
+static int
 nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
 		   struct workqueue_struct *wq, u32 credit_limit)
 {
@ -453,7 +453,30 @@ fail_wq:
 	return ret;
 }

-void
+int
+nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+		     struct workqueue_struct *wq, u32 credit_limit)
+{
+	struct nouveau_sched *sched;
+	int ret;
+
+	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
+	if (!sched)
+		return -ENOMEM;
+
+	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
+	if (ret) {
+		kfree(sched);
+		return ret;
+	}
+
+	*psched = sched;
+
+	return 0;
+}
+
+
+static void
 nouveau_sched_fini(struct nouveau_sched *sched)
 {
 	struct drm_gpu_scheduler *drm_sched = &sched->base;
@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched)
 	if (sched->wq)
 		destroy_workqueue(sched->wq);
 }
+
+void
+nouveau_sched_destroy(struct nouveau_sched **psched)
+{
+	struct nouveau_sched *sched = *psched;
+
+	nouveau_sched_fini(sched);
+	kfree(sched);
+
+	*psched = NULL;
+}
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@ -111,8 +111,8 @@ struct nouveau_sched {
 	} job;
 };

-int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
-		       struct workqueue_struct *wq, u32 credit_limit);
-void nouveau_sched_fini(struct nouveau_sched *sched);
+int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+			 struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_destroy(struct nouveau_sched **psched);

 #endif
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
 	if (ret)
 		return ret;

-	buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
+	buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL);
 	if (!buffer->fault)
 		return -ENOMEM;

--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
 	if (ret)
 		return ret;

-	args.sched = &cli->sched;
+	args.sched = cli->sched;
 	args.file_priv = file_priv;

 	ret = nouveau_uvmm_vm_bind(&args);
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@ -1985,8 +1985,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 		clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
 	}

-	if (!clock)
+	if (!clock) {
+		vop2_unlock(vop2);
 		return;
+	}

 	if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
 	    !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@ -8,6 +8,7 @@

 #include <linux/prime_numbers.h>
 #include <linux/sched/signal.h>
+#include <linux/sizes.h>

 #include <drm/drm_buddy.h>

@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)
 	return (1 << order) * chunk_size;
 }

+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+	u64 mm_size, ps = SZ_4K, i, n_pages, total;
+	struct drm_buddy_block *block;
+	struct drm_buddy mm;
+	LIST_HEAD(left);
+	LIST_HEAD(middle);
+	LIST_HEAD(right);
+	LIST_HEAD(allocated);
+
+	mm_size = 16 * 3 * SZ_4K;
+
+	KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+	/*
+	 * Idea is to fragment the address space by alternating block
+	 * allocations between three different lists; one for left, middle and
+	 * right. We can then free a list to simulate fragmentation. In
+	 * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+	 * including the try_harder path.
+	 */
+
+	i = 0;
+	n_pages = mm_size / ps;
+	do {
+		struct list_head *list;
+		int slot = i % 3;
+
+		if (slot == 0)
+			list = &left;
+		else if (slot == 1)
+			list = &middle;
+		else
+			list = &right;
+		KUNIT_ASSERT_FALSE_MSG(test,
+				       drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							      ps, ps, list, 0),
+				       "buddy_alloc hit an error size=%d\n",
+				       ps);
+	} while (++i < n_pages);
+
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+	drm_buddy_free_list(&mm, &middle);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%llu\n", 3 * ps);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   2 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+	drm_buddy_free_list(&mm, &right);
+	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							   3 * ps, ps, &allocated,
+							   DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc didn't error size=%llu\n", 3 * ps);
+	/*
+	 * At this point we should have enough contiguous space for 2 blocks,
+	 * however they are never buddies (since we freed middle and right) so
+	 * will require the try_harder logic to find them.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    2 * ps, ps, &allocated,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+	drm_buddy_free_list(&mm, &left);
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+							    3 * ps, ps, &allocated,
+							    DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+			       "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+	total = 0;
+	list_for_each_entry(block, &allocated, link)
+		total += drm_buddy_block_size(&mm, block);
+
+	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_fini(&mm);
+}
+
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
@ -280,6 +368,7 @@ static struct kunit_case drm_buddy_tests[] = {
 	KUNIT_CASE(drm_test_buddy_alloc_optimistic),
 	KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
 	KUNIT_CASE(drm_test_buddy_alloc_pathological),
+	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
 	{}
 };