Merge tag 'drm-intel-gt-next-2021-12-23' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Driver Changes:

- Added bits of DG2 support around page table handling (Stuart Summers, Matthew Auld)
- Fixed wakeref leak in PMU busyness during reset in GuC mode (Umesh Nerlige Ramappa)
- Fixed debugfs access crash if GuC failed to load (John Harrison)
- Bring back GuC error log to error capture, undoing accidental earlier breakage (Thomas Hellström)
- Fixed memory leak in error capture caused by earlier refactoring (Thomas Hellström)
- Exclude reserved stolen from driver use (Chris Wilson)
- Add memory region sanity checking and optional full test (Chris Wilson)
- Fixed buffer size truncation in TTM shmemfs backend (Robert Beckett)
- Use correct lock and don't overwrite internal data structures when stealing GuC context ids (Matthew Brost)
- Don't hog IRQs when destroying GuC contexts (John Harrison)
- Make GuC to Host communication more robust (Matthew Brost)
- Continuation of locking refactoring around VMA and backing store handling (Maarten Lankhorst)
- Improve performance of reading GuC log from debugfs (John Harrison)
- Log when GuC fails to reset an engine (John Harrison)
- Speed up GuC/HuC firmware loading by requesting RP0 (Vinay Belgaumkar)
- Further work on asynchronous VMA unbinding (Thomas Hellström, Christian König)

- Refactor GuC/HuC firmware handling to prepare for future platforms (John Harrison)
- Prepare for future different GuC/HuC firmware signing key sizes (Daniele Ceraolo Spurio, Michal Wajdeczko)
- Add noreclaim annotations (Matthew Auld)
- Remove racy GEM_BUG_ON between GPU reset and GuC communication handling (Matthew Brost)
- Refactor direct i915->gt usage into the to_gt(i915) accessor to prepare for future platforms (Michał Winiarski, Andi Shyti); a sketch of the pattern follows this list
- Increase GuC log size for CONFIG_DEBUG_GEM (John Harrison)

- Fixed engine busyness in selftests when in GuC mode (Umesh Nerlige Ramappa)
- Make engine parking work with PREEMPT_RT (Sebastian Andrzej Siewior)
- Replace X86_FEATURE_PAT checks with pat_enabled() (Lucas De Marchi); see the note after this list
- Add selftest for stealing of GuC context ids (Matthew Brost)
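
As an illustration of the to_gt(i915) conversion that makes up much of the diff
below, a minimal sketch of the accessor pattern (the body of to_gt() is written
from memory of the in-tree helper and may differ in detail; at this point in the
series it still resolves to the single embedded GT, the indirection is what
allows multiple GTs later):

    /* Accessor instead of reaching into the embedded GT directly. */
    static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
    {
            return &i915->gt;
    }

    /* Call sites then change along these lines (taken from hunks below): */
    intel_gt_set_wedged(to_gt(dev_priv));           /* was &dev_priv->gt  */
    intel_uc_uses_guc_submission(&to_gt(i915)->uc); /* was &i915->gt.uc   */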

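The pat_enabled() conversion is more than a spelling change:
boot_cpu_has(X86_FEATURE_PAT) only says the CPU advertises PAT, while
pat_enabled() also reflects whether the kernel actually set it up (PAT can be
disabled at boot, e.g. with the nopat parameter). A before/after sketch
mirroring the i915_gem_mmap_ioctl() hunk further down:

    /* Before: CPU feature bit only. */
    if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
            return -ENODEV;

    /* After: also honours PAT being disabled at boot. */
    if (args->flags & I915_MMAP_WC && !pat_enabled())
            return -ENODEV;
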
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YcRvKO5cyPvIxVCi@tursulin-mobl2
commit 4817c37d71
Dave Airlie <airlied@redhat.com>, 2021-12-24 06:08:29 +10:00
126 changed files with 2058 additions and 1426 deletions

@ -163,6 +163,7 @@ i915-y += \
i915_active.o \
i915_buddy.o \
i915_cmd_parser.o \
i915_deps.o \
i915_gem_evict.o \
i915_gem_gtt.o \
i915_gem_ww.o \

@ -819,7 +819,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
* maximum clocks following a vblank miss (see do_rps_boost()).
*/
if (!state->rps_interactive) {
intel_rps_mark_interactive(&dev_priv->gt.rps, true);
intel_rps_mark_interactive(&to_gt(dev_priv)->rps, true);
state->rps_interactive = true;
}
@ -853,7 +853,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
return;
if (state->rps_interactive) {
intel_rps_mark_interactive(&dev_priv->gt.rps, false);
intel_rps_mark_interactive(&to_gt(dev_priv)->rps, false);
state->rps_interactive = false;
}

@ -843,7 +843,7 @@ __intel_display_resume(struct drm_device *dev,
static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
{
return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display &&
intel_has_gpu_reset(&dev_priv->gt));
intel_has_gpu_reset(to_gt(dev_priv)));
}
void intel_display_prepare_reset(struct drm_i915_private *dev_priv)
@ -862,14 +862,14 @@ void intel_display_prepare_reset(struct drm_i915_private *dev_priv)
return;
/* We have a modeset vs reset deadlock, defensively unbreak it. */
set_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags);
set_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags);
smp_mb__after_atomic();
wake_up_bit(&dev_priv->gt.reset.flags, I915_RESET_MODESET);
wake_up_bit(&to_gt(dev_priv)->reset.flags, I915_RESET_MODESET);
if (atomic_read(&dev_priv->gpu_error.pending_fb_pin)) {
drm_dbg_kms(&dev_priv->drm,
"Modeset potentially stuck, unbreaking through wedging\n");
intel_gt_set_wedged(&dev_priv->gt);
intel_gt_set_wedged(to_gt(dev_priv));
}
/*
@ -920,7 +920,7 @@ void intel_display_finish_reset(struct drm_i915_private *dev_priv)
return;
/* reset doesn't touch the display */
if (!test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags))
if (!test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags))
return;
state = fetch_and_zero(&dev_priv->modeset_restore_state);
@ -958,7 +958,7 @@ void intel_display_finish_reset(struct drm_i915_private *dev_priv)
drm_modeset_acquire_fini(ctx);
mutex_unlock(&dev->mode_config.mutex);
clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags);
clear_bit_unlock(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags);
}
static void icl_set_pipe_chicken(const struct intel_crtc_state *crtc_state)
@ -8513,19 +8513,19 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat
for (;;) {
prepare_to_wait(&intel_state->commit_ready.wait,
&wait_fence, TASK_UNINTERRUPTIBLE);
prepare_to_wait(bit_waitqueue(&dev_priv->gt.reset.flags,
prepare_to_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags,
I915_RESET_MODESET),
&wait_reset, TASK_UNINTERRUPTIBLE);
if (i915_sw_fence_done(&intel_state->commit_ready) ||
test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags))
test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags))
break;
schedule();
}
finish_wait(&intel_state->commit_ready.wait, &wait_fence);
finish_wait(bit_waitqueue(&dev_priv->gt.reset.flags,
finish_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags,
I915_RESET_MODESET),
&wait_reset);
}

@ -264,7 +264,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm = &dpt->vm;
vm->gt = &i915->gt;
vm->gt = to_gt(i915);
vm->i915 = i915;
vm->dma = i915->drm.dev;
vm->total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
@ -279,8 +279,6 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma = dpt_bind_vma;
vm->vma_ops.unbind_vma = dpt_unbind_vma;
vm->vma_ops.set_pages = ggtt_set_pages;
vm->vma_ops.clear_pages = clear_pages;
vm->pte_encode = gen8_ggtt_pte_encode;

@ -1382,7 +1382,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
if (!HAS_OVERLAY(dev_priv))
return;
engine = dev_priv->gt.engine[RCS0];
engine = to_gt(dev_priv)->engine[RCS0];
if (!engine || !engine->kernel_context)
return;

@ -1737,7 +1737,7 @@ static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
return intel_pxp_key_check(&i915->gt.pxp, obj, false) == 0;
return intel_pxp_key_check(&to_gt(i915)->pxp, obj, false) == 0;
}
static bool pxp_is_borked(struct drm_i915_gem_object *obj)

@ -237,7 +237,7 @@ static int proto_context_set_persistence(struct drm_i915_private *i915,
* colateral damage, and we should not pretend we can by
* exposing the interface.
*/
if (!intel_has_reset_engine(&i915->gt))
if (!intel_has_reset_engine(to_gt(i915)))
return -ENODEV;
pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE);
@ -254,7 +254,7 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
if (!protected) {
pc->uses_protected_content = false;
} else if (!intel_pxp_is_enabled(&i915->gt.pxp)) {
} else if (!intel_pxp_is_enabled(&to_gt(i915)->pxp)) {
ret = -ENODEV;
} else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) ||
!(pc->user_flags & BIT(UCONTEXT_BANNABLE))) {
@ -268,8 +268,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
*/
pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm);
if (!intel_pxp_is_active(&i915->gt.pxp))
ret = intel_pxp_start(&i915->gt.pxp);
if (!intel_pxp_is_active(&to_gt(i915)->pxp))
ret = intel_pxp_start(&to_gt(i915)->pxp);
}
return ret;
@ -571,7 +571,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
intel_engine_mask_t prev_mask;
/* FIXME: This is NIY for execlists */
if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc)))
return -ENODEV;
if (get_user(slot, &ext->engine_index))
@ -833,7 +833,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv,
sseu = &pc->legacy_rcs_sseu;
}
ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu);
ret = i915_gem_user_to_context_sseu(to_gt(i915), &user_sseu, sseu);
if (ret)
return ret;
@ -1044,7 +1044,7 @@ static struct i915_gem_engines *alloc_engines(unsigned int count)
static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
struct intel_sseu rcs_sseu)
{
const struct intel_gt *gt = &ctx->i915->gt;
const struct intel_gt *gt = to_gt(ctx->i915);
struct intel_engine_cs *engine;
struct i915_gem_engines *e, *err;
enum intel_engine_id id;
@ -1521,7 +1521,7 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
* colateral damage, and we should not pretend we can by
* exposing the interface.
*/
if (!intel_has_reset_engine(&ctx->i915->gt))
if (!intel_has_reset_engine(to_gt(ctx->i915)))
return -ENODEV;
i915_gem_context_clear_persistence(ctx);
@ -1559,7 +1559,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
} else if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
ppgtt = i915_ppgtt_create(&i915->gt, 0);
ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt)) {
drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
@ -1742,7 +1742,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
if (args->flags)
return -EINVAL;
ppgtt = i915_ppgtt_create(&i915->gt, 0);
ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@ -2194,7 +2194,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
return -EINVAL;
ret = intel_gt_terminally_wedged(&i915->gt);
ret = intel_gt_terminally_wedged(to_gt(i915));
if (ret)
return ret;

@ -379,7 +379,7 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
if (ext.flags)
return -EINVAL;
if (!intel_pxp_is_enabled(&ext_data->i915->gt.pxp))
if (!intel_pxp_is_enabled(&to_gt(ext_data->i915)->pxp))
return -ENODEV;
ext_data->flags |= I915_BO_PROTECTED;

@ -1098,6 +1098,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
static void reloc_cache_unmap(struct reloc_cache *cache)
{
void *vaddr;
if (!cache->vaddr)
return;
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP)
kunmap_atomic(vaddr);
else
io_mapping_unmap_atomic((void __iomem *)vaddr);
}
static void reloc_cache_remap(struct reloc_cache *cache,
struct drm_i915_gem_object *obj)
{
void *vaddr;
if (!cache->vaddr)
return;
if (cache->vaddr & KMAP) {
struct page *page = i915_gem_object_get_page(obj, cache->page);
vaddr = kmap_atomic(page);
cache->vaddr = unmask_flags(cache->vaddr) |
(unsigned long)vaddr;
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
offset = cache->node.start;
if (!drm_mm_node_allocated(&cache->node))
offset += cache->page << PAGE_SHIFT;
cache->vaddr = (unsigned long)
io_mapping_map_atomic_wc(&ggtt->iomap, offset);
}
}
static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
@ -1362,10 +1403,17 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* batchbuffers.
*/
if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
GRAPHICS_VER(eb->i915) == 6) {
GRAPHICS_VER(eb->i915) == 6 &&
!i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
struct i915_vma *vma = target->vma;
reloc_cache_unmap(&eb->reloc_cache);
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
PIN_GLOBAL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
return err;
}
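
For context on the two helpers added above: the relocation cache may hold a
kmap_atomic() or io_mapping_map_atomic_wc() mapping, and an atomic mapping must
not be held while sleeping on vm->mutex, which is presumably why the Gen6
global-bind path is now bracketed like this (restating the hunk, comments
added):

    /* Drop the atomic mapping before taking a sleeping lock ... */
    reloc_cache_unmap(&eb->reloc_cache);
    mutex_lock(&vma->vm->mutex);
    err = i915_vma_bind(target->vma,
                        target->vma->obj->cache_level,
                        PIN_GLOBAL, NULL);
    mutex_unlock(&vma->vm->mutex);
    /* ... and re-establish it afterwards. */
    reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
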
@ -2361,9 +2409,9 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
static int num_vcs_engines(const struct drm_i915_private *i915)
static int num_vcs_engines(struct drm_i915_private *i915)
{
return hweight_long(VDBOX_MASK(&i915->gt));
return hweight_long(VDBOX_MASK(to_gt(i915)));
}
/*

@ -73,7 +73,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~(I915_MMAP_WC))
return -EINVAL;
if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
if (args->flags & I915_MMAP_WC && !pat_enabled())
return -ENODEV;
obj = i915_gem_object_lookup(file, args->handle);
@ -646,7 +646,7 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto insert;
/* Attempt to reap some mmap space from dead objects */
err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT,
err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT,
NULL);
if (err)
goto err;
@ -737,7 +737,7 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
if (HAS_LMEM(to_i915(dev)))
mmap_type = I915_MMAP_TYPE_FIXED;
else if (boot_cpu_has(X86_FEATURE_PAT))
else if (pat_enabled())
mmap_type = I915_MMAP_TYPE_WC;
else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
return -ENODEV;
@ -793,7 +793,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
break;
case I915_MMAP_OFFSET_WC:
if (!boot_cpu_has(X86_FEATURE_PAT))
if (!pat_enabled())
return -ENODEV;
type = I915_MMAP_TYPE_WC;
break;
@ -803,7 +803,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
break;
case I915_MMAP_OFFSET_UC:
if (!boot_cpu_has(X86_FEATURE_PAT))
if (!pat_enabled())
return -ENODEV;
type = I915_MMAP_TYPE_UC;
break;

@ -262,6 +262,8 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
*/
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
{
assert_object_held(obj);
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;
@ -328,7 +330,16 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
obj->ops->delayed_free(obj);
continue;
}
if (!i915_gem_object_trylock(obj, NULL)) {
/* busy, toss it back to the pile */
if (llist_add(&obj->freed, &i915->mm.free_list))
queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
continue;
}
__i915_gem_object_pages_fini(obj);
i915_gem_object_unlock(obj);
__i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
@ -348,7 +359,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
container_of(work, struct drm_i915_private, mm.free_work);
container_of(work, struct drm_i915_private, mm.free_work.work);
i915_gem_flush_free_objects(i915);
}
@ -380,7 +391,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
queue_work(i915->wq, &i915->mm.free_work);
queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
}
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@ -705,7 +716,7 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
void i915_gem_init__objects(struct drm_i915_private *i915)
{
INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
void i915_objects_module_exit(void)

@ -210,9 +210,13 @@ static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object
return __i915_gem_object_lock(obj, ww, true);
}
static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww)
{
return dma_resv_trylock(obj->base.resv);
if (!ww)
return dma_resv_trylock(obj->base.resv);
else
return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
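
The reworked trylock above is used in two forms by the callers converted later
in this diff; a minimal caller-side sketch (ww stands for a hypothetical
caller-local struct i915_gem_ww_ctx):

    /* No ww transaction at hand (shrinker, debug paths): plain dma_resv_trylock(). */
    if (!i915_gem_object_trylock(obj, NULL))
            return;         /* object is busy, skip it */

    /* Inside a ww transaction the attempt goes through
     * ww_mutex_trylock(&obj->base.resv->lock, &ww.ctx), so it is taken in
     * association with the acquire context rather than as a bare trylock.
     */
    if (!i915_gem_object_trylock(obj, &ww))
            goto skip;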

@ -424,8 +424,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
goto err_unpin;
}
if (GEM_WARN_ON(type == I915_MAP_WC &&
!static_cpu_has(X86_FEATURE_PAT)))
if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled()))
ptr = ERR_PTR(-ENODEV);
else if (i915_gem_object_has_struct_page(obj))
ptr = i915_gem_object_map_page(obj, type);

@ -19,6 +19,7 @@
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
struct address_space *mapping = obj->base.filp->f_mapping;
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct scatterlist *sg;
struct sg_table *st;
dma_addr_t dma;
@ -73,7 +74,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
dst += PAGE_SIZE;
}
intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
intel_gt_chipset_flush(to_gt(i915));
/* We're no longer struct page backed */
obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE;
@ -140,6 +141,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
{
void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
char __user *user_data = u64_to_user_ptr(args->data_ptr);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
int err;
err = i915_gem_object_wait(obj,
@ -159,7 +161,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
return -EFAULT;
drm_clflush_virt_range(vaddr, args->size);
intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
intel_gt_chipset_flush(to_gt(i915));
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
return 0;

@ -35,7 +35,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
intel_gt_suspend_prepare(&i915->gt);
intel_gt_suspend_prepare(to_gt(i915));
i915_gem_drain_freed_objects(i915);
}
@ -153,7 +153,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
* machine in an unusable condition.
*/
intel_gt_suspend_late(&i915->gt);
intel_gt_suspend_late(to_gt(i915));
spin_lock_irqsave(&i915->mm.obj_lock, flags);
for (phase = phases; *phase; phase++) {
@ -223,7 +223,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
intel_gt_resume(&i915->gt);
intel_gt_resume(to_gt(i915));
ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
GEM_WARN_ON(ret);

@ -36,8 +36,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}
static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
unsigned long shrink, bool trylock_vm)
static int drop_pages(struct drm_i915_gem_object *obj,
unsigned long shrink, bool trylock_vm)
{
unsigned long flags;
@ -153,7 +153,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
*/
if (shrink & I915_SHRINK_ACTIVE)
/* Retire requests to unpin all idle contexts */
intel_gt_retire_requests(&i915->gt);
intel_gt_retire_requests(to_gt(i915));
/*
* As we may completely rewrite the (un)bound list whilst unbinding
@ -214,26 +214,24 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
err = 0;
if (unsafe_drop_pages(obj, shrink, trylock_vm)) {
/* May arrive from get_pages on another bo */
if (!ww) {
if (!i915_gem_object_trylock(obj))
goto skip;
} else {
err = i915_gem_object_lock(obj, ww);
if (err)
goto skip;
}
if (!__i915_gem_object_put_pages(obj)) {
if (!try_to_writeback(obj, shrink))
count += obj->base.size >> PAGE_SHIFT;
}
if (!ww)
i915_gem_object_unlock(obj);
/* May arrive from get_pages on another bo */
if (!ww) {
if (!i915_gem_object_trylock(obj, NULL))
goto skip;
} else {
err = i915_gem_object_lock(obj, ww);
if (err)
goto skip;
}
if (drop_pages(obj, shrink, trylock_vm) &&
!__i915_gem_object_put_pages(obj) &&
!try_to_writeback(obj, shrink))
count += obj->base.size >> PAGE_SHIFT;
if (!ww)
i915_gem_object_unlock(obj);
scanned += obj->base.size >> PAGE_SHIFT;
skip:
i915_gem_object_put(obj);
@ -407,12 +405,18 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
list_for_each_entry_safe(vma, next,
&i915->ggtt.vm.bound_list, vm_link) {
unsigned long count = vma->node.size >> PAGE_SHIFT;
struct drm_i915_gem_object *obj = vma->obj;
if (!vma->iomap || i915_vma_is_active(vma))
continue;
if (!i915_gem_object_trylock(obj, NULL))
continue;
if (__i915_vma_unbind(vma) == 0)
freed_pages += count;
i915_gem_object_unlock(obj);
}
mutex_unlock(&i915->ggtt.vm.mutex);

@ -488,6 +488,9 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
return 0;
}
/* Exclude the reserved region from driver use */
mem->region.end = reserved_base - 1;
/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
reserved_total = stolen_top - reserved_base;
@ -653,7 +656,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
i915_gem_object_set_cache_coherency(obj, cache_level);
if (WARN_ON(!i915_gem_object_trylock(obj)))
if (WARN_ON(!i915_gem_object_trylock(obj, NULL)))
return -EBUSY;
i915_gem_object_init_memory_region(obj, mem);
@ -780,6 +783,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
struct intel_uncore *uncore = &i915->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_memory_region *mem;
resource_size_t min_page_size;
resource_size_t io_start;
resource_size_t lmem_size;
u64 lmem_base;
@ -791,8 +795,11 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
lmem_size = pci_resource_len(pdev, 2) - lmem_base;
io_start = pci_resource_start(pdev, 2) + lmem_base;
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
I915_GTT_PAGE_SIZE_4K;
mem = intel_memory_region_create(i915, lmem_base, lmem_size,
I915_GTT_PAGE_SIZE_4K, io_start,
min_page_size, io_start,
type, instance,
&i915_region_stolen_lmem_ops);
if (IS_ERR(mem))

@ -38,12 +38,13 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
{
const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_private *i915 = to_i915(dev);
struct i915_gem_context *ctx;
unsigned long idx;
long ret;
/* ABI: return -EIO if already wedged */
ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
ret = intel_gt_terminally_wedged(to_gt(i915));
if (ret)
return ret;

@ -166,7 +166,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
const unsigned int max_segment = i915_sg_segment_size();
const size_t size = ttm->num_pages << PAGE_SHIFT;
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
struct sg_table *st;
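
The one-line shmem change above is an integer-width fix: ttm->num_pages is a
32-bit count (uint32_t in struct ttm_tt, as I recall), so shifting it by
PAGE_SHIFT in 32-bit arithmetic truncates the size for objects of 4 GiB and
larger; widening first avoids that:

    /* before: the shift happens in 32-bit arithmetic and can truncate */
    const size_t size = ttm->num_pages << PAGE_SHIFT;

    /* after: widen to size_t before shifting */
    const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;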

@ -3,10 +3,9 @@
* Copyright © 2021 Intel Corporation
*/
#include <linux/dma-fence-array.h>
#include <drm/ttm/ttm_bo_driver.h>
#include "i915_deps.h"
#include "i915_drv.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"
@ -43,234 +42,6 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
}
#endif
/**
* DOC: Set of utilities to dynamically collect dependencies and
* eventually coalesce them into a single fence which is fed into
* the GT migration code, since it only accepts a single dependency
* fence.
* The single fence returned from these utilities, in the case of
* dependencies from multiple fence contexts, a struct dma_fence_array,
* since the i915 request code can break that up and await the individual
* fences.
*
* Once we can do async unbinding, this is also needed to coalesce
* the migration fence with the unbind fences.
*
* While collecting the individual dependencies, we store the refcounted
* struct dma_fence pointers in a realloc-managed pointer array, since
* that can be easily fed into a dma_fence_array. Other options are
* available, like for example an xarray for similarity with drm/sched.
* Can be changed easily if needed.
*
* A struct i915_deps need to be initialized using i915_deps_init().
* If i915_deps_add_dependency() or i915_deps_add_resv() return an
* error code they will internally call i915_deps_fini(), which frees
* all internal references and allocations. After a call to
* i915_deps_to_fence(), or i915_deps_sync(), the struct should similarly
* be viewed as uninitialized.
*
* We might want to break this out into a separate file as a utility.
*/
#define I915_DEPS_MIN_ALLOC_CHUNK 8U
/**
* struct i915_deps - Collect dependencies into a single dma-fence
* @single: Storage for pointer if the collection is a single fence.
* @fence: Allocated array of fence pointers if more than a single fence;
* otherwise points to the address of @single.
* @num_deps: Current number of dependency fences.
* @fences_size: Size of the @fences array in number of pointers.
* @gfp: Allocation mode.
*/
struct i915_deps {
struct dma_fence *single;
struct dma_fence **fences;
unsigned int num_deps;
unsigned int fences_size;
gfp_t gfp;
};
static void i915_deps_reset_fences(struct i915_deps *deps)
{
if (deps->fences != &deps->single)
kfree(deps->fences);
deps->num_deps = 0;
deps->fences_size = 1;
deps->fences = &deps->single;
}
static void i915_deps_init(struct i915_deps *deps, gfp_t gfp)
{
deps->fences = NULL;
deps->gfp = gfp;
i915_deps_reset_fences(deps);
}
static void i915_deps_fini(struct i915_deps *deps)
{
unsigned int i;
for (i = 0; i < deps->num_deps; ++i)
dma_fence_put(deps->fences[i]);
if (deps->fences != &deps->single)
kfree(deps->fences);
}
static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
const struct ttm_operation_ctx *ctx)
{
int ret;
if (deps->num_deps >= deps->fences_size) {
unsigned int new_size = 2 * deps->fences_size;
struct dma_fence **new_fences;
new_size = max(new_size, I915_DEPS_MIN_ALLOC_CHUNK);
new_fences = kmalloc_array(new_size, sizeof(*new_fences), deps->gfp);
if (!new_fences)
goto sync;
memcpy(new_fences, deps->fences,
deps->fences_size * sizeof(*new_fences));
swap(new_fences, deps->fences);
if (new_fences != &deps->single)
kfree(new_fences);
deps->fences_size = new_size;
}
deps->fences[deps->num_deps++] = dma_fence_get(fence);
return 0;
sync:
if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) {
ret = -EBUSY;
goto unref;
}
ret = dma_fence_wait(fence, ctx->interruptible);
if (ret)
goto unref;
ret = fence->error;
if (ret)
goto unref;
return 0;
unref:
i915_deps_fini(deps);
return ret;
}
static int i915_deps_sync(struct i915_deps *deps,
const struct ttm_operation_ctx *ctx)
{
struct dma_fence **fences = deps->fences;
unsigned int i;
int ret = 0;
for (i = 0; i < deps->num_deps; ++i, ++fences) {
if (ctx->no_wait_gpu && !dma_fence_is_signaled(*fences)) {
ret = -EBUSY;
break;
}
ret = dma_fence_wait(*fences, ctx->interruptible);
if (!ret)
ret = (*fences)->error;
if (ret)
break;
}
i915_deps_fini(deps);
return ret;
}
static int i915_deps_add_dependency(struct i915_deps *deps,
struct dma_fence *fence,
const struct ttm_operation_ctx *ctx)
{
unsigned int i;
int ret;
if (!fence)
return 0;
if (dma_fence_is_signaled(fence)) {
ret = fence->error;
if (ret)
i915_deps_fini(deps);
return ret;
}
for (i = 0; i < deps->num_deps; ++i) {
struct dma_fence *entry = deps->fences[i];
if (!entry->context || entry->context != fence->context)
continue;
if (dma_fence_is_later(fence, entry)) {
dma_fence_put(entry);
deps->fences[i] = dma_fence_get(fence);
}
return 0;
}
return i915_deps_grow(deps, fence, ctx);
}
static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps,
const struct ttm_operation_ctx *ctx)
{
struct dma_fence_array *array;
if (deps->num_deps == 0)
return NULL;
if (deps->num_deps == 1) {
deps->num_deps = 0;
return deps->fences[0];
}
/*
* TODO: Alter the allocation mode here to not try too hard to
* make things async.
*/
array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0,
false);
if (!array)
return ERR_PTR(i915_deps_sync(deps, ctx));
deps->fences = NULL;
i915_deps_reset_fences(deps);
return &array->base;
}
static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
bool all, const bool no_excl,
const struct ttm_operation_ctx *ctx)
{
struct dma_resv_iter iter;
struct dma_fence *fence;
dma_resv_assert_held(resv);
dma_resv_for_each_fence(&iter, resv, all, fence) {
int ret;
if (no_excl && dma_resv_iter_is_exclusive(&iter))
continue;
ret = i915_deps_add_dependency(deps, fence, ctx);
if (ret)
return ret;
}
return 0;
}
static enum i915_cache_level
i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
struct ttm_tt *ttm)
@ -387,7 +158,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
struct ttm_resource *dst_mem,
struct ttm_tt *dst_ttm,
struct sg_table *dst_st,
struct dma_fence *dep)
const struct i915_deps *deps)
{
struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
bdev);
@ -397,7 +168,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
enum i915_cache_level src_level, dst_level;
int ret;
if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
return ERR_PTR(-EINVAL);
/* With fail_gpu_migration, we always perform a GPU clear. */
@ -410,8 +181,8 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
!I915_SELFTEST_ONLY(fail_gpu_migration))
return ERR_PTR(-EINVAL);
intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_clear(i915->gt.migrate.context, dep,
intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
dst_st->sgl, dst_level,
i915_ttm_gtt_binds_lmem(dst_mem),
0, &rq);
@ -423,9 +194,9 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
return ERR_CAST(src_rsgt);
src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_copy(i915->gt.migrate.context,
dep, src_rsgt->table.sgl,
intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
deps, src_rsgt->table.sgl,
src_level,
i915_ttm_gtt_binds_lmem(bo->resource),
dst_st->sgl, dst_level,
@ -435,7 +206,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
i915_refct_sgt_put(src_rsgt);
}
intel_engine_pm_put(i915->gt.migrate.context->engine);
intel_engine_pm_put(to_gt(i915)->migrate.context->engine);
if (ret && rq) {
i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
@ -610,10 +381,11 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
}
static struct dma_fence *
__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
__i915_ttm_move(struct ttm_buffer_object *bo,
const struct ttm_operation_ctx *ctx, bool clear,
struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
struct i915_refct_sgt *dst_rsgt, bool allow_accel,
struct dma_fence *move_dep)
const struct i915_deps *move_deps)
{
struct i915_ttm_memcpy_work *copy_work = NULL;
struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
@ -621,7 +393,7 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
if (allow_accel) {
fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
&dst_rsgt->table, move_dep);
&dst_rsgt->table, move_deps);
/*
* We only need to intercept the error when moving to lmem.
@ -655,8 +427,8 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
if (!IS_ERR(fence))
goto out;
} else if (move_dep) {
int err = dma_fence_wait(move_dep, true);
} else if (move_deps) {
int err = i915_deps_sync(move_deps, ctx);
if (err)
return ERR_PTR(err);
@ -680,29 +452,17 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
return fence;
}
static struct dma_fence *prev_fence(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx)
static int
prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
{
struct i915_deps deps;
int ret;
/*
* Instead of trying hard with GFP_KERNEL to allocate memory,
* the dependency collection will just sync if it doesn't
* succeed.
*/
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = i915_deps_add_dependency(&deps, bo->moving, ctx);
ret = i915_deps_add_dependency(deps, bo->moving, ctx);
if (!ret)
/*
* TODO: Only await excl fence here, and shared fences before
* signaling the migration fence.
*/
ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
if (ret)
return ERR_PTR(ret);
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
return i915_deps_to_fence(&deps, ctx);
return ret;
}
/**
@ -756,16 +516,18 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
struct dma_fence *dep = prev_fence(bo, ctx);
struct i915_deps deps;
if (IS_ERR(dep)) {
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
if (ret) {
i915_refct_sgt_put(dst_rsgt);
return PTR_ERR(dep);
return ret;
}
migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
dst_rsgt, true, dep);
dma_fence_put(dep);
migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
dst_rsgt, true, &deps);
i915_deps_fini(&deps);
}
/* We can possibly get an -ERESTARTSYS here */
@ -826,47 +588,38 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
.interruptible = intr,
};
struct i915_refct_sgt *dst_rsgt;
struct dma_fence *copy_fence, *dep_fence;
struct dma_fence *copy_fence;
struct i915_deps deps;
int ret, shared_err;
int ret;
assert_object_held(dst);
assert_object_held(src);
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
/*
* We plan to add a shared fence only for the source. If that
* fails, we await all source fences before commencing
* the copy instead of only the exclusive.
*/
shared_err = dma_resv_reserve_shared(src_bo->base.resv, 1);
ret = i915_deps_add_resv(&deps, dst_bo->base.resv, true, false, &ctx);
if (!ret)
ret = i915_deps_add_resv(&deps, src_bo->base.resv,
!!shared_err, false, &ctx);
ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
if (ret)
return ret;
dep_fence = i915_deps_to_fence(&deps, &ctx);
if (IS_ERR(dep_fence))
return PTR_ERR(dep_fence);
ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
if (ret)
return ret;
ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
if (ret)
return ret;
dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource,
copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
dst_bo->ttm, dst_rsgt, allow_accel,
dep_fence);
&deps);
i915_deps_fini(&deps);
i915_refct_sgt_put(dst_rsgt);
if (IS_ERR_OR_NULL(copy_fence))
return PTR_ERR_OR_ZERO(copy_fence);
dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
/* If we failed to reserve a shared slot, add an exclusive fence */
if (shared_err)
dma_resv_add_excl_fence(src_bo->base.resv, copy_fence);
else
dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
dma_fence_put(copy_fence);
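
The hunks above move the dependency-collection helpers out of this file (they
now build as i915_deps.o, per the Makefile hunk at the top of this diff) and
switch the move paths from a single pre-coalesced fence to a struct i915_deps.
A sketch of the resulting lifecycle, condensed from the i915_ttm_move() and
i915_gem_obj_copy_ttm() changes above (declarations of bo, ctx, clear and the
destination arguments omitted):

    struct i915_deps deps;
    struct dma_fence *fence;
    int ret;

    i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
    ret = i915_deps_add_resv(&deps, bo->base.resv, &ctx);
    if (ret)
            return ret;     /* the add helpers clean up the struct on failure */

    /* The mover now takes the whole collection instead of one fence. */
    fence = __i915_ttm_move(bo, &ctx, clear, dst_mem, dst_ttm,
                            dst_rsgt, true, &deps);

    i915_deps_fini(&deps);  /* drop the collected fence references */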

@ -529,7 +529,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
* On almost all of the older hw, we cannot tell the GPU that
* a page is readonly.
*/
if (!dev_priv->gt.vm->has_read_only)
if (!to_gt(dev_priv)->vm->has_read_only)
return -ENODEV;
}

@ -1705,7 +1705,7 @@ int i915_gem_huge_page_mock_selftests(void)
mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
mkwrite_device_info(dev_priv)->ppgtt_size = 48;
ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
@ -1747,7 +1747,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
return 0;
}
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);

@ -592,7 +592,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_client_tiled_blits),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);

@ -90,7 +90,7 @@ static int live_nop_switch(void *arg)
}
if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
pr_err("Failed to populated %d contexts\n", nctx);
intel_gt_set_wedged(&i915->gt);
intel_gt_set_wedged(to_gt(i915));
i915_request_put(rq);
err = -EIO;
goto out_file;
@ -146,7 +146,7 @@ static int live_nop_switch(void *arg)
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Switching between %ld contexts timed out\n",
prime);
intel_gt_set_wedged(&i915->gt);
intel_gt_set_wedged(to_gt(i915));
i915_request_put(rq);
break;
}
@ -1223,7 +1223,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
return 0;
if (flags & TEST_RESET)
igt_global_reset_lock(&i915->gt);
igt_global_reset_lock(to_gt(i915));
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
@ -1306,7 +1306,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
out_unlock:
if (flags & TEST_RESET)
igt_global_reset_unlock(&i915->gt);
igt_global_reset_unlock(to_gt(i915));
if (ret)
pr_err("%s: Failed with %d!\n", name, ret);
@ -1481,10 +1481,10 @@ static int check_scratch(struct i915_address_space *vm, u64 offset)
static int write_to_scratch(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct drm_i915_gem_object *obj,
u64 offset, u32 value)
{
struct drm_i915_private *i915 = ctx->i915;
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *vma;
@ -1497,15 +1497,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
if (err)
return err;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto out;
}
if (IS_ERR(cmd))
return PTR_ERR(cmd);
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
if (GRAPHICS_VER(i915) >= 8) {
@ -1569,17 +1563,19 @@ static int write_to_scratch(struct i915_gem_context *ctx,
i915_vma_unpin(vma);
out_vm:
i915_vm_put(vm);
out:
i915_gem_object_put(obj);
if (!err)
err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
return err;
}
static int read_from_scratch(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct drm_i915_gem_object *obj,
u64 offset, u32 *value)
{
struct drm_i915_private *i915 = ctx->i915;
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
const u32 result = 0x100;
struct i915_request *rq;
@ -1594,10 +1590,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
if (err)
return err;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (GRAPHICS_VER(i915) >= 8) {
const u32 GPR0 = engine->mmio_base + 0x600;
@ -1615,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto out;
goto err_unpin;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@ -1651,7 +1643,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto out;
goto err_unpin;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@ -1722,8 +1714,10 @@ static int read_from_scratch(struct i915_gem_context *ctx,
i915_vma_unpin(vma);
out_vm:
i915_vm_put(vm);
out:
i915_gem_object_put(obj);
if (!err)
err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
return err;
}
@ -1757,6 +1751,7 @@ static int igt_vm_isolation(void *arg)
{
struct drm_i915_private *i915 = arg;
struct i915_gem_context *ctx_a, *ctx_b;
struct drm_i915_gem_object *obj_a, *obj_b;
unsigned long num_engines, count;
struct intel_engine_cs *engine;
struct igt_live_test t;
@ -1810,6 +1805,18 @@ static int igt_vm_isolation(void *arg)
vm_total = ctx_a->vm->total;
GEM_BUG_ON(ctx_b->vm->total != vm_total);
obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj_a)) {
err = PTR_ERR(obj_a);
goto out_file;
}
obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj_b)) {
err = PTR_ERR(obj_b);
goto put_a;
}
count = 0;
num_engines = 0;
for_each_uabi_engine(engine, i915) {
@ -1832,13 +1839,13 @@ static int igt_vm_isolation(void *arg)
I915_GTT_PAGE_SIZE, vm_total,
sizeof(u32), alignof_dword);
err = write_to_scratch(ctx_a, engine,
err = write_to_scratch(ctx_a, engine, obj_a,
offset, 0xdeadbeef);
if (err == 0)
err = read_from_scratch(ctx_b, engine,
err = read_from_scratch(ctx_b, engine, obj_b,
offset, &value);
if (err)
goto out_file;
goto put_b;
if (value != expected) {
pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@ -1847,7 +1854,7 @@ static int igt_vm_isolation(void *arg)
lower_32_bits(offset),
this);
err = -EINVAL;
goto out_file;
goto put_b;
}
this++;
@ -1858,6 +1865,10 @@ static int igt_vm_isolation(void *arg)
pr_info("Checked %lu scratch offsets across %lu engines\n",
count, num_engines);
put_b:
i915_gem_object_put(obj_b);
put_a:
i915_gem_object_put(obj_a);
out_file:
if (igt_live_test_end(&t))
err = -EIO;
@ -1877,7 +1888,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_vm_isolation),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);

@ -261,5 +261,5 @@ int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
if (!HAS_LMEM(i915))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

@ -84,6 +84,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
struct rnd_state *prng)
{
const unsigned long npages = obj->base.size / PAGE_SIZE;
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt_view view;
struct i915_vma *vma;
unsigned long page;
@ -141,7 +142,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
goto out;
intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
intel_gt_flush_ggtt_writes(to_gt(i915));
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@ -175,6 +176,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
{
const unsigned int nreal = obj->scratch / PAGE_SIZE;
const unsigned long npages = obj->base.size / PAGE_SIZE;
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
unsigned long page;
int err;
@ -234,7 +236,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
continue;
intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
intel_gt_flush_ggtt_writes(to_gt(i915));
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@ -616,14 +618,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
static void disable_retire_worker(struct drm_i915_private *i915)
{
i915_gem_driver_unregister__shrinker(i915);
intel_gt_pm_get(&i915->gt);
cancel_delayed_work_sync(&i915->gt.requests.retire_work);
intel_gt_pm_get(to_gt(i915));
cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
}
static void restore_retire_worker(struct drm_i915_private *i915)
{
igt_flush_test(i915);
intel_gt_pm_put(&i915->gt);
intel_gt_pm_put(to_gt(i915));
i915_gem_driver_register__shrinker(i915);
}
@ -651,8 +653,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Disable background reaper */
disable_retire_worker(i915);
GEM_BUG_ON(!i915->gt.awake);
intel_gt_retire_requests(&i915->gt);
GEM_BUG_ON(!to_gt(i915)->awake);
intel_gt_retire_requests(to_gt(i915));
i915_gem_drain_freed_objects(i915);
/* Trim the device mmap space to only a page */
@ -728,7 +730,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Now fill with busy dead objects that we expect to reap */
for (loop = 0; loop < 3; loop++) {
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
break;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
@ -942,7 +944,7 @@ static int __igt_mmap(struct drm_i915_private *i915,
}
if (type == I915_MMAP_TYPE_GTT)
intel_gt_flush_ggtt_writes(&i915->gt);
intel_gt_flush_ggtt_writes(to_gt(i915));
err = wc_check(obj);
if (err == -ENXIO)
@ -1049,7 +1051,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
goto out_unmap;
}
intel_gt_flush_ggtt_writes(&i915->gt);
intel_gt_flush_ggtt_writes(to_gt(i915));
err = access_process_vm(current, addr, &x, sizeof(x), 0);
if (err != sizeof(x)) {
@ -1065,7 +1067,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
goto out_unmap;
}
intel_gt_flush_ggtt_writes(&i915->gt);
intel_gt_flush_ggtt_writes(to_gt(i915));
err = __get_user(y, ptr);
if (err) {
@ -1165,7 +1167,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
}
if (type == I915_MMAP_TYPE_GTT)
intel_gt_flush_ggtt_writes(&i915->gt);
intel_gt_flush_ggtt_writes(to_gt(i915));
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;

@ -269,19 +269,6 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
static int pd_vma_set_pages(struct i915_vma *vma)
{
vma->pages = ERR_PTR(-ENODEV);
return 0;
}
static void pd_vma_clear_pages(struct i915_vma *vma)
{
GEM_BUG_ON(!vma->pages);
vma->pages = NULL;
}
static void pd_vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
@ -321,8 +308,6 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
}
static const struct i915_vma_ops pd_vma_ops = {
.set_pages = pd_vma_set_pages,
.clear_pages = pd_vma_clear_pages,
.bind_vma = pd_vma_bind,
.unbind_vma = pd_vma_unbind,
};
@ -454,6 +439,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
err = gen6_ppgtt_init_scratch(ppgtt);

@ -776,10 +776,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
if (HAS_LMEM(gt->i915))
if (HAS_LMEM(gt->i915)) {
ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
else
/*
* On some platforms the hw has dropped support for 4K GTT pages
* when dealing with LMEM, and due to the design of 64K GTT
* pages in the hw, we can only mark the *entire* page-table as
* operating in 64K GTT mode, since the enable bit is still on
* the pde, and not the pte. And since we still need to allow
* 4K GTT pages for SMEM objects, we can't have a "normal" 4K
* page-table with scratch pointing to LMEM, since that's
* undefined from the hw pov. The simplest solution is to just
* move the 64K scratch page to SMEM on such platforms and call
* it a day, since that should work for all configurations.
*/
if (HAS_64K_PAGES(gt->i915))
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
else
ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
} else {
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
}
err = gen8_init_scratch(&ppgtt->vm);
if (err)

@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce)
static inline void intel_context_mark_active(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
test_bit(CONTEXT_IS_PARKING, &ce->flags));
++ce->active_count;
}

@ -118,6 +118,7 @@ struct intel_context {
#define CONTEXT_LRCA_DIRTY 9
#define CONTEXT_GUC_INIT 10
#define CONTEXT_PERMA_PIN 11
#define CONTEXT_IS_PARKING 12
struct {
u64 timeout_us;

@ -26,7 +26,7 @@ static void dbg_poison_ce(struct intel_context *ce)
int type = i915_coherent_map_type(ce->engine->i915, obj, true);
void *map;
if (!i915_gem_object_trylock(obj))
if (!i915_gem_object_trylock(obj, NULL))
return;
map = i915_gem_object_pin_map(obj, type);
@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
return 0;
}
#if IS_ENABLED(CONFIG_LOCKDEP)
static unsigned long __timeline_mark_lock(struct intel_context *ce)
{
unsigned long flags;
local_irq_save(flags);
mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
return flags;
}
static void __timeline_mark_unlock(struct intel_context *ce,
unsigned long flags)
{
mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
local_irq_restore(flags);
}
#else
static unsigned long __timeline_mark_lock(struct intel_context *ce)
{
return 0;
}
static void __timeline_mark_unlock(struct intel_context *ce,
unsigned long flags)
{
}
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct i915_request *rq = to_request(fence);
@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
unsigned long flags;
bool result = true;
/*
@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* engine->wakeref.count, we may see the request completion and retire
* it causing an underflow of the engine->wakeref.
*/
flags = __timeline_mark_lock(ce);
set_bit(CONTEXT_IS_PARKING, &ce->flags);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
rq = __i915_request_create(ce, GFP_NOWAIT);
@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
result = false;
out_unlock:
__timeline_mark_unlock(ce, flags);
clear_bit(CONTEXT_IS_PARKING, &ce->flags);
return result;
}

@ -116,7 +116,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
disabled |= (I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY);
if (intel_uc_uses_guc_submission(&i915->gt.uc))
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
for (i = 0; i < ARRAY_SIZE(map); i++) {

@ -22,9 +22,6 @@
#include "intel_gtt.h"
#include "gen8_ppgtt.h"
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);
static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
unsigned long color,
u64 *start,
@ -892,21 +889,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return 0;
}
int ggtt_set_pages(struct i915_vma *vma)
{
int ret;
GEM_BUG_ON(vma->pages);
ret = i915_get_ggtt_vma_pages(vma);
if (ret)
return ret;
vma->page_sizes = vma->obj->mm.page_sizes;
return 0;
}
static void gen6_gmch_remove(struct i915_address_space *vm)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@ -941,6 +923,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
size = gen8_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
@ -967,8 +950,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
ggtt->vm.vma_ops.clear_pages = clear_pages;
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
@ -1094,6 +1075,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
@ -1117,8 +1099,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
ggtt->vm.vma_ops.clear_pages = clear_pages;
return ggtt_probe_common(ggtt, size);
}
@ -1146,6 +1126,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
if (needs_idle_maps(i915)) {
drm_notice(&i915->drm,
@ -1162,8 +1143,6 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
ggtt->vm.vma_ops.clear_pages = clear_pages;
if (unlikely(ggtt->do_idle_maps))
drm_notice(&i915->drm,
@ -1229,7 +1208,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
int ret;
ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915));
if (ret)
return ret;
@ -1333,382 +1312,3 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
intel_ggtt_restore_fences(ggtt);
}
static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
unsigned int width, unsigned int height,
unsigned int src_stride, unsigned int dst_stride,
struct sg_table *st, struct scatterlist *sg)
{
unsigned int column, row;
unsigned int src_idx;
for (column = 0; column < width; column++) {
unsigned int left;
src_idx = src_stride * (height - 1) + column + offset;
for (row = 0; row < height; row++) {
st->nents++;
/*
* We don't need the pages, but need to initialize
* the entries so the sg list can be happily traversed.
* The only thing we need are DMA addresses.
*/
sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
sg_dma_address(sg) =
i915_gem_object_get_dma_address(obj, src_idx);
sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
sg = sg_next(sg);
src_idx -= src_stride;
}
left = (dst_stride - height) * I915_GTT_PAGE_SIZE;
if (!left)
continue;
st->nents++;
/*
* The DE ignores the PTEs for the padding tiles, the sg entry
* here is just a conenience to indicate how many padding PTEs
* to insert at this spot.
*/
sg_set_page(sg, NULL, left, 0);
sg_dma_address(sg) = 0;
sg_dma_len(sg) = left;
sg = sg_next(sg);
}
return sg;
}
static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
struct drm_i915_gem_object *obj)
{
unsigned int size = intel_rotation_info_size(rot_info);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *st;
struct scatterlist *sg;
int ret = -ENOMEM;
int i;
/* Allocate target SG list. */
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto err_st_alloc;
ret = sg_alloc_table(st, size, GFP_KERNEL);
if (ret)
goto err_sg_alloc;
st->nents = 0;
sg = st->sgl;
for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
sg = rotate_pages(obj, rot_info->plane[i].offset,
rot_info->plane[i].width, rot_info->plane[i].height,
rot_info->plane[i].src_stride,
rot_info->plane[i].dst_stride,
st, sg);
return st;
err_sg_alloc:
kfree(st);
err_st_alloc:
drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
obj->base.size, rot_info->plane[0].width,
rot_info->plane[0].height, size);
return ERR_PTR(ret);
}
static struct scatterlist *
add_padding_pages(unsigned int count,
struct sg_table *st, struct scatterlist *sg)
{
st->nents++;
/*
* The DE ignores the PTEs for the padding tiles, the sg entry
* here is just a convenience to indicate how many padding PTEs
* to insert at this spot.
*/
sg_set_page(sg, NULL, count * I915_GTT_PAGE_SIZE, 0);
sg_dma_address(sg) = 0;
sg_dma_len(sg) = count * I915_GTT_PAGE_SIZE;
sg = sg_next(sg);
return sg;
}
static struct scatterlist *
remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj,
unsigned int offset, unsigned int alignment_pad,
unsigned int width, unsigned int height,
unsigned int src_stride, unsigned int dst_stride,
struct sg_table *st, struct scatterlist *sg,
unsigned int *gtt_offset)
{
unsigned int row;
if (!width || !height)
return sg;
if (alignment_pad)
sg = add_padding_pages(alignment_pad, st, sg);
for (row = 0; row < height; row++) {
unsigned int left = width * I915_GTT_PAGE_SIZE;
while (left) {
dma_addr_t addr;
unsigned int length;
/*
* We don't need the pages, but need to initialize
* the entries so the sg list can be happily traversed.
* The only thing we need are DMA addresses.
*/
addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
length = min(left, length);
st->nents++;
sg_set_page(sg, NULL, length, 0);
sg_dma_address(sg) = addr;
sg_dma_len(sg) = length;
sg = sg_next(sg);
offset += length / I915_GTT_PAGE_SIZE;
left -= length;
}
offset += src_stride - width;
left = (dst_stride - width) * I915_GTT_PAGE_SIZE;
if (!left)
continue;
sg = add_padding_pages(left >> PAGE_SHIFT, st, sg);
}
*gtt_offset += alignment_pad + dst_stride * height;
return sg;
}
static struct scatterlist *
remap_contiguous_pages(struct drm_i915_gem_object *obj,
unsigned int obj_offset,
unsigned int count,
struct sg_table *st, struct scatterlist *sg)
{
struct scatterlist *iter;
unsigned int offset;
iter = i915_gem_object_get_sg_dma(obj, obj_offset, &offset);
GEM_BUG_ON(!iter);
do {
unsigned int len;
len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
count << PAGE_SHIFT);
sg_set_page(sg, NULL, len, 0);
sg_dma_address(sg) =
sg_dma_address(iter) + (offset << PAGE_SHIFT);
sg_dma_len(sg) = len;
st->nents++;
count -= len >> PAGE_SHIFT;
if (count == 0)
return sg;
sg = __sg_next(sg);
iter = __sg_next(iter);
offset = 0;
} while (1);
}
static struct scatterlist *
remap_linear_color_plane_pages(struct drm_i915_gem_object *obj,
unsigned int obj_offset, unsigned int alignment_pad,
unsigned int size,
struct sg_table *st, struct scatterlist *sg,
unsigned int *gtt_offset)
{
if (!size)
return sg;
if (alignment_pad)
sg = add_padding_pages(alignment_pad, st, sg);
sg = remap_contiguous_pages(obj, obj_offset, size, st, sg);
sg = sg_next(sg);
*gtt_offset += alignment_pad + size;
return sg;
}
static struct scatterlist *
remap_color_plane_pages(const struct intel_remapped_info *rem_info,
struct drm_i915_gem_object *obj,
int color_plane,
struct sg_table *st, struct scatterlist *sg,
unsigned int *gtt_offset)
{
unsigned int alignment_pad = 0;
if (rem_info->plane_alignment)
alignment_pad = ALIGN(*gtt_offset, rem_info->plane_alignment) - *gtt_offset;
if (rem_info->plane[color_plane].linear)
sg = remap_linear_color_plane_pages(obj,
rem_info->plane[color_plane].offset,
alignment_pad,
rem_info->plane[color_plane].size,
st, sg,
gtt_offset);
else
sg = remap_tiled_color_plane_pages(obj,
rem_info->plane[color_plane].offset,
alignment_pad,
rem_info->plane[color_plane].width,
rem_info->plane[color_plane].height,
rem_info->plane[color_plane].src_stride,
rem_info->plane[color_plane].dst_stride,
st, sg,
gtt_offset);
return sg;
}
static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
struct drm_i915_gem_object *obj)
{
unsigned int size = intel_remapped_info_size(rem_info);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *st;
struct scatterlist *sg;
unsigned int gtt_offset = 0;
int ret = -ENOMEM;
int i;
/* Allocate target SG list. */
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto err_st_alloc;
ret = sg_alloc_table(st, size, GFP_KERNEL);
if (ret)
goto err_sg_alloc;
st->nents = 0;
sg = st->sgl;
for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
sg = remap_color_plane_pages(rem_info, obj, i, st, sg, &gtt_offset);
i915_sg_trim(st);
return st;
err_sg_alloc:
kfree(st);
err_st_alloc:
drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
obj->base.size, rem_info->plane[0].width,
rem_info->plane[0].height, size);
return ERR_PTR(ret);
}
static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
struct drm_i915_gem_object *obj)
{
struct sg_table *st;
struct scatterlist *sg;
unsigned int count = view->partial.size;
int ret = -ENOMEM;
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto err_st_alloc;
ret = sg_alloc_table(st, count, GFP_KERNEL);
if (ret)
goto err_sg_alloc;
st->nents = 0;
sg = remap_contiguous_pages(obj, view->partial.offset, count, st, st->sgl);
sg_mark_end(sg);
i915_sg_trim(st); /* Drop any unused tail entries. */
return st;
err_sg_alloc:
kfree(st);
err_st_alloc:
return ERR_PTR(ret);
}
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
int ret;
/*
* The vma->pages are only valid within the lifespan of the borrowed
* obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
* must be the vma->pages. A simple rule is that vma->pages must only
* be accessed when the obj->mm.pages are pinned.
*/
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
switch (vma->ggtt_view.type) {
default:
GEM_BUG_ON(vma->ggtt_view.type);
fallthrough;
case I915_GGTT_VIEW_NORMAL:
vma->pages = vma->obj->mm.pages;
return 0;
case I915_GGTT_VIEW_ROTATED:
vma->pages =
intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
break;
case I915_GGTT_VIEW_REMAPPED:
vma->pages =
intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
break;
case I915_GGTT_VIEW_PARTIAL:
vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
break;
}
ret = 0;
if (IS_ERR(vma->pages)) {
ret = PTR_ERR(vma->pages);
vma->pages = NULL;
drm_err(&vma->vm->i915->drm,
"Failed to get pages for VMA view type %u (%d)!\n",
vma->ggtt_view.type, ret);
}
return ret;
}

View file

@ -25,11 +25,8 @@
#include "shmem_utils.h"
#include "pxp/intel_pxp.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
gt->i915 = i915;
gt->uncore = &i915->uncore;
spin_lock_init(&gt->irq_lock);
INIT_LIST_HEAD(&gt->closed_vma);
@ -48,6 +45,12 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
intel_rps_init_early(&gt->rps);
}
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
gt->i915 = i915;
gt->uncore = &i915->uncore;
}
int intel_gt_probe_lmem(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;

View file

@ -35,6 +35,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
}
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
int intel_gt_probe_lmem(struct intel_gt *gt);
int intel_gt_init_mmio(struct intel_gt *gt);

View file

@ -9,11 +9,6 @@
#include "intel_engine_pm.h"
#include "intel_gt_buffer_pool.h"
static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
{
return container_of(pool, struct intel_gt, buffer_pool);
}
static struct list_head *
bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
{
@ -141,7 +136,7 @@ static struct intel_gt_buffer_pool_node *
node_create(struct intel_gt_buffer_pool *pool, size_t sz,
enum i915_map_type type)
{
struct intel_gt *gt = to_gt(pool);
struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
struct intel_gt_buffer_pool_node *node;
struct drm_i915_gem_object *obj;

View file

@ -10,11 +10,7 @@
struct intel_gt;
#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
return single_open(file, __name ## _show, inode->i_private); \
} \
#define __GT_DEBUGFS_ATTRIBUTE_FOPS(__name) \
static const struct file_operations __name ## _fops = { \
.owner = THIS_MODULE, \
.open = __name ## _open, \
@ -23,6 +19,21 @@ static const struct file_operations __name ## _fops = { \
.release = single_release, \
}
#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
return single_open(file, __name ## _show, inode->i_private); \
} \
__GT_DEBUGFS_ATTRIBUTE_FOPS(__name)
#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(__name, __size_vf) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
return single_open_size(file, __name ## _show, inode->i_private, \
__size_vf(inode->i_private)); \
} \
__GT_DEBUGFS_ATTRIBUTE_FOPS(__name)
void intel_gt_debugfs_register(struct intel_gt *gt);
struct intel_gt_debugfs_file {

View file

@ -224,19 +224,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
INIT_LIST_HEAD(&vm->bound_list);
}
void clear_pages(struct i915_vma *vma)
{
GEM_BUG_ON(!vma->pages);
if (vma->pages != vma->obj->mm.pages) {
sg_free_table(vma->pages);
kfree(vma->pages);
}
vma->pages = NULL;
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}
void *__px_vaddr(struct drm_i915_gem_object *p)
{
enum i915_map_type type;
@ -302,7 +289,7 @@ int setup_scratch_page(struct i915_address_space *vm)
do {
struct drm_i915_gem_object *obj;
obj = vm->alloc_pt_dma(vm, size);
obj = vm->alloc_scratch_dma(vm, size);
if (IS_ERR(obj))
goto skip;
@ -338,6 +325,18 @@ int setup_scratch_page(struct i915_address_space *vm)
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
/*
* If we need 64K minimum GTT pages for device local-memory,
* like on XEHPSDV, then we need to fail the allocation here.
* Otherwise we can't safely support the insertion of
* local-memory pages for this vm, since the HW expects the
* correct physical alignment and size when the page-table is
* operating in 64K GTT mode. This includes any scratch PTEs,
* since userspace can still touch them.
*/
if (HAS_64K_PAGES(vm->i915))
return -ENOMEM;
size = I915_GTT_PAGE_SIZE_4K;
} while (1);
}
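/*
 * Not part of the patch: a minimal userspace sketch of the fallback
 * policy in setup_scratch_page() above.  try_alloc() stands in for the
 * scratch allocation and (for the demo) only the 4K attempt succeeds;
 * error codes are simplified to -1.
 */
#include <stdbool.h>
#include <stdio.h>

#define SZ_4K	4096u
#define SZ_64K	65536u

static bool try_alloc(unsigned int size)
{
	return size == SZ_4K;	/* pretend the 64K attempt failed */
}

static int setup_scratch(unsigned int size, bool has_64k_pages)
{
	do {
		if (try_alloc(size)) {
			printf("scratch uses %u byte pages\n", size);
			return 0;
		}
		if (size == SZ_4K)
			return -1;	/* nothing smaller left to try */
		/*
		 * Platforms that require 64K GTT pages for local memory
		 * cannot fall back to 4K scratch, so fail instead.
		 */
		if (has_64k_pages)
			return -1;
		size = SZ_4K;
	} while (1);
}

int main(void)
{
	printf("64K preferred, 4K fallback allowed: %d\n",
	       setup_scratch(SZ_64K, false));
	printf("64K pages mandatory:                %d\n",
	       setup_scratch(SZ_64K, true));
	return 0;
}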

View file

@ -209,9 +209,6 @@ struct i915_vma_ops {
*/
void (*unbind_vma)(struct i915_address_space *vm,
struct i915_vma *vma);
int (*set_pages)(struct i915_vma *vma);
void (*clear_pages)(struct i915_vma *vma);
};
struct i915_address_space {
@ -268,6 +265,8 @@ struct i915_address_space {
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
struct drm_i915_gem_object *
(*alloc_scratch_dma)(struct i915_address_space *vm, int sz);
u64 (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
@ -599,10 +598,6 @@ release_pd_entry(struct i915_page_directory * const pd,
const struct drm_i915_gem_object * const scratch);
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
int ggtt_set_pages(struct i915_vma *vma);
int ppgtt_set_pages(struct i915_vma *vma);
void clear_pages(struct i915_vma *vma);
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,

View file

@ -404,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size)
int
intel_context_migrate_copy(struct intel_context *ce,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@ -431,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_ce;
}
if (await) {
err = i915_request_await_dma_fence(rq, await);
if (deps) {
err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;
@ -442,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
}
await = NULL;
deps = NULL;
}
/* The PTE updates + copy must not be interrupted. */
@ -525,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value)
int
intel_context_migrate_clear(struct intel_context *ce,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
@ -550,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce,
goto out_ce;
}
if (await) {
err = i915_request_await_dma_fence(rq, await);
if (deps) {
err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;
@ -561,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce,
goto out_rq;
}
await = NULL;
deps = NULL;
}
/* The PTE updates + clear must not be interrupted. */
@ -599,7 +599,7 @@ intel_context_migrate_clear(struct intel_context *ce,
int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@ -624,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m,
if (err)
goto out;
err = intel_context_migrate_copy(ce, await,
err = intel_context_migrate_copy(ce, deps,
src, src_cache_level, src_is_lmem,
dst, dst_cache_level, dst_is_lmem,
out);
@ -638,7 +638,7 @@ int intel_migrate_copy(struct intel_migrate *m,
int
intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
@ -661,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m,
if (err)
goto out;
err = intel_context_migrate_clear(ce, await, sg, cache_level,
err = intel_context_migrate_clear(ce, deps, sg, cache_level,
is_lmem, value, out);
intel_context_unpin(ce);

View file

@ -11,6 +11,7 @@
#include "intel_migrate_types.h"
struct dma_fence;
struct i915_deps;
struct i915_request;
struct i915_gem_ww_ctx;
struct intel_gt;
@ -23,7 +24,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m);
int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@ -33,7 +34,7 @@ int intel_migrate_copy(struct intel_migrate *m,
struct i915_request **out);
int intel_context_migrate_copy(struct intel_context *ce,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@ -45,7 +46,7 @@ int intel_context_migrate_copy(struct intel_context *ce,
int
intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
@ -53,7 +54,7 @@ intel_migrate_clear(struct intel_migrate *m,
struct i915_request **out);
int
intel_context_migrate_clear(struct intel_context *ce,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,

View file

@ -289,16 +289,6 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm,
}
}
int ppgtt_set_pages(struct i915_vma *vma)
{
GEM_BUG_ON(vma->pages);
vma->pages = vma->obj->mm.pages;
vma->page_sizes = vma->obj->mm.page_sizes;
return 0;
}
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
unsigned long lmem_pt_obj_flags)
{
@ -315,6 +305,4 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
ppgtt->vm.vma_ops.clear_pages = clear_pages;
}

View file

@ -197,6 +197,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_memory_region *mem;
resource_size_t min_page_size;
resource_size_t io_start;
resource_size_t lmem_size;
int err;
@ -211,10 +212,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
return ERR_PTR(-ENODEV);
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
I915_GTT_PAGE_SIZE_4K;
mem = intel_memory_region_create(i915,
0,
lmem_size,
I915_GTT_PAGE_SIZE_4K,
min_page_size,
io_start,
INTEL_MEMORY_LOCAL,
0,

View file

@ -2226,6 +2226,65 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps)
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
}
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;
/* Allow punit to process software requests */
intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
}
void intel_rps_raise_unslice(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 rp0_unslice_req;
mutex_lock(&rps->lock);
if (rps_uses_slpc(rps)) {
/* RP limits have not been initialized yet for SLPC path */
rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0)
& 0xff) * GEN9_FREQ_SCALER;
intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
((rp0_unslice_req <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
} else {
intel_rps_set(rps, rps->rp0_freq);
}
mutex_unlock(&rps->lock);
}
void intel_rps_lower_unslice(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 rpn_unslice_req;
mutex_lock(&rps->lock);
if (rps_uses_slpc(rps)) {
/* RP limits have not been initialized yet for SLPC path */
rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16)
& 0xff) * GEN9_FREQ_SCALER;
intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
((rpn_unslice_req <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
} else {
intel_rps_set(rps, rps->min_freq);
}
mutex_unlock(&rps->lock);
}
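/*
 * Not part of the patch: a userspace sketch of how the SLPC paths above
 * derive their RP0/RPn requests from RP_STATE_CAP.  The bit positions
 * mirror the ">> 0 & 0xff" and ">> 16 & 0xff" extractions in
 * intel_rps_raise_unslice()/intel_rps_lower_unslice(); the sample
 * register value and the frequency-scaler stand-in are invented.
 */
#include <stdio.h>

#define EXAMPLE_FREQ_SCALER 3	/* stand-in for GEN9_FREQ_SCALER */

int main(void)
{
	unsigned int rp_state_cap = 0x00160a23;	/* fabricated sample */
	unsigned int rp0 = (rp_state_cap >> 0) & 0xff;
	unsigned int rpn = (rp_state_cap >> 16) & 0xff;

	printf("RP0 software request: %u\n", rp0 * EXAMPLE_FREQ_SCALER);
	printf("RPn software request: %u\n", rpn * EXAMPLE_FREQ_SCALER);
	return 0;
}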
/* External interface for intel_ips.ko */
static struct drm_i915_private __rcu *ips_mchdev;
@ -2302,7 +2361,7 @@ unsigned long i915_read_mch_val(void)
return 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
struct intel_ips *ips = &i915->gt.rps.ips;
struct intel_ips *ips = &to_gt(i915)->rps.ips;
spin_lock_irq(&mchdev_lock);
chipset_val = __ips_chipset_val(ips);
@ -2329,7 +2388,7 @@ bool i915_gpu_raise(void)
if (!i915)
return false;
rps = &i915->gt.rps;
rps = &to_gt(i915)->rps;
spin_lock_irq(&mchdev_lock);
if (rps->max_freq_softlimit < rps->max_freq)
@ -2356,7 +2415,7 @@ bool i915_gpu_lower(void)
if (!i915)
return false;
rps = &i915->gt.rps;
rps = &to_gt(i915)->rps;
spin_lock_irq(&mchdev_lock);
if (rps->max_freq_softlimit > rps->min_freq)
@ -2382,7 +2441,7 @@ bool i915_gpu_busy(void)
if (!i915)
return false;
ret = i915->gt.awake;
ret = to_gt(i915)->awake;
drm_dev_put(&i915->drm);
return ret;
@ -2405,11 +2464,11 @@ bool i915_gpu_turbo_disable(void)
if (!i915)
return false;
rps = &i915->gt.rps;
rps = &to_gt(i915)->rps;
spin_lock_irq(&mchdev_lock);
rps->max_freq_softlimit = rps->min_freq;
ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
spin_unlock_irq(&mchdev_lock);
drm_dev_put(&i915->drm);

View file

@ -45,6 +45,8 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
u32 intel_rps_read_state_cap(struct intel_rps *rps);
void intel_rps_raise_unslice(struct intel_rps *rps);
void intel_rps_lower_unslice(struct intel_rps *rps);
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);

View file

@ -929,7 +929,7 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
unsigned int slice, subslice;
u32 mcr, mcr_mask;

View file

@ -17,7 +17,7 @@ static int mock_timeline_pin(struct intel_timeline *tl)
{
int err;
if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj)))
if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj, NULL)))
return -EBUSY;
err = intel_timeline_pin_map(tl);
@ -345,7 +345,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
struct mock_engine *engine;
GEM_BUG_ON(id >= I915_NUM_ENGINES);
GEM_BUG_ON(!i915->gt.uncore);
GEM_BUG_ON(!to_gt(i915)->uncore);
engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
if (!engine)
@ -353,8 +353,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
/* minimal engine setup for requests */
engine->base.i915 = i915;
engine->base.gt = &i915->gt;
engine->base.uncore = i915->gt.uncore;
engine->base.gt = to_gt(i915);
engine->base.uncore = to_gt(i915)->uncore;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
engine->base.mask = BIT(id);
@ -377,8 +377,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.release = mock_engine_release;
i915->gt.engine[id] = &engine->base;
i915->gt.engine_class[0][id] = &engine->base;
to_gt(i915)->engine[id] = &engine->base;
to_gt(i915)->engine_class[0][id] = &engine->base;
/* fake hw queue */
spin_lock_init(&engine->hw_lock);

View file

@ -442,7 +442,7 @@ int intel_context_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_active_context),
SUBTEST(live_remote_context),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;

View file

@ -12,7 +12,7 @@ int intel_engine_live_selftests(struct drm_i915_private *i915)
live_engine_pm_selftests,
NULL,
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
typeof(*tests) *fn;
for (fn = tests; *fn; fn++) {

View file

@ -361,10 +361,10 @@ int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
SUBTEST(perf_mi_noop),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}
static int intel_mmio_bases_check(void *arg)

View file

@ -378,13 +378,13 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
int saved_hangcheck;
int err;
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
saved_hangcheck = i915->params.enable_hangcheck;
i915->params.enable_hangcheck = INT_MAX;
err = intel_gt_live_subtests(tests, &i915->gt);
err = intel_gt_live_subtests(tests, to_gt(i915));
i915->params.enable_hangcheck = saved_hangcheck;
return err;

View file

@ -229,7 +229,7 @@ static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
start = ktime_get();
while (intel_engine_get_busy_time(engine, &unused) == busyness) {
dt = ktime_get() - start;
if (dt > 500000) {
if (dt > 10000000) {
pr_err("active wait timed out %lld\n", dt);
ENGINE_TRACE(engine, "active wait time out %lld\n", dt);
return -ETIME;

View file

@ -4502,11 +4502,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_virtual_reset),
};
if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
return 0;
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -193,10 +193,10 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_gt_resume),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}
int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
@ -210,8 +210,8 @@ int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
SUBTEST(live_rc6_ctx_wa),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -2018,7 +2018,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_reset_evict_fence),
SUBTEST(igt_handle_error),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
intel_wakeref_t wakeref;
int err;

View file

@ -1847,5 +1847,5 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -442,7 +442,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915)
SUBTEST(thread_global_copy),
SUBTEST(thread_global_clear),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
if (!gt->migrate.context)
return 0;
@ -465,7 +465,7 @@ create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
return obj;
}
i915_gem_object_trylock(obj);
i915_gem_object_trylock(obj, NULL);
err = i915_gem_object_pin_pages(obj);
if (err) {
i915_gem_object_unlock(obj);
@ -658,7 +658,7 @@ int intel_migrate_perf_selftests(struct drm_i915_private *i915)
SUBTEST(perf_clear_blt),
SUBTEST(perf_copy_blt),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;

View file

@ -451,5 +451,5 @@ int intel_mocs_live_selftests(struct drm_i915_private *i915)
if (!get_mocs_settings(i915, &table))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -376,7 +376,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_atomic_reset),
SUBTEST(igt_atomic_engine_reset),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
if (!intel_has_gpu_reset(gt))
return 0;

View file

@ -291,8 +291,8 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_ctx_switch_wa),
};
if (i915->gt.submission_method > INTEL_SUBMISSION_RING)
if (to_gt(i915)->submission_method > INTEL_SUBMISSION_RING)
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -39,7 +39,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
static int live_slpc_clamp_min(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
@ -166,7 +166,7 @@ static int live_slpc_clamp_min(void *arg)
static int live_slpc_clamp_max(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
struct intel_guc_slpc *slpc;
struct intel_rps *rps;
struct intel_engine_cs *engine;
@ -304,7 +304,7 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_slpc_clamp_min),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);

View file

@ -159,7 +159,7 @@ static int mock_hwsp_freelist(void *arg)
INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
state.gt = &i915->gt;
state.gt = to_gt(i915);
/*
* Create a bunch of timelines and check that their HWSP do not overlap.
@ -1416,8 +1416,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_hwsp_rollover_user),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -1387,8 +1387,8 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_engine_reset_workarounds),
};
if (intel_gt_is_wedged(&i915->gt))
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return intel_gt_live_subtests(tests, &i915->gt);
return intel_gt_live_subtests(tests, to_gt(i915));
}

View file

@ -94,6 +94,11 @@ struct intel_guc {
* @guc_ids: used to allocate new guc_ids, single-lrc
*/
struct ida guc_ids;
/**
* @num_guc_ids: Number of guc_ids; a selftest feature allows
* reducing this number while testing.
*/
int num_guc_ids;
/**
* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
*/
@ -202,6 +207,13 @@ struct intel_guc {
*/
struct delayed_work work;
} timestamp;
#ifdef CONFIG_DRM_I915_SELFTEST
/**
* @number_guc_id_stolen: The number of guc_ids that have been stolen
*/
int number_guc_id_stolen;
#endif
};
static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)

View file

@ -523,6 +523,15 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
ktime_ms_delta(ktime_get(), ct->stall_time),
send->status, recv->status);
CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
atomic_read(&ct->ctbs.send.space) * 4);
CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
atomic_read(&ct->ctbs.recv.space) * 4);
CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
ct->ctbs.send.broken = true;
}
@ -582,12 +591,19 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw)
{
bool h2g = h2g_has_room(ct, h2g_dw);
bool g2h = g2h_has_room(ct, g2h_dw);
lockdep_assert_held(&ct->ctbs.send.lock);
if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) {
if (unlikely(!h2g || !g2h)) {
if (ct->stall_time == KTIME_MAX)
ct->stall_time = ktime_get();
/* Be paranoid and kick G2H tasklet to free credits */
if (!g2h)
tasklet_hi_schedule(&ct->receive_tasklet);
if (unlikely(ct_deadlocked(ct)))
return -EPIPE;
else

View file

@ -40,9 +40,8 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
}
}
/* Copy RSA signature from the fw image to HW for verification */
static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
struct intel_uncore *uncore)
static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw,
struct intel_uncore *uncore)
{
u32 rsa[UOS_RSA_SCRATCH_COUNT];
size_t copied;
@ -58,6 +57,27 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
return 0;
}
static int guc_xfer_rsa_vma(struct intel_uc_fw *guc_fw,
struct intel_uncore *uncore)
{
struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
intel_uncore_write(uncore, UOS_RSA_SCRATCH(0),
intel_guc_ggtt_offset(guc, guc_fw->rsa_data));
return 0;
}
/* Copy RSA signature from the fw image to HW for verification */
static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
struct intel_uncore *uncore)
{
if (guc_fw->rsa_data)
return guc_xfer_rsa_vma(guc_fw, uncore);
else
return guc_xfer_rsa_mmio(guc_fw, uncore);
}
/*
* Read the GuC status register (GUC_STATUS) and store it in the
* specified location; then return a boolean indicating whether
@ -142,7 +162,10 @@ int intel_guc_fw_upload(struct intel_guc *guc)
/*
* Note that GuC needs the CSS header plus uKernel code to be copied
* by the DMA engine in one operation, whereas the RSA signature is
* loaded via MMIO.
* loaded separately, either by copying it to the UOS_RSA_SCRATCH
* register (if key size <= 256) or through a ggtt-pinned vma (if key
* size > 256). The RSA size and therefore the way we provide it to the
* HW is fixed for each platform and hard-coded in the bootrom.
*/
ret = guc_xfer_rsa(&guc->fw, uncore);
if (ret)
@ -164,6 +187,6 @@ int intel_guc_fw_upload(struct intel_guc *guc)
return 0;
out:
intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL);
intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}

View file

@ -15,9 +15,12 @@
struct intel_guc;
#ifdef CONFIG_DRM_I915_DEBUG_GUC
#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define CRASH_BUFFER_SIZE SZ_2M
#define DEBUG_BUFFER_SIZE SZ_16M
#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
#define CRASH_BUFFER_SIZE SZ_1M
#define DEBUG_BUFFER_SIZE SZ_2M
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K

View file

@ -10,28 +10,80 @@
#include "intel_guc.h"
#include "intel_guc_log.h"
#include "intel_guc_log_debugfs.h"
#include "intel_uc.h"
static u32 obj_to_guc_log_dump_size(struct drm_i915_gem_object *obj)
{
u32 size;
if (!obj)
return PAGE_SIZE;
/* "0x%08x 0x%08x 0x%08x 0x%08x\n" => 16 bytes -> 44 chars => x2.75 */
size = ((obj->base.size * 11) + 3) / 4;
/* Add padding for final blank line, any extra header info, etc. */
size = PAGE_ALIGN(size + PAGE_SIZE);
return size;
}
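/*
 * Not part of the patch: a quick userspace check of the sizing above.
 * Each 16 bytes of log become "0x%08x 0x%08x 0x%08x 0x%08x\n", i.e.
 * 4 * 10 + 3 + 1 = 44 characters, hence the x2.75 (= 11/4) scaling plus
 * a page of slack.  PAGE_SIZE/PAGE_ALIGN are stubbed locally.
 */
#include <stdio.h>

#define PAGE_SIZE 4096ul
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

static unsigned long dump_size(unsigned long obj_size)
{
	unsigned long size = (obj_size * 11 + 3) / 4;	/* x2.75, rounded up */

	return PAGE_ALIGN(size + PAGE_SIZE);		/* final padding */
}

int main(void)
{
	printf("44 chars per 16 bytes -> ratio %.2f\n", 44.0 / 16.0);
	printf("a 2M log object needs a %lu byte dump buffer\n",
	       dump_size(2ul << 20));
	return 0;
}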
static u32 guc_log_dump_size(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
if (!intel_guc_is_supported(guc))
return PAGE_SIZE;
if (!log->vma)
return PAGE_SIZE;
return obj_to_guc_log_dump_size(log->vma->obj);
}
static int guc_log_dump_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
int ret;
return intel_guc_log_dump(m->private, &p, false);
ret = intel_guc_log_dump(m->private, &p, false);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m))
pr_warn_once("preallocated size:%zx for %s exceeded\n",
m->size, __func__);
return ret;
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_log_dump, guc_log_dump_size);
static u32 guc_load_err_dump_size(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
if (!intel_guc_is_supported(guc))
return PAGE_SIZE;
return obj_to_guc_log_dump_size(uc->load_err_log);
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m))
pr_warn_once("preallocated size:%zx for %s exceeded\n",
m->size, __func__);
return intel_guc_log_dump(m->private, &p, true);
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_load_err_log_dump, guc_load_err_dump_size);
static int guc_log_level_get(void *data, u64 *val)
{
struct intel_guc_log *log = data;
if (!intel_guc_is_used(log_to_guc(log)))
if (!log->vma)
return -ENODEV;
*val = intel_guc_log_get_level(log);
@ -43,7 +95,7 @@ static int guc_log_level_set(void *data, u64 val)
{
struct intel_guc_log *log = data;
if (!intel_guc_is_used(log_to_guc(log)))
if (!log->vma)
return -ENODEV;
return intel_guc_log_set_level(log, val);

View file

@ -623,7 +623,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
if (unlikely(ret < 0))
return ret;
intel_guc_pm_intrmsk_enable(&i915->gt);
intel_guc_pm_intrmsk_enable(to_gt(i915));
slpc_get_rp_values(slpc);

View file

@ -145,7 +145,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
* use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
* multi-lrc.
*/
#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)
#define NUMBER_MULTI_LRC_GUC_ID(guc) \
((guc)->submission_state.num_guc_ids / 16)
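/*
 * Not part of the patch: a userspace sketch of how the guc_id space is
 * split now that num_guc_ids is per-GuC.  The first 1/16th backs the
 * multi-lrc bitmap and the remainder feeds the single-lrc ida, matching
 * NUMBER_MULTI_LRC_GUC_ID(guc) above and new_guc_id() below.  65536 is
 * only a stand-in for GUC_MAX_LRC_DESCRIPTORS; 4096 is the value the
 * steal_guc_ids selftest switches to.
 */
#include <stdio.h>

static void show_partition(unsigned int num_guc_ids)
{
	unsigned int multi = num_guc_ids / 16;	/* NUMBER_MULTI_LRC_GUC_ID(guc) */

	printf("num_guc_ids=%u: multi-lrc ids [0, %u), single-lrc ids [%u, %u)\n",
	       num_guc_ids, multi, multi, num_guc_ids);
}

int main(void)
{
	show_partition(65536);	/* default-sized id space */
	show_partition(4096);	/* reduced for the selftest */
	return 0;
}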
/*
* Below is a set of functions which control the GuC scheduling state which
@ -1040,8 +1041,6 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
spin_unlock(&ce->guc_state.lock);
GEM_BUG_ON(!do_put && !destroyed);
if (pending_enable || destroyed || deregister) {
decr_outstanding_submission_g2h(guc);
if (deregister)
@ -1206,7 +1205,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
if (intel_gt_pm_get_if_awake(gt) && !in_reset) {
if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
guc_update_engine_gt_clks(engine);
@ -1777,7 +1776,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
destroyed_worker_func);
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap)
return -ENOMEM;
@ -1871,13 +1870,13 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
if (intel_context_is_parent(ce))
ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
NUMBER_MULTI_LRC_GUC_ID,
NUMBER_MULTI_LRC_GUC_ID(guc),
order_base_2(ce->parallel.number_children
+ 1));
else
ret = ida_simple_get(&guc->submission_state.guc_ids,
NUMBER_MULTI_LRC_GUC_ID,
GUC_MAX_LRC_DESCRIPTORS,
NUMBER_MULTI_LRC_GUC_ID(guc),
guc->submission_state.num_guc_ids,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN);
if (unlikely(ret < 0))
@ -1935,14 +1934,18 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(intel_context_is_parent(cn));
list_del_init(&cn->guc_id.link);
ce->guc_id = cn->guc_id;
ce->guc_id.id = cn->guc_id.id;
spin_lock(&ce->guc_state.lock);
spin_lock(&cn->guc_state.lock);
clr_context_registered(cn);
spin_unlock(&ce->guc_state.lock);
spin_unlock(&cn->guc_state.lock);
set_context_guc_id_invalid(cn);
#ifdef CONFIG_DRM_I915_SELFTEST
guc->number_guc_id_stolen++;
#endif
return 0;
} else {
return -EAGAIN;
@ -2646,7 +2649,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
unsigned long flags;
bool disabled;
lockdep_assert_held(&guc->submission_state.lock);
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
@ -2662,7 +2664,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (unlikely(disabled)) {
__release_guc_id(guc, ce);
release_guc_id(guc, ce);
__guc_context_destroy(ce);
return;
}
@ -2696,36 +2698,48 @@ static void __guc_context_destroy(struct intel_context *ce)
static void guc_flush_destroyed_contexts(struct intel_guc *guc)
{
struct intel_context *ce, *cn;
struct intel_context *ce;
unsigned long flags;
GEM_BUG_ON(!submission_disabled(guc) &&
guc_submission_initialized(guc));
spin_lock_irqsave(&guc->submission_state.lock, flags);
list_for_each_entry_safe(ce, cn,
&guc->submission_state.destroyed_contexts,
destroyed_link) {
list_del_init(&ce->destroyed_link);
__release_guc_id(guc, ce);
while (!list_empty(&guc->submission_state.destroyed_contexts)) {
spin_lock_irqsave(&guc->submission_state.lock, flags);
ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
struct intel_context,
destroyed_link);
if (ce)
list_del_init(&ce->destroyed_link);
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
if (!ce)
break;
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
static void deregister_destroyed_contexts(struct intel_guc *guc)
{
struct intel_context *ce, *cn;
struct intel_context *ce;
unsigned long flags;
spin_lock_irqsave(&guc->submission_state.lock, flags);
list_for_each_entry_safe(ce, cn,
&guc->submission_state.destroyed_contexts,
destroyed_link) {
list_del_init(&ce->destroyed_link);
while (!list_empty(&guc->submission_state.destroyed_contexts)) {
spin_lock_irqsave(&guc->submission_state.lock, flags);
ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
struct intel_context,
destroyed_link);
if (ce)
list_del_init(&ce->destroyed_link);
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
if (!ce)
break;
guc_lrc_desc_unpin(ce);
}
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
static void destroyed_worker_func(struct work_struct *w)
@ -3770,6 +3784,7 @@ static bool __guc_submission_selected(struct intel_guc *guc)
void intel_guc_submission_init_early(struct intel_guc *guc)
{
guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
}
@ -4018,11 +4033,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_engine_cs *engine;
struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
if (unlikely(len != 3)) {
drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
drm_err(&gt->i915->drm, "Invalid length %u", len);
return -EPROTO;
}
@ -4032,12 +4048,19 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
engine = guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
drm_err(&guc_to_gt(guc)->i915->drm,
drm_err(&gt->i915->drm,
"Invalid engine %d:%d", guc_class, instance);
return -EPROTO;
}
intel_gt_handle_error(guc_to_gt(guc), engine->mask,
/*
* This is an unexpected failure of a hardware feature, so log a real
* error message, not just the informational one that comes with the reset.
*/
drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
guc_class, instance, engine->name, reason);
intel_gt_handle_error(gt, engine->mask,
I915_ERROR_CAPTURE,
"GuC failed to reset %s (reason=0x%08x)\n",
engine->name, reason);

View file

@ -54,65 +54,6 @@ void intel_huc_init_early(struct intel_huc *huc)
}
}
static int intel_huc_rsa_data_create(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
struct intel_guc *guc = &gt->uc.guc;
struct i915_vma *vma;
size_t copied;
void *vaddr;
int err;
err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
/*
* HuC firmware will sit above GUC_GGTT_TOP and will not map
* through GTT. Unfortunately, this means GuC cannot perform
* the HuC auth. as the rsa offset now falls within the GuC
* inaccessible range. We resort to perma-pinning an additional
* vma within the accessible range that only contains the rsa
* signature. The GuC can use this extra pinning to perform
* the authentication since its GGTT offset will be GuC
* accessible.
*/
GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE);
vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
if (IS_ERR(vma))
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
i915_coherent_map_type(gt->i915,
vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
err = PTR_ERR(vaddr);
goto unpin_out;
}
copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
i915_gem_object_unpin_map(vma->obj);
if (copied < huc->fw.rsa_size) {
err = -ENOMEM;
goto unpin_out;
}
huc->rsa_data = vma;
return 0;
unpin_out:
i915_vma_unpin_and_release(&vma, 0);
return err;
}
static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
{
i915_vma_unpin_and_release(&huc->rsa_data, 0);
}
int intel_huc_init(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@ -122,21 +63,10 @@ int intel_huc_init(struct intel_huc *huc)
if (err)
goto out;
/*
* HuC firmware image is outside GuC accessible range.
* Copy the RSA signature out of the image into
* a perma-pinned region set aside for it
*/
err = intel_huc_rsa_data_create(huc);
if (err)
goto out_fini;
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
return 0;
out_fini:
intel_uc_fw_fini(&huc->fw);
out:
i915_probe_error(i915, "failed with %d\n", err);
return err;
@ -147,7 +77,6 @@ void intel_huc_fini(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
intel_huc_rsa_data_destroy(huc);
intel_uc_fw_fini(&huc->fw);
}
@ -177,7 +106,7 @@ int intel_huc_auth(struct intel_huc *huc)
goto fail;
ret = intel_guc_auth_huc(guc,
intel_guc_ggtt_offset(guc, huc->rsa_data));
intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
if (ret) {
DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
goto fail;
@ -199,7 +128,7 @@ int intel_huc_auth(struct intel_huc *huc)
fail:
i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL);
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}

View file

@ -15,8 +15,6 @@ struct intel_huc {
struct intel_uc_fw fw;
/* HuC-specific additions */
struct i915_vma *rsa_data;
struct {
i915_reg_t reg;
u32 mask;

View file

@ -8,6 +8,7 @@
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
#include "gt/intel_rps.h"
#include "intel_uc.h"
#include "i915_drv.h"
@ -462,6 +463,8 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
while (attempts--) {
/*
* Always reset the GuC just before (re)loading, so
@ -499,6 +502,9 @@ static int __uc_init_hw(struct intel_uc *uc)
ret = intel_guc_slpc_enable(&guc->slpc);
if (ret)
goto err_submission;
} else {
/* Restore GT back to RPn for non-SLPC path */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
@ -529,6 +535,9 @@ static int __uc_init_hw(struct intel_uc *uc)
err_log_capture:
__uc_capture_load_err_log(uc);
err_out:
/* Return GT back to RPn */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
__uc_sanitize(uc);
if (!ret) {

View file

@ -48,22 +48,39 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same
* firmware as TGL.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \
fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \
fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \
fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \
fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \
fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0))
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \
fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \
fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \
fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \
fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \
fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \
fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \
fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \
fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \
fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \
fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \
fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0))
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \
fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \
fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \
fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \
fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \
fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \
fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \
fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \
fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \
fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0))
#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
@ -79,11 +96,11 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
/* All blobs need to be declared via MODULE_FIRMWARE() */
#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
MODULE_FIRMWARE(guc_); \
MODULE_FIRMWARE(huc_);
#define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \
MODULE_FIRMWARE(uc_);
INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH)
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
struct __packed uc_fw_blob {
@ -106,31 +123,47 @@ struct __packed uc_fw_blob {
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES];
const struct uc_fw_blob blob;
};
#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \
#define MAKE_FW_LIST(platform_, revid_, uc_) \
{ \
.p = INTEL_##platform_, \
.rev = revid_, \
.blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \
.blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \
.blob = uc_, \
},
struct fw_blobs_by_type {
const struct uc_fw_platform_requirement *blobs;
u32 count;
};
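/*
 * Not part of the patch: a userspace sketch of the table walk done by
 * __uc_fw_auto_select() below.  The platforms, revisions and firmware
 * paths here are invented; only the ordering rule (newest platform
 * first, first entry with "p == platform && rev >= entry.rev" wins)
 * mirrors the real code.
 */
#include <stdio.h>

enum platform { PLAT_OLD = 1, PLAT_MID = 2, PLAT_NEW = 3 };

struct blob_entry {
	enum platform p;
	unsigned char rev;	/* first platform rev using this blob */
	const char *path;
};

static const struct blob_entry table[] = {
	{ PLAT_NEW, 0, "i915/new_guc_62.0.0.bin" },
	{ PLAT_MID, 5, "i915/mid_rev5_guc_62.0.0.bin" },
	{ PLAT_MID, 0, "i915/mid_guc_62.0.0.bin" },
	{ PLAT_OLD, 0, "i915/old_guc_62.0.0.bin" },
};

static const char *select_blob(enum platform p, unsigned char rev)
{
	unsigned int i;

	/* the list is sorted newest platform first; stop once we pass it */
	for (i = 0; i < sizeof(table) / sizeof(table[0]) && p <= table[i].p; i++)
		if (p == table[i].p && rev >= table[i].rev)
			return table[i].path;

	return NULL;	/* no firmware defined for this platform */
}

int main(void)
{
	printf("MID rev 6 -> %s\n", select_blob(PLAT_MID, 6));
	printf("MID rev 2 -> %s\n", select_blob(PLAT_MID, 2));
	printf("OLD rev 0 -> %s\n", select_blob(PLAT_OLD, 0));
	return 0;
}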
static void
__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
{
static const struct uc_fw_platform_requirement fw_blobs[] = {
INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB)
static const struct uc_fw_platform_requirement blobs_guc[] = {
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
};
static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
};
static const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
u32 fw_count;
u8 rev = INTEL_REVID(i915);
int i;
for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) {
GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
fw_blobs = blobs_all[uc_fw->type].blobs;
fw_count = blobs_all[uc_fw->type].count;
for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
const struct uc_fw_blob *blob =
&fw_blobs[i].blobs[uc_fw->type];
const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
@ -140,7 +173,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) {
for (i = 1; i < fw_count; i++) {
if (fw_blobs[i].p < fw_blobs[i - 1].p)
continue;
@ -322,13 +355,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
/* now RSA */
if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
err = -EPROTO;
goto fail;
}
uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
/* At least, it should have header, uCode and RSA. Size of all three. */
@ -540,10 +566,79 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
err);
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return err;
}
static inline bool uc_fw_need_rsa_in_memory(struct intel_uc_fw *uc_fw)
{
/*
* The HW reads the GuC RSA from memory if the key size is > 256 bytes,
* while it reads it from the 64 RSA registers if it is smaller.
* The HuC RSA is always read from memory.
*/
return uc_fw->type == INTEL_UC_FW_TYPE_HUC || uc_fw->rsa_size > 256;
}
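/*
 * Not part of the patch: a userspace sketch of the threshold above.
 * Per the comment, keys of up to 256 bytes fit in the 64 RSA scratch
 * registers (64 * 4 bytes), while larger keys, and every HuC key, must
 * be placed in memory.  The sample key sizes are invented.
 */
#include <stdbool.h>
#include <stdio.h>

enum fw_type { FW_GUC, FW_HUC };

static bool need_rsa_in_memory(enum fw_type type, unsigned int rsa_size)
{
	return type == FW_HUC || rsa_size > 256;
}

int main(void)
{
	printf("GuC, 256 byte key: %s\n",
	       need_rsa_in_memory(FW_GUC, 256) ? "memory" : "MMIO registers");
	printf("GuC, 384 byte key: %s\n",
	       need_rsa_in_memory(FW_GUC, 384) ? "memory" : "MMIO registers");
	printf("HuC, 256 byte key: %s\n",
	       need_rsa_in_memory(FW_HUC, 256) ? "memory" : "MMIO registers");
	return 0;
}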
static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
{
struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
struct i915_vma *vma;
size_t copied;
void *vaddr;
int err;
err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
if (!uc_fw_need_rsa_in_memory(uc_fw))
return 0;
/*
* uC firmwares will sit above GUC_GGTT_TOP and will not map through
* GGTT. Unfortunately, this means that the GuC HW cannot perform the uC
* authentication from memory, as the RSA offset now falls within the
* GuC inaccessible range. We resort to perma-pinning an additional vma
* within the accessible range that only contains the RSA signature.
* The GuC HW can use this extra pinning to perform the authentication
* since its GGTT offset will be GuC accessible.
*/
GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE);
vma = intel_guc_allocate_vma(&gt->uc.guc, PAGE_SIZE);
if (IS_ERR(vma))
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
i915_coherent_map_type(gt->i915, vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
err = PTR_ERR(vaddr);
goto unpin_out;
}
copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size);
i915_gem_object_unpin_map(vma->obj);
if (copied < uc_fw->rsa_size) {
err = -ENOMEM;
goto unpin_out;
}
uc_fw->rsa_data = vma;
return 0;
unpin_out:
i915_vma_unpin_and_release(&vma, 0);
return err;
}
static void uc_fw_rsa_data_destroy(struct intel_uc_fw *uc_fw)
{
i915_vma_unpin_and_release(&uc_fw->rsa_data, 0);
}
int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
{
int err;
@ -558,14 +653,29 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
if (err) {
DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
intel_uc_fw_type_repr(uc_fw->type), err);
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
goto out;
}
err = uc_fw_rsa_data_create(uc_fw);
if (err) {
DRM_DEBUG_DRIVER("%s fw rsa data creation failed, err=%d\n",
intel_uc_fw_type_repr(uc_fw->type), err);
goto out_unpin;
}
return 0;
out_unpin:
i915_gem_object_unpin_pages(uc_fw->obj);
out:
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
return err;
}
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
uc_fw_rsa_data_destroy(uc_fw);
if (i915_gem_object_has_pinned_pages(uc_fw->obj))
i915_gem_object_unpin_pages(uc_fw->obj);

View file

@ -32,11 +32,12 @@ struct intel_gt;
* | | MISSING <--/ | \--> ERROR |
* | fetch | V |
* | | AVAILABLE |
* +------------+- | -+
* +------------+- | \ -+
* | | | \--> INIT FAIL |
* | init | V |
* | | /------> LOADABLE <----<-----------\ |
* +------------+- \ / \ \ \ -+
* | | FAIL <--< \--> TRANSFERRED \ |
* | | LOAD FAIL <--< \--> TRANSFERRED \ |
* | upload | \ / \ / |
* | | \---------/ \--> RUNNING |
* +------------+---------------------------------------------------+
@ -50,8 +51,9 @@ enum intel_uc_fw_status {
INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
};
@ -84,6 +86,7 @@ struct intel_uc_fw {
* or during a GT reset (mutex guarantees single threaded).
*/
struct i915_vma dummy;
struct i915_vma *rsa_data;
/*
* The firmware build process will generate a version header file with major and
@ -130,10 +133,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
return "ERROR";
case INTEL_UC_FIRMWARE_AVAILABLE:
return "AVAILABLE";
case INTEL_UC_FIRMWARE_INIT_FAIL:
return "INIT FAIL";
case INTEL_UC_FIRMWARE_LOADABLE:
return "LOADABLE";
case INTEL_UC_FIRMWARE_FAIL:
return "FAIL";
case INTEL_UC_FIRMWARE_LOAD_FAIL:
return "LOAD FAIL";
case INTEL_UC_FIRMWARE_TRANSFERRED:
return "TRANSFERRED";
case INTEL_UC_FIRMWARE_RUNNING:
@ -155,7 +160,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
return -ENOENT;
case INTEL_UC_FIRMWARE_ERROR:
return -ENOEXEC;
case INTEL_UC_FIRMWARE_FAIL:
case INTEL_UC_FIRMWARE_INIT_FAIL:
case INTEL_UC_FIRMWARE_LOAD_FAIL:
return -EIO;
case INTEL_UC_FIRMWARE_SELECTED:
return -ESTALE;

View file

@ -3,8 +3,21 @@
* Copyright © 2021 Intel Corporation
*/
#include "selftests/igt_spinner.h"
#include "selftests/intel_scheduler_helpers.h"
static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
{
int err = 0;
i915_request_get(rq);
i915_request_add(rq);
if (spin && !igt_wait_for_spinner(spin, rq))
err = -ETIMEDOUT;
return err;
}
static struct i915_request *nop_user_request(struct intel_context *ce,
struct i915_request *from)
{
@ -110,12 +123,172 @@ static int intel_guc_scrub_ctbs(void *arg)
return ret;
}
/*
* intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
*
* This test creates a spinner which is used to block all subsequent submissions
* until it completes. Next, a loop creates a context and a NOP request each
* iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
* The spinner is ended, unblocking all requests created in the loop. At this
* point all guc_ids are exhausted but are available to steal. Try to create
* another request, which should successfully steal a guc_id. Wait on the last
* request to complete, idle the GPU, verify a guc_id was stolen via a counter,
* and exit the test. The test also artificially reduces the number of guc_ids
* so the test runs in a timely manner.
*/
static int intel_guc_steal_guc_ids(void *arg)
{
struct intel_gt *gt = arg;
struct intel_guc *guc = &gt->uc.guc;
int ret, sv, context_index = 0;
intel_wakeref_t wakeref;
struct intel_engine_cs *engine;
struct intel_context **ce;
struct igt_spinner spin;
struct i915_request *spin_rq = NULL, *rq, *last = NULL;
int number_guc_id_stolen = guc->number_guc_id_stolen;
ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL);
if (!ce) {
pr_err("Context array allocation failed\n");
return -ENOMEM;
}
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
engine = intel_selftest_find_any_engine(gt);
sv = guc->submission_state.num_guc_ids;
guc->submission_state.num_guc_ids = 4096;
/* Create spinner to block requests in below loop */
ce[context_index] = intel_context_create(engine);
if (IS_ERR(ce[context_index])) {
ret = PTR_ERR(ce[context_index]);
ce[context_index] = NULL;
pr_err("Failed to create context: %d\n", ret);
goto err_wakeref;
}
ret = igt_spinner_init(&spin, engine->gt);
if (ret) {
pr_err("Failed to create spinner: %d\n", ret);
goto err_contexts;
}
spin_rq = igt_spinner_create_request(&spin, ce[context_index],
MI_ARB_CHECK);
if (IS_ERR(spin_rq)) {
ret = PTR_ERR(spin_rq);
pr_err("Failed to create spinner request: %d\n", ret);
goto err_contexts;
}
ret = request_add_spin(spin_rq, &spin);
if (ret) {
pr_err("Failed to add Spinner request: %d\n", ret);
goto err_spin_rq;
}
/* Use all guc_ids */
while (ret != -EAGAIN) {
ce[++context_index] = intel_context_create(engine);
if (IS_ERR(ce[context_index])) {
ret = PTR_ERR(ce[context_index--]);
ce[context_index] = NULL;
pr_err("Failed to create context: %d\n", ret);
goto err_spin_rq;
}
rq = nop_user_request(ce[context_index], spin_rq);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
if (ret != -EAGAIN) {
pr_err("Failed to create request, %d: %d\n",
context_index, ret);
goto err_spin_rq;
}
} else {
if (last)
i915_request_put(last);
last = rq;
}
}
/* Release blocked requests */
igt_spinner_end(&spin);
ret = intel_selftest_wait_for_rq(spin_rq);
if (ret) {
pr_err("Spin request failed to complete: %d\n", ret);
i915_request_put(last);
goto err_spin_rq;
}
i915_request_put(spin_rq);
igt_spinner_fini(&spin);
spin_rq = NULL;
/* Wait for last request */
ret = i915_request_wait(last, 0, HZ * 30);
i915_request_put(last);
if (ret < 0) {
pr_err("Last request failed to complete: %d\n", ret);
goto err_spin_rq;
}
/* Try to steal guc_id */
rq = nop_user_request(ce[context_index], NULL);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret);
goto err_spin_rq;
}
/* Wait for request with stolen guc_id */
ret = i915_request_wait(rq, 0, HZ);
i915_request_put(rq);
if (ret < 0) {
pr_err("Request with stolen guc_id failed to complete: %d\n",
ret);
goto err_spin_rq;
}
/* Wait for idle */
ret = intel_gt_wait_for_idle(gt, HZ * 30);
if (ret < 0) {
pr_err("GT failed to idle: %d\n", ret);
goto err_spin_rq;
}
/* Verify a guc_id was stolen */
if (guc->number_guc_id_stolen == number_guc_id_stolen) {
pr_err("No guc_id was stolen");
ret = -EINVAL;
} else {
ret = 0;
}
err_spin_rq:
if (spin_rq) {
igt_spinner_end(&spin);
intel_selftest_wait_for_rq(spin_rq);
i915_request_put(spin_rq);
igt_spinner_fini(&spin);
intel_gt_wait_for_idle(gt, HZ * 30);
}
err_contexts:
for (; context_index >= 0 && ce[context_index]; --context_index)
intel_context_put(ce[context_index]);
err_wakeref:
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
kfree(ce);
guc->submission_state.num_guc_ids = sv;
return ret;
}
int intel_guc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_scrub_ctbs),
SUBTEST(intel_guc_steal_guc_ids),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;

View file

@ -167,7 +167,7 @@ int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_multi_lrc_basic),
};
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;

View file

@ -205,7 +205,7 @@ int intel_gvt_init_device(struct drm_i915_private *i915)
spin_lock_init(&gvt->scheduler.mmio_context_lock);
mutex_init(&gvt->lock);
mutex_init(&gvt->sched_lock);
gvt->gt = &i915->gt;
gvt->gt = to_gt(i915);
i915->gvt = gvt;
init_device_info(gvt);

View file

@ -1386,7 +1386,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
enum intel_engine_id i;
int ret;
ppgtt = i915_ppgtt_create(&i915->gt, I915_BO_ALLOC_PM_EARLY);
ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);

View file

@ -426,8 +426,9 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
return true;
}
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
struct dma_fence *fence = &rq->fence;
struct i915_active_fence *active;
int err;
@ -436,7 +437,7 @@ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
if (err)
return err;
active = active_instance(ref, idx);
active = active_instance(ref, i915_request_timeline(rq)->fence_context);
if (!active) {
err = -ENOMEM;
goto out;
@ -477,29 +478,6 @@ __i915_active_set_fence(struct i915_active *ref,
return prev;
}
static struct i915_active_fence *
__active_fence(struct i915_active *ref, u64 idx)
{
struct active_node *it;
it = __active_lookup(ref, idx);
if (unlikely(!it)) { /* Contention with parallel tree builders! */
spin_lock_irq(&ref->tree_lock);
it = __active_lookup(ref, idx);
spin_unlock_irq(&ref->tree_lock);
}
GEM_BUG_ON(!it); /* slot must be preallocated */
return &it->base;
}
struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
/* Only valid while active, see i915_active_acquire_for_context() */
return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
}
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{

View file

@ -164,26 +164,11 @@ void __i915_active_init(struct i915_active *ref,
__i915_active_init(ref, active, retire, flags, &__mkey, &__wkey); \
} while (0)
struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
static inline int
i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
return i915_active_ref(ref,
i915_request_timeline(rq)->fence_context,
&rq->fence);
}
int i915_active_add_request(struct i915_active *ref, struct i915_request *rq);
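/*
 * Illustrative caller conversion (not part of this patch): the previously
 * open-coded form
 *
 *	i915_active_ref(ref, i915_request_timeline(rq)->fence_context, &rq->fence);
 *
 * is now simply
 *
 *	i915_active_add_request(ref, rq);
 */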
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
static inline bool i915_active_has_exclusive(struct i915_active *ref)
{
return rcu_access_pointer(ref->excl.fence);
}
int __i915_active_wait(struct i915_active *ref, int state);
static inline int i915_active_wait(struct i915_active *ref)
{

View file

@ -65,7 +65,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
intel_device_info_print_static(INTEL_INFO(i915), &p);
intel_device_info_print_runtime(RUNTIME_INFO(i915), &p);
i915_print_iommu_status(i915, &p);
intel_gt_info_print(&i915->gt.info, &p);
intel_gt_info_print(&to_gt(i915)->info, &p);
intel_driver_caps_print(&i915->caps, &p);
kernel_param_lock(THIS_MODULE);
@ -293,7 +293,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
gpu = NULL;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
gpu = i915_gpu_coredump(&i915->gt, ALL_ENGINES);
gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES);
if (IS_ERR(gpu))
return PTR_ERR(gpu);
@ -351,7 +351,7 @@ static const struct file_operations i915_error_state_fops = {
static int i915_frequency_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
struct drm_printer p = drm_seq_file_printer(m);
intel_gt_pm_frequency_dump(gt, &p);
@ -439,11 +439,11 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_rps *rps = &dev_priv->gt.rps;
struct intel_rps *rps = &to_gt(dev_priv)->rps;
seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
seq_printf(m, "GPU busy? %s\n", yesno(to_gt(dev_priv)->awake));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
@ -476,7 +476,7 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
seq_printf(m, "Runtime power status: %s\n",
enableddisabled(!dev_priv->power_domains.init_wakeref));
seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
seq_printf(m, "GPU idle: %s\n", yesno(!to_gt(dev_priv)->awake));
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
@ -508,18 +508,18 @@ static int i915_engine_info(struct seq_file *m, void *unused)
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
seq_printf(m, "GT awake? %s [%d], %llums\n",
yesno(i915->gt.awake),
atomic_read(&i915->gt.wakeref.count),
ktime_to_ms(intel_gt_get_awake_time(&i915->gt)));
yesno(to_gt(i915)->awake),
atomic_read(&to_gt(i915)->wakeref.count),
ktime_to_ms(intel_gt_get_awake_time(to_gt(i915))));
seq_printf(m, "CS timestamp frequency: %u Hz, %d ns\n",
i915->gt.clock_frequency,
i915->gt.clock_period_ns);
to_gt(i915)->clock_frequency,
to_gt(i915)->clock_period_ns);
p = drm_seq_file_printer(m);
for_each_uabi_engine(engine, i915)
intel_engine_dump(engine, &p, "%s\n", engine->name);
intel_gt_show_timelines(&i915->gt, &p, i915_request_show_with_schedule);
intel_gt_show_timelines(to_gt(i915), &p, i915_request_show_with_schedule);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
@ -558,14 +558,14 @@ static int i915_wedged_get(void *data, u64 *val)
{
struct drm_i915_private *i915 = data;
return intel_gt_debugfs_reset_show(&i915->gt, val);
return intel_gt_debugfs_reset_show(to_gt(i915), val);
}
static int i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
return intel_gt_debugfs_reset_store(&i915->gt, val);
return intel_gt_debugfs_reset_store(to_gt(i915), val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
@ -581,7 +581,7 @@ i915_perf_noa_delay_set(void *data, u64 val)
* This would lead to infinite waits as we're doing timestamp
* difference on the CS with only 32bits.
*/
if (intel_gt_ns_to_clock_interval(&i915->gt, val) > U32_MAX)
if (intel_gt_ns_to_clock_interval(to_gt(i915), val) > U32_MAX)
return -EINVAL;
atomic64_set(&i915->perf.noa_programming_delay, val);
@ -666,16 +666,18 @@ static int
i915_drop_caches_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
unsigned int flags;
int ret;
DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
val, val & DROP_ALL);
ret = gt_drop_caches(&i915->gt, val);
ret = gt_drop_caches(to_gt(i915), val);
if (ret)
return ret;
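/*
 * Perform the shrinking below as if called from reclaim: fs_reclaim_acquire()
 * lets lockdep catch locks that must not be taken under reclaim, while the
 * noreclaim (PF_MEMALLOC) section keeps the shrink calls from recursing into
 * direct reclaim.
 */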
fs_reclaim_acquire(GFP_KERNEL);
flags = memalloc_noreclaim_save();
if (val & DROP_BOUND)
i915_gem_shrink(NULL, i915, LONG_MAX, NULL, I915_SHRINK_BOUND);
@ -684,6 +686,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_SHRINK_ALL)
i915_gem_shrink_all(i915);
memalloc_noreclaim_restore(flags);
fs_reclaim_release(GFP_KERNEL);
if (val & DROP_RCU)
@ -702,7 +705,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
return intel_sseu_status(m, gt);
}
@ -711,14 +714,14 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt);
return intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt);
return intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
}
static const struct file_operations i915_forcewake_fops = {

View file

@ -40,8 +40,8 @@ static int notify_guc(struct drm_i915_private *i915)
{
int ret = 0;
if (intel_uc_uses_guc_submission(&i915->gt.uc))
ret = intel_guc_global_policies_update(&i915->gt.uc.guc);
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc);
return ret;
}

View file

@ -0,0 +1,237 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <drm/ttm/ttm_bo_api.h>
#include "i915_deps.h"
/**
* DOC: Set of utilities to dynamically collect dependencies into a
* structure which is fed into the GT migration code.
*
* Once we can do async unbinding, this is also needed to coalesce
* the migration fence with the unbind fences if these are coalesced
* post-migration.
*
* While collecting the individual dependencies, we store the refcounted
* struct dma_fence pointers in a realloc-managed pointer array, since
* that can be easily fed into a dma_fence_array. Other options are
* available, like for example an xarray for similarity with drm/sched.
* Can be changed easily if needed.
*
* A struct i915_deps needs to be initialized using i915_deps_init().
* If i915_deps_add_dependency() or i915_deps_add_resv() return an
* error code they will internally call i915_deps_fini(), which frees
* all internal references and allocations.
*/
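/*
 * Illustrative usage sketch (not part of this patch; "resv" and "ctx" stand
 * in for whatever reservation object and ttm_operation_ctx the caller already
 * holds, and the gfp flags are only an example):
 *
 *	struct i915_deps deps;
 *	int err;
 *
 *	i915_deps_init(&deps, GFP_KERNEL | __GFP_NOWARN);
 *	err = i915_deps_add_resv(&deps, resv, ctx);
 *	if (err)
 *		return err;
 *
 *	err = i915_deps_sync(&deps, ctx);
 *	i915_deps_fini(&deps);
 *	return err;
 *
 * On error the add functions have already called i915_deps_fini(), so only
 * the success path needs an explicit fini. Alternatively the collection can
 * be handed to a request via i915_request_await_deps().
 */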
/* Min number of fence pointers in the array when an allocation occurs. */
#define I915_DEPS_MIN_ALLOC_CHUNK 8U
static void i915_deps_reset_fences(struct i915_deps *deps)
{
if (deps->fences != &deps->single)
kfree(deps->fences);
deps->num_deps = 0;
deps->fences_size = 1;
deps->fences = &deps->single;
}
/**
* i915_deps_init - Initialize an i915_deps structure
* @deps: Pointer to the i915_deps structure to initialize.
* @gfp: The allocation mode for subsequent allocations.
*/
void i915_deps_init(struct i915_deps *deps, gfp_t gfp)
{
deps->fences = NULL;
deps->gfp = gfp;
i915_deps_reset_fences(deps);
}
/**
* i915_deps_fini - Finalize an i915_deps structure
* @deps: Pointer to the i915_deps structure to finalize.
*
* This function drops all fence references taken, conditionally frees and
* then resets the fences array.
*/
void i915_deps_fini(struct i915_deps *deps)
{
unsigned int i;
for (i = 0; i < deps->num_deps; ++i)
dma_fence_put(deps->fences[i]);
if (deps->fences != &deps->single)
kfree(deps->fences);
}
static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
const struct ttm_operation_ctx *ctx)
{
int ret;
if (deps->num_deps >= deps->fences_size) {
unsigned int new_size = 2 * deps->fences_size;
struct dma_fence **new_fences;
new_size = max(new_size, I915_DEPS_MIN_ALLOC_CHUNK);
new_fences = kmalloc_array(new_size, sizeof(*new_fences), deps->gfp);
if (!new_fences)
goto sync;
memcpy(new_fences, deps->fences,
deps->fences_size * sizeof(*new_fences));
swap(new_fences, deps->fences);
if (new_fences != &deps->single)
kfree(new_fences);
deps->fences_size = new_size;
}
deps->fences[deps->num_deps++] = dma_fence_get(fence);
return 0;
sync:
if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) {
ret = -EBUSY;
goto unref;
}
ret = dma_fence_wait(fence, ctx->interruptible);
if (ret)
goto unref;
ret = fence->error;
if (ret)
goto unref;
return 0;
unref:
i915_deps_fini(deps);
return ret;
}
/**
* i915_deps_sync - Wait for all the fences in the dependency collection
* @deps: Pointer to the i915_deps structure the fences of which to wait for.
* @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits
* should be performed.
*
* This function waits for fences in the dependency collection. If it
* encounters an error during the wait or a fence error, the wait for
* further fences is aborted and the error returned.
*
* Return: Zero if successful, negative error code on error.
*/
int i915_deps_sync(const struct i915_deps *deps, const struct ttm_operation_ctx *ctx)
{
struct dma_fence **fences = deps->fences;
unsigned int i;
int ret = 0;
for (i = 0; i < deps->num_deps; ++i, ++fences) {
if (ctx->no_wait_gpu && !dma_fence_is_signaled(*fences)) {
ret = -EBUSY;
break;
}
ret = dma_fence_wait(*fences, ctx->interruptible);
if (!ret)
ret = (*fences)->error;
if (ret)
break;
}
return ret;
}
/**
* i915_deps_add_dependency - Add a fence to the dependency collection
* @deps: Pointer to the i915_deps structure a fence is to be added to.
* @fence: The fence to add.
* @ctx: Pointer to a struct ttm_operation_ctx indicating how waits are to
* be performed if waiting.
*
* Adds a fence to the dependency collection, and takes a reference on it.
* If the fence context is not zero and there was a later fence from the
* same fence context already added, then the fence is not added to the
* dependency collection. If the fence context is not zero and there was
* an earlier fence already added, then the fence will replace the older
* fence from the same context and the reference on the earlier fence will
* be dropped.
* If allocating memory to accommodate the new fence fails, the new fence is
* instead waited upon and an error may be returned, depending on the value
* of @ctx or whether the fence signaled with an error. If an error is
* returned, the dependency collection has been finalized and all fence
* references have been dropped.
*
* Return: 0 if success. Negative error code on error.
*/
int i915_deps_add_dependency(struct i915_deps *deps,
struct dma_fence *fence,
const struct ttm_operation_ctx *ctx)
{
unsigned int i;
int ret;
if (!fence)
return 0;
if (dma_fence_is_signaled(fence)) {
ret = fence->error;
if (ret)
i915_deps_fini(deps);
return ret;
}
for (i = 0; i < deps->num_deps; ++i) {
struct dma_fence *entry = deps->fences[i];
if (!entry->context || entry->context != fence->context)
continue;
if (dma_fence_is_later(fence, entry)) {
dma_fence_put(entry);
deps->fences[i] = dma_fence_get(fence);
}
return 0;
}
return i915_deps_grow(deps, fence, ctx);
}
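/*
 * Illustrative behaviour (not part of this patch): for two unsignaled fences
 * from the same non-zero fence context,
 *
 *	i915_deps_add_dependency(&deps, older_fence, ctx);
 *	i915_deps_add_dependency(&deps, later_fence, ctx);
 *
 * leaves a single reference in the collection, on later_fence.
 */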
/**
* i915_deps_add_resv - Add the fences of a reservation object to a dependency
* collection.
* @deps: Pointer to the i915_deps structure a fence is to be added to.
* @resv: The reservation object, the fences of which to add.
* @ctx: Pointer to a struct ttm_operation_ctx indicating how waits are to
* be performed if waiting.
*
* Calls i915_deps_add_dependency() on the indicated fences of @resv.
*
* Return: Zero on success. Negative error code on error.
*/
int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
const struct ttm_operation_ctx *ctx)
{
struct dma_resv_iter iter;
struct dma_fence *fence;
dma_resv_assert_held(resv);
dma_resv_for_each_fence(&iter, resv, true, fence) {
int ret = i915_deps_add_dependency(deps, fence, ctx);
if (ret)
return ret;
}
return 0;
}

View file

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2021 Intel Corporation
*/
#ifndef _I915_DEPS_H_
#define _I915_DEPS_H_
#include <linux/types.h>
struct ttm_operation_ctx;
struct dma_fence;
struct dma_resv;
/**
* struct i915_deps - Collect dependencies into a single dma-fence
* @single: Storage for pointer if the collection is a single fence.
* @fences: Allocated array of fence pointers if more than a single fence;
* otherwise points to the address of @single.
* @num_deps: Current number of dependency fences.
* @fences_size: Size of the @fences array in number of pointers.
* @gfp: Allocation mode.
*/
struct i915_deps {
struct dma_fence *single;
struct dma_fence **fences;
unsigned int num_deps;
unsigned int fences_size;
gfp_t gfp;
};
void i915_deps_init(struct i915_deps *deps, gfp_t gfp);
void i915_deps_fini(struct i915_deps *deps);
int i915_deps_add_dependency(struct i915_deps *deps,
struct dma_fence *fence,
const struct ttm_operation_ctx *ctx);
int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
const struct ttm_operation_ctx *ctx);
int i915_deps_sync(const struct i915_deps *deps,
const struct ttm_operation_ctx *ctx);
#endif

View file

@ -291,7 +291,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv)
static void sanitize_gpu(struct drm_i915_private *i915)
{
if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
__intel_gt_reset(&i915->gt, ALL_ENGINES);
__intel_gt_reset(to_gt(i915), ALL_ENGINES);
}
/**
@ -314,8 +314,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
intel_device_info_subplatform_init(dev_priv);
intel_step_init(dev_priv);
intel_gt_init_early(to_gt(dev_priv), dev_priv);
intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug);
intel_uncore_init_early(&dev_priv->uncore, dev_priv);
intel_uncore_init_early(&dev_priv->uncore, to_gt(dev_priv));
spin_lock_init(&dev_priv->irq_lock);
spin_lock_init(&dev_priv->gpu_error.lock);
@ -346,7 +347,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
intel_wopcm_init_early(&dev_priv->wopcm);
intel_gt_init_early(&dev_priv->gt, dev_priv);
__intel_gt_init_early(to_gt(dev_priv), dev_priv);
i915_gem_init_early(dev_priv);
@ -367,7 +368,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
err_gem:
i915_gem_cleanup_early(dev_priv);
intel_gt_driver_late_release(&dev_priv->gt);
intel_gt_driver_late_release(to_gt(dev_priv));
intel_region_ttm_device_fini(dev_priv);
err_ttm:
vlv_suspend_cleanup(dev_priv);
@ -386,7 +387,7 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
intel_irq_fini(dev_priv);
intel_power_domains_cleanup(dev_priv);
i915_gem_cleanup_early(dev_priv);
intel_gt_driver_late_release(&dev_priv->gt);
intel_gt_driver_late_release(to_gt(dev_priv));
intel_region_ttm_device_fini(dev_priv);
vlv_suspend_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
@ -429,7 +430,7 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
intel_setup_mchbar(dev_priv);
intel_device_info_runtime_init(dev_priv);
ret = intel_gt_init_mmio(&dev_priv->gt);
ret = intel_gt_init_mmio(to_gt(dev_priv));
if (ret)
goto err_uncore;
@ -586,9 +587,9 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
if (ret)
goto err_ggtt;
intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt);
intel_gt_init_hw_early(to_gt(dev_priv), &dev_priv->ggtt);
ret = intel_gt_probe_lmem(&dev_priv->gt);
ret = intel_gt_probe_lmem(to_gt(dev_priv));
if (ret)
goto err_mem_regions;
@ -701,7 +702,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
/* Depends on sysfs having been initialized */
i915_perf_register(dev_priv);
intel_gt_driver_register(&dev_priv->gt);
intel_gt_driver_register(to_gt(dev_priv));
intel_display_driver_register(dev_priv);
@ -729,7 +730,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
intel_display_driver_unregister(dev_priv);
intel_gt_driver_unregister(&dev_priv->gt);
intel_gt_driver_unregister(to_gt(dev_priv));
i915_perf_unregister(dev_priv);
i915_pmu_unregister(dev_priv);
@ -762,7 +763,7 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv)
intel_device_info_print_static(INTEL_INFO(dev_priv), &p);
intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p);
i915_print_iommu_status(dev_priv, &p);
intel_gt_info_print(&dev_priv->gt.info, &p);
intel_gt_info_print(&to_gt(dev_priv)->info, &p);
}
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
@ -1384,7 +1385,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
intel_uncore_resume_early(&dev_priv->uncore);
intel_gt_check_and_clear_faults(&dev_priv->gt);
intel_gt_check_and_clear_faults(to_gt(dev_priv));
intel_display_power_resume_early(dev_priv);
@ -1567,7 +1568,7 @@ static int intel_runtime_suspend(struct device *kdev)
*/
i915_gem_runtime_suspend(dev_priv);
intel_gt_runtime_suspend(&dev_priv->gt);
intel_gt_runtime_suspend(to_gt(dev_priv));
intel_runtime_pm_disable_interrupts(dev_priv);
@ -1583,7 +1584,7 @@ static int intel_runtime_suspend(struct device *kdev)
intel_runtime_pm_enable_interrupts(dev_priv);
intel_gt_runtime_resume(&dev_priv->gt);
intel_gt_runtime_resume(to_gt(dev_priv));
enable_rpm_wakeref_asserts(rpm);
@ -1671,7 +1672,7 @@ static int intel_runtime_resume(struct device *kdev)
* No point of rolling back things in case of an error, as the best
* we can do is to hope that things will still work (and disable RPM).
*/
intel_gt_runtime_resume(&dev_priv->gt);
intel_gt_runtime_resume(to_gt(dev_priv));
/*
* On VLV/CHV display interrupts are part of the display

View file

@ -473,7 +473,7 @@ struct i915_gem_mm {
* List of objects which are pending destruction.
*/
struct llist_head free_list;
struct work_struct free_work;
struct delayed_work free_work;
/**
* Count of objects pending destruction. Used to skip needlessly
* waiting on an RCU barrier if no objects are waiting to be freed.
@ -1005,7 +1005,7 @@ struct drm_i915_private {
struct i915_perf perf;
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct intel_gt gt;
struct intel_gt gt0;
struct {
struct i915_gem_contexts {
@ -1077,6 +1077,11 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
return pci_get_drvdata(pdev);
}
static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
{
return &i915->gt0;
}
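/*
 * Illustrative conversion (not part of this patch): direct dereferences of
 * the embedded GT such as
 *
 *	intel_gt_pm_get(&i915->gt);
 *
 * become
 *
 *	intel_gt_pm_get(to_gt(i915));
 *
 * so the primary GT lookup can be changed in one place for future platforms.
 */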
/* Simple iterator over all initialised engines */
#define for_each_engine(engine__, dev_priv__, id__) \
for ((id__) = 0; \
@ -1527,6 +1532,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_MSLICES(dev_priv) \
(INTEL_INFO(dev_priv)->has_mslices)
/*
* Set this flag when the platform requires 64K GTT page sizes or larger for
* device local memory access. This also implies that we require, or at
* least support, the compact PT layout for the ppGTT when using the 64K
* GTT pages.
*/
#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
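/*
 * Illustrative check (not part of this patch, and the assignment is only a
 * sketch): code picking GTT page sizes for local memory would gate on it,
 * e.g.
 *
 *	if (HAS_64K_PAGES(i915))
 *		page_size = I915_GTT_PAGE_SIZE_64K;
 */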
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
@ -1540,7 +1553,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_PXP(dev_priv) ((IS_ENABLED(CONFIG_DRM_I915_PXP) && \
INTEL_INFO(dev_priv)->has_pxp) && \
VDBOX_MASK(&dev_priv->gt))
VDBOX_MASK(to_gt(dev_priv)))
#define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch)
@ -1623,7 +1636,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
* armed the work again.
*/
while (atomic_read(&i915->mm.free_count)) {
flush_work(&i915->mm.free_work);
flush_delayed_work(&i915->mm.free_work);
flush_delayed_work(&i915->bdev.wq);
rcu_barrier();
}
@ -1657,13 +1670,10 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags);
static inline struct i915_vma * __must_check
struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags)
{
return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags);
}
u64 size, u64 alignment, u64 flags);
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);

View file

@ -877,6 +877,8 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
GEM_WARN_ON(!ww);
if (flags & PIN_MAPPABLE &&
(!view || view->type == I915_GGTT_VIEW_NORMAL)) {
/*
@ -936,10 +938,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
if (ww)
ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
else
ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
if (ret)
return ERR_PTR(ret);
@ -959,6 +958,29 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
return vma;
}
struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags)
{
struct i915_gem_ww_ctx ww;
struct i915_vma *ret;
int err;
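/* for_i915_gem_ww() reruns the body after a ww-mutex backoff when err is -EDEADLK */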
for_i915_gem_ww(&ww, err, true) {
err = i915_gem_object_lock(obj, &ww);
if (err)
continue;
ret = i915_gem_object_ggtt_pin_ww(obj, &ww, view, size,
alignment, flags);
if (IS_ERR(ret))
err = PTR_ERR(ret);
}
return err ? ERR_PTR(err) : ret;
}
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@ -1049,7 +1071,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
intel_uc_fetch_firmwares(&dev_priv->gt.uc);
intel_uc_fetch_firmwares(&to_gt(dev_priv)->uc);
intel_wopcm_init(&dev_priv->wopcm);
ret = i915_init_ggtt(dev_priv);
@ -1069,7 +1091,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
*/
intel_init_clock_gating(dev_priv);
ret = intel_gt_init(&dev_priv->gt);
ret = intel_gt_init(to_gt(dev_priv));
if (ret)
goto err_unlock;
@ -1085,7 +1107,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
i915_gem_drain_workqueue(dev_priv);
if (ret != -EIO)
intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
if (ret == -EIO) {
/*
@ -1093,10 +1115,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
* as wedged. But we only want to do this when the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
if (!intel_gt_is_wedged(&dev_priv->gt)) {
if (!intel_gt_is_wedged(to_gt(dev_priv))) {
i915_probe_error(dev_priv,
"Failed to initialize GPU, declaring it wedged!\n");
intel_gt_set_wedged(&dev_priv->gt);
intel_gt_set_wedged(to_gt(dev_priv));
}
/* Minimal basic recovery for KMS */
@ -1127,7 +1149,7 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
i915_gem_suspend_late(dev_priv);
intel_gt_driver_remove(&dev_priv->gt);
intel_gt_driver_remove(to_gt(dev_priv));
dev_priv->uabi_engines = RB_ROOT;
/* Flush any outstanding unpin_work. */
@ -1138,9 +1160,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
intel_gt_driver_release(&dev_priv->gt);
intel_gt_driver_release(to_gt(dev_priv));
intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
i915_gem_drain_freed_objects(dev_priv);

View file

@ -13,7 +13,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_private *i915 = to_i915(dev);
struct pci_dev *pdev = to_pci_dev(dev->dev);
const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
drm_i915_getparam_t *param = data;
int value = 0;
@ -82,8 +82,8 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
break;
case I915_PARAM_HAS_GPU_RESET:
value = i915->params.enable_hangcheck &&
intel_has_gpu_reset(&i915->gt);
if (value && intel_has_reset_engine(&i915->gt))
intel_has_gpu_reset(to_gt(i915));
if (value && intel_has_reset_engine(to_gt(i915)))
value = 2;
break;
case I915_PARAM_HAS_RESOURCE_STREAMER:
@ -96,7 +96,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = sseu->min_eu_in_pool;
break;
case I915_PARAM_HUC_STATUS:
value = intel_huc_check_status(&i915->gt.uc.huc);
value = intel_huc_check_status(&to_gt(i915)->uc.huc);
if (value < 0)
return value;
break;
@ -158,7 +158,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
return -ENODEV;
break;
case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
value = i915->gt.clock_frequency;
value = to_gt(i915)->clock_frequency;
break;
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(i915)->has_coherent_ggtt;

View file

@ -505,7 +505,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct i915_gem_context_coredump *ctx)
{
const u32 period = m->i915->gt.clock_period_ns;
const u32 period = to_gt(m->i915)->clock_period_ns;
err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
@ -1415,18 +1415,15 @@ static struct i915_vma_coredump *
create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
const char *name, struct i915_vma_compress *compress)
{
struct i915_vma_coredump *ret = NULL;
struct i915_vma_coredump *ret;
struct i915_vma_snapshot tmp;
bool lockdep_cookie;
if (!vma)
return NULL;
GEM_WARN_ON(!i915_vma_is_pinned(vma));
i915_vma_snapshot_init_onstack(&tmp, vma, name);
if (i915_vma_snapshot_resource_pin(&tmp, &lockdep_cookie)) {
ret = i915_vma_coredump_create(gt, &tmp, compress);
i915_vma_snapshot_resource_unpin(&tmp, lockdep_cookie);
}
ret = i915_vma_coredump_create(gt, &tmp, compress);
i915_vma_snapshot_put_onstack(&tmp);
return ret;
@ -1849,7 +1846,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
error->time = ktime_get_real();
error->boottime = ktime_get_boottime();
error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
error->uptime = ktime_sub(ktime_get(), to_gt(i915)->last_init_time);
error->capture = jiffies;
capture_gen(error);

View file

@ -1040,7 +1040,7 @@ static void ivb_parity_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, typeof(*dev_priv), l3_parity.error_work);
struct intel_gt *gt = &dev_priv->gt;
struct intel_gt *gt = to_gt(dev_priv);
u32 error_status, row, bank, subbank;
char *parity_event[6];
u32 misccpctl;
@ -1718,9 +1718,9 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE);
if (gt_iir)
gen6_gt_irq_handler(&dev_priv->gt, gt_iir);
gen6_gt_irq_handler(to_gt(dev_priv), gt_iir);
if (pm_iir)
gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir);
gen6_rps_irq_handler(&to_gt(dev_priv)->rps, pm_iir);
if (hotplug_status)
i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@ -1777,7 +1777,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
ier = intel_uncore_read(&dev_priv->uncore, VLV_IER);
intel_uncore_write(&dev_priv->uncore, VLV_IER, 0);
gen8_gt_irq_handler(&dev_priv->gt, master_ctl);
gen8_gt_irq_handler(to_gt(dev_priv), master_ctl);
if (iir & I915_DISPLAY_PORT_INTERRUPT)
hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@ -2108,7 +2108,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
}
if (DISPLAY_VER(dev_priv) == 5 && de_iir & DE_PCU_EVENT)
gen5_rps_irq_handler(&dev_priv->gt.rps);
gen5_rps_irq_handler(&to_gt(dev_priv)->rps);
}
static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
@ -2189,9 +2189,9 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg)
if (gt_iir) {
raw_reg_write(regs, GTIIR, gt_iir);
if (GRAPHICS_VER(i915) >= 6)
gen6_gt_irq_handler(&i915->gt, gt_iir);
gen6_gt_irq_handler(to_gt(i915), gt_iir);
else
gen5_gt_irq_handler(&i915->gt, gt_iir);
gen5_gt_irq_handler(to_gt(i915), gt_iir);
ret = IRQ_HANDLED;
}
@ -2209,7 +2209,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg)
u32 pm_iir = raw_reg_read(regs, GEN6_PMIIR);
if (pm_iir) {
raw_reg_write(regs, GEN6_PMIIR, pm_iir);
gen6_rps_irq_handler(&i915->gt.rps, pm_iir);
gen6_rps_irq_handler(&to_gt(i915)->rps, pm_iir);
ret = IRQ_HANDLED;
}
}
@ -2635,7 +2635,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
}
/* Find, queue (onto bottom-halves), then clear each source */
gen8_gt_irq_handler(&dev_priv->gt, master_ctl);
gen8_gt_irq_handler(to_gt(dev_priv), master_ctl);
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
if (master_ctl & ~GEN8_GT_IRQS) {
@ -2715,7 +2715,7 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
{
struct drm_i915_private *i915 = arg;
void __iomem * const regs = i915->uncore.regs;
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
u32 master_ctl;
u32 gu_misc_iir;
@ -2771,7 +2771,7 @@ static inline void dg1_master_intr_enable(void __iomem * const regs)
static irqreturn_t dg1_irq_handler(int irq, void *arg)
{
struct drm_i915_private * const i915 = arg;
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
void __iomem * const regs = gt->uncore->regs;
u32 master_tile_ctl, master_ctl;
u32 gu_misc_iir;
@ -3075,7 +3075,7 @@ static void ilk_irq_reset(struct drm_i915_private *dev_priv)
intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff);
}
gen5_gt_irq_reset(&dev_priv->gt);
gen5_gt_irq_reset(to_gt(dev_priv));
ibx_irq_reset(dev_priv);
}
@ -3085,7 +3085,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, 0);
intel_uncore_posting_read(&dev_priv->uncore, VLV_MASTER_IER);
gen5_gt_irq_reset(&dev_priv->gt);
gen5_gt_irq_reset(to_gt(dev_priv));
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display_irqs_enabled)
@ -3119,7 +3119,7 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv)
gen8_master_intr_disable(dev_priv->uncore.regs);
gen8_gt_irq_reset(&dev_priv->gt);
gen8_gt_irq_reset(to_gt(dev_priv));
gen8_display_irq_reset(dev_priv);
GEN3_IRQ_RESET(uncore, GEN8_PCU_);
@ -3173,7 +3173,7 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
static void gen11_irq_reset(struct drm_i915_private *dev_priv)
{
struct intel_gt *gt = &dev_priv->gt;
struct intel_gt *gt = to_gt(dev_priv);
struct intel_uncore *uncore = gt->uncore;
gen11_master_intr_disable(dev_priv->uncore.regs);
@ -3187,7 +3187,7 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv)
static void dg1_irq_reset(struct drm_i915_private *dev_priv)
{
struct intel_gt *gt = &dev_priv->gt;
struct intel_gt *gt = to_gt(dev_priv);
struct intel_uncore *uncore = gt->uncore;
dg1_master_intr_disable(dev_priv->uncore.regs);
@ -3252,7 +3252,7 @@ static void cherryview_irq_reset(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, GEN8_MASTER_IRQ, 0);
intel_uncore_posting_read(&dev_priv->uncore, GEN8_MASTER_IRQ);
gen8_gt_irq_reset(&dev_priv->gt);
gen8_gt_irq_reset(to_gt(dev_priv));
GEN3_IRQ_RESET(uncore, GEN8_PCU_);
@ -3709,7 +3709,7 @@ static void ilk_irq_postinstall(struct drm_i915_private *dev_priv)
ibx_irq_postinstall(dev_priv);
gen5_gt_irq_postinstall(&dev_priv->gt);
gen5_gt_irq_postinstall(to_gt(dev_priv));
GEN3_IRQ_INIT(uncore, DE, dev_priv->irq_mask,
display_mask | extra_mask);
@ -3746,7 +3746,7 @@ void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv)
static void valleyview_irq_postinstall(struct drm_i915_private *dev_priv)
{
gen5_gt_irq_postinstall(&dev_priv->gt);
gen5_gt_irq_postinstall(to_gt(dev_priv));
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display_irqs_enabled)
@ -3852,7 +3852,7 @@ static void gen8_irq_postinstall(struct drm_i915_private *dev_priv)
else if (HAS_PCH_SPLIT(dev_priv))
ibx_irq_postinstall(dev_priv);
gen8_gt_irq_postinstall(&dev_priv->gt);
gen8_gt_irq_postinstall(to_gt(dev_priv));
gen8_de_irq_postinstall(dev_priv);
gen8_master_intr_enable(dev_priv->uncore.regs);
@ -3871,7 +3871,7 @@ static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv)
static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
{
struct intel_gt *gt = &dev_priv->gt;
struct intel_gt *gt = to_gt(dev_priv);
struct intel_uncore *uncore = gt->uncore;
u32 gu_misc_masked = GEN11_GU_MISC_GSE;
@ -3889,7 +3889,7 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
{
struct intel_gt *gt = &dev_priv->gt;
struct intel_gt *gt = to_gt(dev_priv);
struct intel_uncore *uncore = gt->uncore;
u32 gu_misc_masked = GEN11_GU_MISC_GSE;
@ -3910,7 +3910,7 @@ static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv)
{
gen8_gt_irq_postinstall(&dev_priv->gt);
gen8_gt_irq_postinstall(to_gt(dev_priv));
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display_irqs_enabled)
@ -4073,7 +4073,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@ -4181,7 +4181,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@ -4326,11 +4326,11 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
intel_engine_cs_irq(dev_priv->gt.engine[RCS0],
intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0],
iir);
if (iir & I915_BSD_USER_INTERRUPT)
intel_engine_cs_irq(dev_priv->gt.engine[VCS0],
intel_engine_cs_irq(to_gt(dev_priv)->engine[VCS0],
iir >> 25);
if (iir & I915_MASTER_ERROR_INTERRUPT)
@ -4381,7 +4381,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
/* pre-gen11 the guc irqs bits are in the upper 16 bits of the pm reg */
if (HAS_GT_UC(dev_priv) && GRAPHICS_VER(dev_priv) < 11)
dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16;
to_gt(dev_priv)->pm_guc_events = GUC_INTR_GUC2HOST << 16;
if (!HAS_DISPLAY(dev_priv))
return;

View file

@ -140,6 +140,9 @@ i915_param_named_unsafe(invert_brightness, int, 0400,
i915_param_named(disable_display, bool, 0400,
"Disable display (default: false)");
i915_param_named(memtest, bool, 0400,
"Perform a read/write test of all device memory on module load (default: off)");
i915_param_named(mmio_debug, int, 0400,
"Enable the MMIO debug code for the first N failures (default: off). "
"This may negatively affect performance.");

View file

@ -64,6 +64,7 @@ struct drm_printer;
param(char *, guc_firmware_path, NULL, 0400) \
param(char *, huc_firmware_path, NULL, 0400) \
param(char *, dmc_firmware_path, NULL, 0400) \
param(bool, memtest, false, 0400) \
param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \
param(int, edp_vswing, 0, 0400) \
param(unsigned int, reset, 3, 0600) \

View file

@ -1027,6 +1027,7 @@ static const struct intel_device_info xehpsdv_info = {
DGFX_FEATURES,
PLATFORM(INTEL_XEHPSDV),
.display = { },
.has_64k_pages = 1,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
@ -1044,6 +1045,7 @@ static const struct intel_device_info dg2_info = {
.graphics.rel = 55,
.media.rel = 55,
PLATFORM(INTEL_DG2),
.has_64k_pages = 1,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) |

View file

@ -4443,7 +4443,7 @@ void i915_perf_init(struct drm_i915_private *i915)
mutex_init(&perf->lock);
/* Choose a representative limit */
oa_sample_rate_hard_limit = i915->gt.clock_frequency / 2;
oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;
mutex_init(&perf->metrics_lock);
idr_init_base(&perf->metrics_idr, 1);

View file

@ -210,8 +210,8 @@ static void init_rc6(struct i915_pmu *pmu)
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
intel_wakeref_t wakeref;
with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) {
pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
pmu->sample[__I915_SAMPLE_RC6].cur;
pmu->sleep_last = ktime_get_raw();
@ -222,7 +222,7 @@ static void park_rc6(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
pmu->sleep_last = ktime_get_raw();
}
@ -419,7 +419,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
struct drm_i915_private *i915 =
container_of(hrtimer, struct drm_i915_private, pmu.timer);
struct i915_pmu *pmu = &i915->pmu;
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
unsigned int period_ns;
ktime_t now;
@ -476,7 +476,7 @@ engine_event_status(struct intel_engine_cs *engine,
static int
config_status(struct drm_i915_private *i915, u64 config)
{
struct intel_gt *gt = &i915->gt;
struct intel_gt *gt = to_gt(i915);
switch (config) {
case I915_PMU_ACTUAL_FREQUENCY:
@ -601,10 +601,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = READ_ONCE(pmu->irq_count);
break;
case I915_PMU_RC6_RESIDENCY:
val = get_rc6(&i915->gt);
val = get_rc6(to_gt(i915));
break;
case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
val = ktime_to_ns(intel_gt_get_awake_time(&i915->gt));
val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
break;
}
}

View file

@ -31,7 +31,7 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
static int query_topology_info(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item)
{
const struct sseu_dev_info *sseu = &dev_priv->gt.info.sseu;
const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;

View file

@ -9417,6 +9417,7 @@ enum {
#define GEN6_OFFSET(x) ((x) << 19)
#define GEN6_AGGRESSIVE_TURBO (0 << 15)
#define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23
#define GEN9_IGNORE_SLICE_RATIO (0 << 0)
#define GEN6_RC_VIDEO_FREQ _MMIO(0xA00C)
#define GEN6_RC_CONTROL _MMIO(0xA090)
@ -9452,6 +9453,9 @@ enum {
#define GEN6_RP_UP_BUSY_CONT (0x4 << 3)
#define GEN6_RP_DOWN_IDLE_AVG (0x2 << 0)
#define GEN6_RP_DOWN_IDLE_CONT (0x1 << 0)
#define GEN6_RPSWCTL_SHIFT 9
#define GEN9_RPSWCTL_ENABLE (0x2 << GEN6_RPSWCTL_SHIFT)
#define GEN9_RPSWCTL_DISABLE (0x0 << GEN6_RPSWCTL_SHIFT)
#define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C)
#define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030)
#define GEN6_RP_CUR_UP_EI _MMIO(0xA050)

View file

@ -42,6 +42,7 @@
#include "gt/intel_rps.h"
#include "i915_active.h"
#include "i915_deps.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_pm.h"
@ -308,6 +309,7 @@ void i915_request_free_capture_list(struct i915_capture_list *capture)
struct i915_capture_list *next = capture->next;
i915_vma_snapshot_put(capture->vma_snapshot);
kfree(capture);
capture = next;
}
}
@ -1542,6 +1544,27 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
return 0;
}
/**
* i915_request_await_deps - set this request to (async) wait upon a struct
* i915_deps dma_fence collection
* @rq: request we are wishing to use
* @deps: The struct i915_deps containing the dependencies.
*
* Returns 0 if successful, negative error code on error.
*/
int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps)
{
int i, err;
for (i = 0; i < deps->num_deps; ++i) {
err = i915_request_await_dma_fence(rq, deps->fences[i]);
if (err)
return err;
}
return 0;
}
/**
* i915_request_await_object - set this request to (async) wait upon a bo
* @to: request we are wishing to use

View file

@ -47,6 +47,7 @@
struct drm_file;
struct drm_i915_gem_object;
struct drm_printer;
struct i915_deps;
struct i915_request;
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
@ -411,6 +412,7 @@ int i915_request_await_object(struct i915_request *to,
bool write);
int i915_request_await_dma_fence(struct i915_request *rq,
struct dma_fence *fence);
int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps);
int i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence);
@ -657,7 +659,8 @@ i915_request_timeline(const struct i915_request *rq)
{
/* Valid only while the request is being constructed (or retired). */
return rcu_dereference_protected(rq->timeline,
lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
test_bit(CONTEXT_IS_PARKING, &rq->context->flags));
}
static inline struct i915_gem_context *

Some files were not shown because too many files have changed in this diff.