From 06b2b1a40e3a9a1ea80b4c51885d863ca194ef3f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 13 May 2019 13:01:00 +0100
Subject: [PATCH 01/11] drm/i915: Rearrange i915_scheduler.c

To avoid pulling in a forward declaration in the next patch, move the
i915_sched_node handling to after the main dfs of the scheduler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190513120102.29660-1-chris@chris-wilson.co.uk
(cherry picked from commit 5ae87063c162679a61f2141041d0918cc3045daf)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 210 +++++++++++++-------------
 1 file changed, 105 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..5756fcfe343d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -35,109 +35,6 @@ static inline bool node_signaled(const struct i915_sched_node *node)
 	return i915_request_completed(node_to_request(node));
 }
 
-void i915_sched_node_init(struct i915_sched_node *node)
-{
-	INIT_LIST_HEAD(&node->signalers_list);
-	INIT_LIST_HEAD(&node->waiters_list);
-	INIT_LIST_HEAD(&node->link);
-	node->attr.priority = I915_PRIORITY_INVALID;
-	node->semaphores = 0;
-	node->flags = 0;
-}
-
-static struct i915_dependency *
-i915_dependency_alloc(void)
-{
-	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct i915_dependency *dep)
-{
-	kmem_cache_free(global.slab_dependencies, dep);
-}
-
-bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
-				      struct i915_sched_node *signal,
-				      struct i915_dependency *dep,
-				      unsigned long flags)
-{
-	bool ret = false;
-
-	spin_lock_irq(&schedule_lock);
-
-	if (!node_signaled(signal)) {
-		INIT_LIST_HEAD(&dep->dfs_link);
-		list_add(&dep->wait_link, &signal->waiters_list);
-		list_add(&dep->signal_link, &node->signalers_list);
-		dep->signaler = signal;
-		dep->flags = flags;
-
-		/* Keep track of whether anyone on this chain has a semaphore */
-		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
-		    !node_started(signal))
-			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
-
-		ret = true;
-	}
-
-	spin_unlock_irq(&schedule_lock);
-
-	return ret;
-}
-
-int i915_sched_node_add_dependency(struct i915_sched_node *node,
-				   struct i915_sched_node *signal)
-{
-	struct i915_dependency *dep;
-
-	dep = i915_dependency_alloc();
-	if (!dep)
-		return -ENOMEM;
-
-	if (!__i915_sched_node_add_dependency(node, signal, dep,
-					      I915_DEPENDENCY_ALLOC))
-		i915_dependency_free(dep);
-
-	return 0;
-}
-
-void i915_sched_node_fini(struct i915_sched_node *node)
-{
-	struct i915_dependency *dep, *tmp;
-
-	GEM_BUG_ON(!list_empty(&node->link));
-
-	spin_lock_irq(&schedule_lock);
-
-	/*
-	 * Everyone we depended upon (the fences we wait to be signaled)
-	 * should retire before us and remove themselves from our list.
-	 * However, retirement is run independently on each timeline and
-	 * so we may be called out-of-order.
-	 */
-	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!node_signaled(dep->signaler));
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->wait_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(dep);
-	}
-
-	/* Remove ourselves from everyone who depends upon us */
-	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
-		GEM_BUG_ON(dep->signaler != node);
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->signal_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(dep);
-	}
-
-	spin_unlock_irq(&schedule_lock);
-}
-
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -239,6 +136,11 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	return &p->requests[idx];
 }
 
+void __i915_priolist_free(struct i915_priolist *p)
+{
+	kmem_cache_free(global.slab_priorities, p);
+}
+
 struct sched_cache {
 	struct list_head *priolist;
 };
@@ -436,9 +338,107 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
-void __i915_priolist_free(struct i915_priolist *p)
+void i915_sched_node_init(struct i915_sched_node *node)
 {
-	kmem_cache_free(global.slab_priorities, p);
+	INIT_LIST_HEAD(&node->signalers_list);
+	INIT_LIST_HEAD(&node->waiters_list);
+	INIT_LIST_HEAD(&node->link);
+	node->attr.priority = I915_PRIORITY_INVALID;
+	node->semaphores = 0;
+	node->flags = 0;
+}
+
+static struct i915_dependency *
+i915_dependency_alloc(void)
+{
+	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
+}
+
+static void
+i915_dependency_free(struct i915_dependency *dep)
+{
+	kmem_cache_free(global.slab_dependencies, dep);
+}
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+				      struct i915_sched_node *signal,
+				      struct i915_dependency *dep,
+				      unsigned long flags)
+{
+	bool ret = false;
+
+	spin_lock_irq(&schedule_lock);
+
+	if (!node_signaled(signal)) {
+		INIT_LIST_HEAD(&dep->dfs_link);
+		list_add(&dep->wait_link, &signal->waiters_list);
+		list_add(&dep->signal_link, &node->signalers_list);
+		dep->signaler = signal;
+		dep->flags = flags;
+
+		/* Keep track of whether anyone on this chain has a semaphore */
+		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
+		    !node_started(signal))
+			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
+
+		ret = true;
+	}
+
+	spin_unlock_irq(&schedule_lock);
+
+	return ret;
+}
+
+int i915_sched_node_add_dependency(struct i915_sched_node *node,
+				   struct i915_sched_node *signal)
+{
+	struct i915_dependency *dep;
+
+	dep = i915_dependency_alloc();
+	if (!dep)
+		return -ENOMEM;
+
+	if (!__i915_sched_node_add_dependency(node, signal, dep,
+					      I915_DEPENDENCY_ALLOC))
+		i915_dependency_free(dep);
+
+	return 0;
+}
+
+void i915_sched_node_fini(struct i915_sched_node *node)
+{
+	struct i915_dependency *dep, *tmp;
+
+	GEM_BUG_ON(!list_empty(&node->link));
+
+	spin_lock_irq(&schedule_lock);
+
+	/*
+	 * Everyone we depended upon (the fences we wait to be signaled)
+	 * should retire before us and remove themselves from our list.
+	 * However, retirement is run independently on each timeline and
+	 * so we may be called out-of-order.
+	 */
+	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
+		GEM_BUG_ON(!node_signaled(dep->signaler));
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->wait_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(dep);
+	}
+
+	/* Remove ourselves from everyone who depends upon us */
+	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+		GEM_BUG_ON(dep->signaler != node);
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->signal_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(dep);
+	}
+
+	spin_unlock_irq(&schedule_lock);
 }
 
 static void i915_global_scheduler_shrink(void)

From f312c23ff92319f5f242b69d93885c53f92030b5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 13 May 2019 13:01:01 +0100
Subject: [PATCH 02/11] drm/i915: Pass i915_sched_node around internally

To simplify the next patch, update bump_priority and schedule to accept
the internal i915_sched_ndoe directly and not expect a request pointer.

add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7)
Function                                     old     new   delta
i915_schedule_bump_priority                  109     113      +4
i915_schedule                                 50      54      +4
__i915_schedule                              922     907     -15

v2: Adopt node for the old rq local, since it no longer is a request but
the origin node.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190513120102.29660-2-chris@chris-wilson.co.uk
(cherry picked from commit 52c76fb18a34fc08dd06f32b9fc83f1375f083ee)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 36 ++++++++++++++-------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 5756fcfe343d..3cfadb9db988 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -175,7 +175,7 @@ static bool inflight(const struct i915_request *rq,
 	return active->hw_context == rq->hw_context;
 }
 
-static void __i915_schedule(struct i915_request *rq,
+static void __i915_schedule(struct i915_sched_node *node,
 			    const struct i915_sched_attr *attr)
 {
 	struct intel_engine_cs *engine;
@@ -189,13 +189,13 @@ static void __i915_schedule(struct i915_request *rq,
 	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
-	if (i915_request_completed(rq))
+	if (node_signaled(node))
 		return;
 
-	if (prio <= READ_ONCE(rq->sched.attr.priority))
+	if (prio <= READ_ONCE(node->attr.priority))
 		return;
 
-	stack.signaler = &rq->sched;
+	stack.signaler = node;
 	list_add(&stack.dfs_link, &dfs);
 
 	/*
@@ -246,9 +246,9 @@ static void __i915_schedule(struct i915_request *rq,
 	 * execlists_submit_request()), we can set our own priority and skip
 	 * acquiring the engine locks.
 	 */
-	if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&rq->sched.link));
-		rq->sched.attr = *attr;
+	if (node->attr.priority == I915_PRIORITY_INVALID) {
+		GEM_BUG_ON(!list_empty(&node->link));
+		node->attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
@@ -257,15 +257,14 @@ static void __i915_schedule(struct i915_request *rq,
 	}
 
 	memset(&cache, 0, sizeof(cache));
-	engine = rq->engine;
+	engine = node_to_request(node)->engine;
 	spin_lock(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
 		INIT_LIST_HEAD(&dep->dfs_link);
 
+		node = dep->signaler;
 		engine = sched_lock_engine(node, engine, &cache);
 		lockdep_assert_held(&engine->timeline.lock);
 
@@ -315,13 +314,20 @@ static void __i915_schedule(struct i915_request *rq,
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 {
 	spin_lock_irq(&schedule_lock);
-	__i915_schedule(rq, attr);
+	__i915_schedule(&rq->sched, attr);
 	spin_unlock_irq(&schedule_lock);
 }
 
+static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
+{
+	struct i915_sched_attr attr = node->attr;
+
+	attr.priority |= bump;
+	__i915_schedule(node, &attr);
+}
+
 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 {
-	struct i915_sched_attr attr;
 	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
@@ -330,11 +336,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 		return;
 
 	spin_lock_irqsave(&schedule_lock, flags);
-
-	attr = rq->sched.attr;
-	attr.priority |= bump;
-	__i915_schedule(rq, &attr);
-
+	__bump_priority(&rq->sched, bump);
 	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 

From 9981927cc9e10fb4dbc24b2a5f8c17ab133859a0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 15 May 2019 14:00:50 +0100
Subject: [PATCH 03/11] drm/i915: Bump signaler priority on adding a waiter

The handling of the no-preemption priority level imposes the restriction
that we need to maintain the implied ordering even though preemption is
disabled. Otherwise we may end up with an AB-BA deadlock across multiple
engine due to a real preemption event reordering the no-preemption
WAITs. To resolve this issue we currently promote all requests to WAIT
on unsubmission, however this interferes with the timeslicing
requirement that we do not apply any implicit promotion that will defeat
the round-robin timeslice list. (If we automatically promote the active
request it will go back to the head of the queue and not the tail!)

So we need implicit promotion to prevent reordering around semaphores
where we are not allowed to preempt, and we must avoid implicit
promotion on unsubmission. So instead of at unsubmit, if we apply that
implicit promotion on adding the dependency, we avoid the semaphore
deadlock and we also reduce the gains made by the promotion for user
space waiting. Furthermore, by keeping the earlier dependencies at a
higher level, we reduce the search space for timeslicing without
altering runtime scheduling too badly (no dependencies at all will be
assigned a higher priority for rrul).

v2: Limit the bump to external edges (as originally intended) i.e.
between contexts and out to the user.

Testcase: igt/gem_concurrent_blit
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190515130052.4475-3-chris@chris-wilson.co.uk
(cherry picked from commit 6e7eb7a80769e7250e31652b96918cf7f3e0d285)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c         |  9 ---------
 drivers/gpu/drm/i915/i915_scheduler.c       | 11 +++++++++++
 drivers/gpu/drm/i915/i915_scheduler_types.h |  3 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c  | 12 ++++++++----
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f6c78c0fa74b..f258281bf3f3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -502,15 +502,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	/*
-	 * As we do not allow WAIT to preempt inflight requests,
-	 * once we have executed a request, along with triggering
-	 * any execution callbacks, we must preserve its ordering
-	 * within the non-preemptible FIFO.
-	 */
-	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
-	request->sched.attr.priority |= __NO_PREEMPTION;
-
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 3cfadb9db988..108f52e1bf35 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -383,6 +383,16 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		    !node_started(signal))
 			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 
+		/*
+		 * As we do not allow WAIT to preempt inflight requests,
+		 * once we have executed a request, along with triggering
+		 * any execution callbacks, we must preserve its ordering
+		 * within the non-preemptible FIFO.
+		 */
+		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+		if (flags & I915_DEPENDENCY_EXTERNAL)
+			__bump_priority(signal, __NO_PREEMPTION);
+
 		ret = true;
 	}
 
@@ -401,6 +411,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 		return -ENOMEM;
 
 	if (!__i915_sched_node_add_dependency(node, signal, dep,
+					      I915_DEPENDENCY_EXTERNAL |
 					      I915_DEPENDENCY_ALLOC))
 		i915_dependency_free(dep);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index f1af3916a808..4f2b2eb7c3e5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -66,7 +66,8 @@ struct i915_dependency {
 	struct list_head wait_link;
 	struct list_head dfs_link;
 	unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
+#define I915_DEPENDENCY_ALLOC		BIT(0)
+#define I915_DEPENDENCY_EXTERNAL	BIT(1)
 };
 
 #endif /* _I915_SCHEDULER_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index fbee030db940..e8b0b5dbcb2c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -99,12 +99,14 @@ static int live_busywait_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_unlock;
-	ctx_hi->sched.priority = INT_MAX;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = INT_MIN;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
@@ -954,12 +956,14 @@ static int live_preempt_hang(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;

From c80274bb5882ea1493551df7768977c5a43aa9d9 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 15 May 2019 14:00:51 +0100
Subject: [PATCH 04/11] drm/i915: Downgrade NEWCLIENT to non-preemptive

Commit 1413b2bc0717 ("drm/i915: Trim NEWCLIENT boosting") had the
intended consequence of not allowing a sequence of work that merely
crossed into a new engine the privilege to be promoted to NEWCLIENT
status. It also had the unintended consequence of actually making
NEWCLIENT effective on heavily oversubscribed transcode machines and
impacting upon their throughput.

If we consider a client packet composed of (rcsA, rcsB, vcs) and 30 of
those clients, using the NEWCLIENT boost that will be scheduled as

	rcsA x 30, (rcsB, vcs) x 30

where as before it would have been

	(rcsA, rcsB, vcs) x 30

That is with NEWCLIENT only boosting the first request of each client,
we would execute all rcsA requests prior to running on the vcs engines;
acruing a lot of dead time as compared to the previous case where the
vcs engine would be started in parallel to processing the second client.

The previous patch has the effect of delaying submission until it is
required by a third party (either the user with an explicit wait, or by
another client/engine). We reduce the NEWCLIENT bump to a mere WAIT,
which has the effect of removing its preemptive grant and reducing it to
the same level as any other user interaction -- that it will not be
promoted above the interengine dependencies, and so preventing NEWCLIENTS
from starving other engines. This a large nerf to the rrul properties of
the current NEWCLIENT, but it still does give prioritised submission to
new requests from light workloads.

References: b16c765122f9 ("drm/i915: Priority boost for new clients")
Fixes: 1413b2bc0717 ("drm/i915: Trim NEWCLIENT boosting") # customer impact
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190515130052.4475-4-chris@chris-wilson.co.uk
(cherry picked from commit 68fc728b01fcc93b26d52f6e884e738962a49a66)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_priolist_types.h | 5 ++---
 drivers/gpu/drm/i915/i915_request.c        | 2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index cc44ebd3b553..49709de69875 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -20,15 +20,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 3
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
 
 #define I915_PRIORITY_WAIT		((u8)BIT(0))
-#define I915_PRIORITY_NEWCLIENT		((u8)BIT(1))
-#define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(2))
+#define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(1))
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f258281bf3f3..ede79886cf98 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1228,7 +1228,7 @@ void i915_request_add(struct i915_request *request)
 		 * the bulk clients. (FQ_CODEL)
 		 */
 		if (list_empty(&request->sched.signalers_list))
-			attr.priority |= I915_PRIORITY_NEWCLIENT;
+			attr.priority |= I915_PRIORITY_WAIT;
 
 		engine->schedule(request, &attr);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4e0a351bfbca..11e5a86610bf 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,7 +164,7 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
-#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
+#define ACTIVE_PRIORITY (I915_PRIORITY_NOSEMAPHORE)
 
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);

From a491cc8e1597ea25803191cded49d3686702a406 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 15 May 2019 14:00:49 +0100
Subject: [PATCH 05/11] drm/i915: Truly bump ready tasks ahead of busywaits

In commit b7404c7ecb38 ("drm/i915: Bump ready tasks ahead of
busywaits"), I tried cutting a corner in order to not install a signal
for each of our dependencies, and only listened to requests on which we
were intending to busywait. The compromise that was made was that
instead of then being able to promote the request with a full
NOSEMAPHORE like its non-busywaiting brethren, as we had not ensured we
had cleared the semaphore chain, we settled for only using the NEWCLIENT
boost. With an over saturated system with multiple NEWCLIENTS in flight
at any time, this was found to be an inadequate promotion and left us
with a much poorer scheduling order than prior to using semaphores.

The outcome of this patch, is that all requests have NOSEMAPHORE
priority when they have no dependencies and are ready to run and not
busywait, restoring the pre-semaphore ordering on saturated systems.

We can demonstrate the effect of poor scheduling order by oversaturating
the system using gem_wsim on a system with multiple vcs engines
(i.e running the same workloads across more clients than required for
peak throughput, e.g. media_load_balance_17i7.wsim -c4 -b context):

x v5.1 (normalized)
+ tip
* fix
+------------------------------------------------------------------------+
|                                                                    x   |
|                                                                    x   |
|                                                                    x   |
|                                                                    x   |
|                                                                   %x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %%x   |
|                                                                  %#x   |
|                                                                  %#x   |
|                                                                  %#x   |
|                                                                  %#x   |
|                                                                  %#x   |
|         +                                                        %#xx  |
|         +                                                        %#xx  |
|         +                                                       %%#xx  |
|         +                                                       %%#xx  |
|         +                                                       %%#xx  |
|         +                                                       %%#xx  |
|         +                                                       %%##x  |
|         +++                                                     %%##x  |
|         +++                                                     %%##x  |
|         +++                                                     %%##x  |
|        ++++                                                     %%##x  |
|        ++++                                                     %%##x  |
|        ++++                                                     %%##xx |
|        ++++                                                     %###xx |
|        ++++                                                     %###xx |
|        ++++                                                     %###xx |
|        ++++                                                     %###xx |
|        ++++ +                                                   %#O#xx |
|        ++++ +                                                   %#O#xx |
|        ++++++ +                                                 %#O#xx |
|       ++++++++++                                                %OOOxxx|
|       ++++++++++       +                                       %#OOO#xx|
|     + ++++++++++++ ++ +++++    +                        ++    @@OOOO#xx|
|                                                                   |A_| |
||__________M_______A____________________|                               |
|                                                                 |A_|   |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 120       0.99456       1.00628      0.999985     1.0001545  0.0024387139
+ 120      0.873021       1.00037      0.884134    0.90148752   0.039190862
Difference at 99.5% confidence
	-0.098667 +/- 0.0110762
	-9.86517% +/- 1.10745%
	(Student's t, pooled s = 0.0277657)
% 120      0.990207       1.00165     0.9970265    0.99699748     0.0021024
Difference at 99.5% confidence
	-0.003157 +/- 0.000908245
	-0.315651% +/- 0.0908105%
	(Student's t, pooled s = 0.00227678)

Fixes: b7404c7ecb38 ("drm/i915: Bump ready tasks ahead of busywaits")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190515130052.4475-2-chris@chris-wilson.co.uk
(cherry picked from commit 17db337f5098d29415314c4a588b842fc684394b)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 31 +++++++++++------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ede79886cf98..c88e538b2ef4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -573,18 +573,7 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 
 	switch (state) {
 	case FENCE_COMPLETE:
-		/*
-		 * We only check a small portion of our dependencies
-		 * and so cannot guarantee that there remains no
-		 * semaphore chain across all. Instead of opting
-		 * for the full NOSEMAPHORE boost, we go for the
-		 * smaller (but still preempting) boost of
-		 * NEWCLIENT. This will be enough to boost over
-		 * a busywaiting request (as that cannot be
-		 * NEWCLIENT) without accidentally boosting
-		 * a busywait over real work elsewhere.
-		 */
-		i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
+		i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
 		break;
 
 	case FENCE_FREE:
@@ -865,12 +854,6 @@ emit_semaphore_wait(struct i915_request *to,
 	if (err < 0)
 		return err;
 
-	err = i915_sw_fence_await_dma_fence(&to->semaphore,
-					    &from->fence, 0,
-					    I915_FENCE_GFP);
-	if (err < 0)
-		return err;
-
 	/* We need to pin the signaler's HWSP until we are finished reading. */
 	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)
@@ -936,8 +919,18 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 						    &from->fence, 0,
 						    I915_FENCE_GFP);
 	}
+	if (ret < 0)
+		return ret;
 
-	return ret < 0 ? ret : 0;
+	if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
+		ret = i915_sw_fence_await_dma_fence(&to->semaphore,
+						    &from->fence, 0,
+						    I915_FENCE_GFP);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
 }
 
 int

From a8c2d5ab9e71be3f9431c47bd45329a36e1fc650 Mon Sep 17 00:00:00 2001
From: Weinan <weinan.z.li@intel.com>
Date: Fri, 10 May 2019 15:57:20 +0800
Subject: [PATCH 06/11] drm/i915/gvt: emit init breadcrumb for gvt request

"To track whether a request has started on HW, we can emit a breadcrumb at
the beginning of the request and check its timeline's HWSP to see if the
breadcrumb has advanced past the start of this request." It means all the
request which timeline's has_init_breadcrumb is true, then the
emit_init_breadcrumb process must have before emitting the real commands,
otherwise, the scheduler might get a wrong state of this request during
reset. If the request is exactly the guilty one, the scheduler won't
terminate it with the wrong state. To avoid this, do emit_init_breadcrumb
for all the requests from gvt.

v2: cc to stable kernel

Fixes: 8547444137ec ("drm/i915: Identify active requests")
Cc: stable@vger.kernel.org
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Weinan <weinan.z.li@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 7c99bbc3e2b8..ccd71152c9bc 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -298,12 +298,31 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	struct i915_request *req = workload->req;
 	void *shadow_ring_buffer_va;
 	u32 *cs;
+	int err;
 
 	if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)
 		|| IS_COFFEELAKE(req->i915))
 		&& is_inhibit_context(req->hw_context))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
+	/*
+	 * To track whether a request has started on HW, we can emit a
+	 * breadcrumb at the beginning of the request and check its
+	 * timeline's HWSP to see if the breadcrumb has advanced past the
+	 * start of this request. Actually, the request must have the
+	 * init_breadcrumb if its timeline set has_init_bread_crumb, or the
+	 * scheduler might get a wrong state of it during reset. Since the
+	 * requests from gvt always set the has_init_breadcrumb flag, here
+	 * need to do the emit_init_breadcrumb for all the requests.
+	 */
+	if (req->engine->emit_init_breadcrumb) {
+		err = req->engine->emit_init_breadcrumb(req);
+		if (err) {
+			gvt_vgpu_err("fail to emit init breadcrumb\n");
+			return err;
+		}
+	}
+
 	/* allocate shadow ring buffer */
 	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
 	if (IS_ERR(cs)) {

From df2ea3c296b1f3d66f297d240124c2ebd74c3db3 Mon Sep 17 00:00:00 2001
From: Yan Zhao <yan.y.zhao@intel.com>
Date: Tue, 7 May 2019 22:14:04 -0400
Subject: [PATCH 07/11] drm/i915/gvt: use cmd to restore in-context mmios to hw
 for gen9 platform

for restore-inhibit context, hardware will not load in-context mmios
(engine context part) to hardware, but hardware will save the mmio
values in hardware back to context image. So, in order to save correct
values of vGPU back to context image, values of vGPU mmios have to be
loaded into hardware first for restore-inhibit context.

In this patch, the mechanism is applied to all gen9 platform.

The reason excluding gen8 platforms is only because of lacking of testing
on those platforms.

v3: for mocs registers, goto in-context mmios save-restore path for skl
platform as well (weinan li)
v2: update vreg when scanning indirect context for inhibit context for
gen9

Cc: Weinan Li <weinan.z.li@intel.com>
Acked-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c   | 14 +++++++++-----
 drivers/gpu/drm/i915/gvt/mmio_context.c | 10 +++-------
 drivers/gpu/drm/i915/gvt/scheduler.c    |  4 +---
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index ab002cfd3cab..5cb59c0b4bbe 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -896,12 +896,16 @@ static int cmd_reg_handler(struct parser_exec_state *s,
 	}
 
 	/* TODO
-	 * Right now only scan LRI command on KBL and in inhibit context.
-	 * It's good enough to support initializing mmio by lri command in
-	 * vgpu inhibit context on KBL.
+	 * In order to let workload with inhibit context to generate
+	 * correct image data into memory, vregs values will be loaded to
+	 * hw via LRIs in the workload with inhibit context. But as
+	 * indirect context is loaded prior to LRIs in workload, we don't
+	 * want reg values specified in indirect context overwritten by
+	 * LRIs in workloads. So, when scanning an indirect context, we
+	 * update reg values in it into vregs, so LRIs in workload with
+	 * inhibit context will restore with correct values
 	 */
-	if ((IS_KABYLAKE(s->vgpu->gvt->dev_priv)
-		|| IS_COFFEELAKE(s->vgpu->gvt->dev_priv)) &&
+	if (IS_GEN(gvt->dev_priv, 9) &&
 			intel_gvt_mmio_is_in_ctx(gvt, offset) &&
 			!strncmp(cmd, "lri", 3)) {
 		intel_gvt_hypervisor_read_gpa(s->vgpu,
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index edf6d646eb25..299b602b0643 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -392,10 +392,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;
 
-	if (ring_id == RCS0 &&
-	    (IS_KABYLAKE(dev_priv) ||
-	     IS_BROXTON(dev_priv) ||
-	     IS_COFFEELAKE(dev_priv)))
+	if (ring_id == RCS0 && IS_GEN(dev_priv, 9))
 		return;
 
 	if (!pre && !gen9_render_mocs.initialized)
@@ -470,11 +467,10 @@ static void switch_mmio(struct intel_vgpu *pre,
 			continue;
 		/*
 		 * No need to do save or restore of the mmio which is in context
-		 * state image on kabylake, it's initialized by lri command and
+		 * state image on gen9, it's initialized by lri command and
 		 * save or restore with context together.
 		 */
-		if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)
-			|| IS_COFFEELAKE(dev_priv)) && mmio->in_context)
+		if (IS_GEN(dev_priv, 9) && mmio->in_context)
 			continue;
 
 		// save
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index ccd71152c9bc..13632dba8b2a 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -300,9 +300,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	u32 *cs;
 	int err;
 
-	if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)
-		|| IS_COFFEELAKE(req->i915))
-		&& is_inhibit_context(req->hw_context))
+	if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/*

From 39947afc6c063940cbd80824e75eb0cf84591c3c Mon Sep 17 00:00:00 2001
From: Yan Zhao <yan.y.zhao@intel.com>
Date: Tue, 7 May 2019 22:15:00 -0400
Subject: [PATCH 08/11] drm/i915/gvt: Tiled Resources mmios are in-context
 mmios for gen9+

TRVATTL3PTRDW(0x4de0-0x4de4), TRNULLDETCT(0x4de8), TRINVTILEDETCT(0x4dec),
TRTTE(0x4df0), TRVADR(0x4df4) are in-context mmios for gen9+

Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch")
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio_context.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 299b602b0643..f4e60d736cfb 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -108,12 +108,12 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 	{RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
 	{RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
 	{RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
-	{RCS0, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
-	{RCS0, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
-	{RCS0, TRNULLDETCT, 0, false}, /* 0x4de8 */
-	{RCS0, TRINVTILEDETCT, 0, false}, /* 0x4dec */
-	{RCS0, TRVADR, 0, false}, /* 0x4df0 */
-	{RCS0, TRTTE, 0, false}, /* 0x4df4 */
+	{RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
+	{RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
+	{RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
+	{RCS0, TRINVTILEDETCT, 0, true}, /* 0x4dec */
+	{RCS0, TRVADR, 0, true}, /* 0x4df0 */
+	{RCS0, TRTTE, 0, true}, /* 0x4df4 */
 
 	{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
 	{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */

From b6241002039118d6bea78f50f8f19195b41ab602 Mon Sep 17 00:00:00 2001
From: Yan Zhao <yan.y.zhao@intel.com>
Date: Tue, 7 May 2019 22:16:33 -0400
Subject: [PATCH 09/11] drm/i915/gvt: add 0x4dfc to gen9 save-restore list

0x4dfc is in-context mmio for gen9+, but each vm have different settings
need to add it to save-restore list along with other trtt registers

Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch")
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index f4e60d736cfb..90bb3df0db50 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -114,6 +114,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 	{RCS0, TRINVTILEDETCT, 0, true}, /* 0x4dec */
 	{RCS0, TRVADR, 0, true}, /* 0x4df0 */
 	{RCS0, TRTTE, 0, true}, /* 0x4df4 */
+	{RCS0, _MMIO(0x4dfc), 0, true},
 
 	{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
 	{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */

From e175a2520c7788a323ae93f04013b8fdaa552c69 Mon Sep 17 00:00:00 2001
From: Yan Zhao <yan.y.zhao@intel.com>
Date: Tue, 7 May 2019 22:16:44 -0400
Subject: [PATCH 10/11] drm/i915/gvt: do not let TRTTE and 0x4dfc write
 passthrough to hardware

the vGPU write on TRTTE and 0x4dfc is now write to vreg first. their
values all be restored hardware when context switching.

Fixes: e39c5add3221 ("drm/i915/gvt: vGPU MMIO virtualization")
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 90673fca792f..e09bd6e0cc4d 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1364,7 +1364,6 @@ static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset,
 static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	u32 trtte = *(u32 *)p_data;
 
 	if ((trtte & 1) && (trtte & (1 << 1)) == 0) {
@@ -1373,11 +1372,6 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
 		return -EINVAL;
 	}
 	write_vreg(vgpu, offset, p_data, bytes);
-	/* TRTTE is not per-context */
-
-	mmio_hw_access_pre(dev_priv);
-	I915_WRITE(_MMIO(offset), vgpu_vreg(vgpu, offset));
-	mmio_hw_access_post(dev_priv);
 
 	return 0;
 }
@@ -1385,15 +1379,6 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
 static int gen9_trtt_chicken_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	u32 val = *(u32 *)p_data;
-
-	if (val & 1) {
-		/* unblock hw logic */
-		mmio_hw_access_pre(dev_priv);
-		I915_WRITE(_MMIO(offset), val);
-		mmio_hw_access_post(dev_priv);
-	}
 	write_vreg(vgpu, offset, p_data, bytes);
 	return 0;
 }

From 591c39ffac4ab1ddf2ea6d49331cb614e0682b28 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 13 May 2019 12:22:44 +0300
Subject: [PATCH 11/11] drm/i915/gvt: Fix an error code in
 ppgtt_populate_spt_by_guest_entry()

"ret" is uninitialized on this path but it should be -EINVAL.

Fixes: 930c8dfea4b8 ("drm/i915/gvt: Check if get_next_pt_type() always returns a valid value")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 08c74e65836b..244ad1729764 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1076,8 +1076,10 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
 	} else {
 		int type = get_next_pt_type(we->type);
 
-		if (!gtt_type_is_pt(type))
+		if (!gtt_type_is_pt(type)) {
+			ret = -EINVAL;
 			goto err;
+		}
 
 		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
 		if (IS_ERR(spt)) {