diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 217e0b58b930..9625e1a662ed 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,17 +28,19 @@ #include "i915_drv.h" #include "intel_renderstate.h" -struct render_state { +struct intel_render_state { const struct intel_renderstate_rodata *rodata; struct i915_vma *vma; - u32 aux_batch_size; - u32 aux_batch_offset; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; }; static const struct intel_renderstate_rodata * -render_state_get_rodata(const struct drm_i915_gem_request *req) +render_state_get_rodata(const struct intel_engine_cs *engine) { - switch (INTEL_GEN(req->i915)) { + switch (INTEL_GEN(engine->i915)) { case 6: return &gen6_null_state; case 7: @@ -63,29 +65,27 @@ render_state_get_rodata(const struct drm_i915_gem_request *req) */ #define OUT_BATCH(batch, i, val) \ do { \ - if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \ - ret = -ENOSPC; \ - goto err_out; \ - } \ + if ((i) >= PAGE_SIZE / sizeof(u32)) \ + goto err; \ (batch)[(i)++] = (val); \ } while(0) -static int render_state_setup(struct render_state *so) +static int render_state_setup(struct intel_render_state *so, + struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(so->vma->vm->dev); const struct intel_renderstate_rodata *rodata = so->rodata; - const bool has_64bit_reloc = INTEL_GEN(dev_priv) >= 8; + const bool has_64bit_reloc = INTEL_GEN(i915) >= 8; + struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; - struct page *page; + unsigned int needs_clflush; u32 *d; int ret; - ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - page = i915_gem_object_get_dirty_page(so->vma->obj, 0); - d = kmap(page); + d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -95,10 +95,8 @@ static int render_state_setup(struct render_state *so) s = lower_32_bits(r); if (has_64bit_reloc) { if (i + 1 >= rodata->batch_items || - rodata->batch[i + 1] != 0) { - ret = -EINVAL; - goto err_out; - } + rodata->batch[i + 1] != 0) + goto err; d[i++] = s; s = upper_32_bits(r); @@ -110,12 +108,20 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + if (rodata->reloc[reloc_index] != -1) { + DRM_ERROR("only %d relocs resolved\n", reloc_index); + goto err; + } + + so->batch_offset = so->vma->node.start; + so->batch_size = rodata->batch_items * sizeof(u32); + while (i % CACHELINE_DWORDS) OUT_BATCH(d, i, MI_NOOP); - so->aux_batch_offset = i * sizeof(u32); + so->aux_offset = i * sizeof(u32); - if (HAS_POOLED_EU(dev_priv)) { + if (HAS_POOLED_EU(i915)) { /* * We always program 3x6 pool config but depending upon which * subslice is disabled HW drops down to appropriate config @@ -143,89 +149,131 @@ static int render_state_setup(struct render_state *so) } OUT_BATCH(d, i, MI_BATCH_BUFFER_END); - so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset; - + so->aux_size = i * sizeof(u32) - so->aux_offset; + so->aux_offset += so->batch_offset; /* * Since we are sending length, we need to strictly conform to * all requirements. For Gen2 this must be a multiple of 8. */ - so->aux_batch_size = ALIGN(so->aux_batch_size, 8); + so->aux_size = ALIGN(so->aux_size, 8); - kunmap(page); + if (needs_clflush) + drm_clflush_virt_range(d, i * sizeof(u32)); + kunmap_atomic(d); - ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false); - if (ret) - return ret; - - if (rodata->reloc[reloc_index] != -1) { - DRM_ERROR("only %d relocs resolved\n", reloc_index); - return -EINVAL; - } - - return 0; - -err_out: - kunmap(page); + ret = i915_gem_object_set_to_gtt_domain(obj, false); +out: + i915_gem_obj_finish_shmem_access(obj); return ret; + +err: + kunmap_atomic(d); + ret = -EINVAL; + goto out; } #undef OUT_BATCH -int i915_gem_render_state_init(struct drm_i915_gem_request *req) +int i915_gem_render_state_init(struct intel_engine_cs *engine) { - struct render_state so; + struct intel_render_state *so; + const struct intel_renderstate_rodata *rodata; struct drm_i915_gem_object *obj; int ret; - if (WARN_ON(req->engine->id != RCS)) - return -ENOENT; - - so.rodata = render_state_get_rodata(req); - if (!so.rodata) + if (engine->id != RCS) return 0; - if (so.rodata->batch_items * 4 > 4096) + rodata = render_state_get_rodata(engine); + if (!rodata) + return 0; + + if (rodata->batch_items * 4 > 4096) return -EINVAL; - obj = i915_gem_object_create_internal(req->i915, 4096); - if (IS_ERR(obj)) - return PTR_ERR(obj); + so = kmalloc(sizeof(*so), GFP_KERNEL); + if (!so) + return -ENOMEM; - so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL); - if (IS_ERR(so.vma)) { - ret = PTR_ERR(so.vma); + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto err_free; + } + + so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(so->vma)) { + ret = PTR_ERR(so->vma); goto err_obj; } - ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL); - if (ret) - goto err_obj; + so->rodata = rodata; + engine->render_state = so; + return 0; - ret = render_state_setup(&so); - if (ret) - goto err_unpin; +err_obj: + i915_gem_object_put(obj); +err_free: + kfree(so); + return ret; +} - ret = req->engine->emit_bb_start(req, so.vma->node.start, - so.rodata->batch_items * 4, +int i915_gem_render_state_emit(struct drm_i915_gem_request *req) +{ + struct intel_render_state *so; + int ret; + + so = req->engine->render_state; + if (!so) + return 0; + + /* Recreate the page after shrinking */ + if (!so->vma->obj->pages) + so->batch_offset = -1; + + ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + return ret; + + if (so->vma->node.start != so->batch_offset) { + ret = render_state_setup(so, req->i915); + if (ret) + goto err_unpin; + } + + ret = req->engine->emit_bb_start(req, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (ret) goto err_unpin; - if (so.aux_batch_size > 8) { + if (so->aux_size > 8) { ret = req->engine->emit_bb_start(req, - (so.vma->node.start + - so.aux_batch_offset), - so.aux_batch_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (ret) goto err_unpin; } - i915_vma_move_to_active(so.vma, req, 0); + i915_vma_move_to_active(so->vma, req, 0); err_unpin: - i915_vma_unpin(so.vma); - i915_vma_close(so.vma); -err_obj: - __i915_gem_object_release_unless_active(obj); + i915_vma_unpin(so->vma); return ret; } + +void i915_gem_render_state_fini(struct intel_engine_cs *engine) +{ + struct intel_render_state *so; + struct drm_i915_gem_object *obj; + + so = fetch_and_zero(&engine->render_state); + if (!so) + return; + + obj = so->vma->obj; + + i915_vma_close(so->vma); + __i915_gem_object_release_unless_active(obj); + + kfree(so); +} diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 18cce3f06e9c..87481845799d 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,6 +26,8 @@ struct drm_i915_gem_request; -int i915_gem_render_state_init(struct drm_i915_gem_request *req); +int i915_gem_render_state_init(struct intel_engine_cs *engine); +int i915_gem_render_state_emit(struct drm_i915_gem_request *req); +void i915_gem_render_state_fini(struct intel_engine_cs *engine); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index b2de371d2bf5..fd551824adf9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -314,6 +314,10 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) return ret; + ret = i915_gem_render_state_init(engine); + if (ret) + return ret; + return 0; } @@ -328,6 +332,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { intel_engine_cleanup_scratch(engine); + i915_gem_render_state_fini(engine); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index bc86585b9fbb..1c1bd30e8b2d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1637,7 +1637,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return i915_gem_render_state_init(req); + return i915_gem_render_state_emit(req); } /** diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a15b9b5f2924..aaa46d9ffbc1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -648,7 +648,7 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) if (ret != 0) return ret; - ret = i915_gem_render_state_init(req); + ret = i915_gem_render_state_emit(req); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 09bb89cfb7c3..cb6e96c6cd47 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -157,6 +157,7 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; +struct intel_render_state; struct intel_engine_cs { struct drm_i915_private *i915; @@ -184,6 +185,8 @@ struct intel_engine_cs { unsigned int irq_shift; struct intel_ring *buffer; + struct intel_render_state *render_state; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the