diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec5f6fb42ab3..388c028e223c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2189,10 +2189,12 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_MAPPABLE 0x1 #define PIN_NONBLOCK 0x2 #define PIN_GLOBAL 0x4 +#define PIN_OFFSET_BIAS 0x8 +#define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, uint32_t alignment, - unsigned flags); + uint64_t flags); int __must_check i915_vma_unbind(struct i915_vma *vma); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); @@ -2445,6 +2447,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignment, unsigned cache_level, + unsigned long start, + unsigned long end, unsigned flags); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); int i915_gem_evict_everything(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b391f30f9985..3326770c9ed2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3326,12 +3326,14 @@ static struct i915_vma * i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned alignment, - unsigned flags) + uint64_t flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - size_t gtt_max = + unsigned long start = + flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + unsigned long end = flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3360,11 +3362,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, /* If the object is bigger than the entire aperture, reject it early * before evicting everything in a vain attempt to find space. */ - if (obj->base.size > gtt_max) { - DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", + if (obj->base.size > end) { + DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", - gtt_max); + end); return ERR_PTR(-E2BIG); } @@ -3381,12 +3383,15 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, - obj->cache_level, 0, gtt_max, + obj->cache_level, + start, end, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, - obj->cache_level, flags); + obj->cache_level, + start, end, + flags); if (ret == 0) goto search_free; @@ -3946,11 +3951,30 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return ret; } +static bool +i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + + if (alignment && + vma->node.start & (alignment - 1)) + return true; + + if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) + return true; + + if (flags & PIN_OFFSET_BIAS && + vma->node.start < (flags & PIN_OFFSET_MASK)) + return true; + + return false; +} + int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, uint32_t alignment, - unsigned flags) + uint64_t flags) { struct i915_vma *vma; int ret; @@ -3963,15 +3987,13 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; - if ((alignment && - vma->node.start & (alignment - 1)) || - (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) { + if (i915_vma_misplaced(vma, alignment, flags)) { WARN(vma->pin_count, "bo is already pinned with incorrect alignment:" " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", i915_gem_obj_offset(obj, vm), alignment, - flags & PIN_MAPPABLE, + !!(flags & PIN_MAPPABLE), obj->map_and_fenceable); ret = i915_vma_unbind(vma); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 75fca63dc8c1..bbf4b12d842e 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -68,9 +68,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) int i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, int min_size, unsigned alignment, unsigned cache_level, + unsigned long start, unsigned long end, unsigned flags) { - struct drm_i915_private *dev_priv = dev->dev_private; struct list_head eviction_list, unwind_list; struct i915_vma *vma; int ret = 0; @@ -102,11 +102,10 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, */ INIT_LIST_HEAD(&unwind_list); - if (flags & PIN_MAPPABLE) { - BUG_ON(!i915_is_ggtt(vm)); + if (start != 0 || end != vm->total) { drm_mm_init_scan_with_range(&vm->mm, min_size, - alignment, cache_level, 0, - dev_priv->gtt.mappable_end); + alignment, cache_level, + start, end); } else drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7aaf6390cfaf..20fef6c50267 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,9 @@ #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) + +#define BATCH_OFFSET_BIAS (256*1024) struct eb_vmas { struct list_head vmas; @@ -545,7 +548,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; bool need_fence; - unsigned flags; + uint64_t flags; int ret; flags = 0; @@ -559,6 +562,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) + flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); if (ret) @@ -592,6 +597,36 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, return 0; } +static bool +eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + struct drm_i915_gem_object *obj = vma->obj; + bool need_fence, need_mappable; + + need_fence = + has_fenced_gpu_access && + entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj->tiling_mode != I915_TILING_NONE; + need_mappable = need_fence || need_reloc_mappable(vma); + + WARN_ON((need_mappable || need_fence) && + !i915_is_ggtt(vma->vm)); + + if (entry->alignment && + vma->node.start & (entry->alignment - 1)) + return true; + + if (need_mappable && !obj->map_and_fenceable) + return true; + + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && + vma->node.start < BATCH_OFFSET_BIAS) + return true; + + return false; +} + static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct list_head *vmas, @@ -653,26 +688,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, /* Unbind any ill-fitting objects or pin. */ list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - bool need_fence, need_mappable; - - obj = vma->obj; - if (!drm_mm_node_allocated(&vma->node)) continue; - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(vma); - - WARN_ON((need_mappable || need_fence) && - !i915_is_ggtt(vma->vm)); - - if ((entry->alignment && - vma->node.start & (entry->alignment - 1)) || - (need_mappable && !obj->map_and_fenceable)) + if (eb_vma_misplaced(vma, has_fenced_gpu_access)) ret = i915_vma_unbind(vma); else ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); @@ -999,6 +1018,25 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +static struct drm_i915_gem_object * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma->obj; +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1153,7 +1191,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj; + batch_obj = eb_get_batch(eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 154b0f8bb88d..5deb22864c52 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1089,7 +1089,9 @@ alloc: if (ret == -ENOSPC && !retried) { ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, GEN6_PD_SIZE, GEN6_PD_ALIGN, - I915_CACHE_NONE, 0); + I915_CACHE_NONE, + 0, dev_priv->gtt.base.total, + 0); if (ret) return ret;