Merge branch 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel into drm-next

drm-intel-next-2016-08-22:
- bugfixes and cleanups for rcu-protected requests (Chris)
- atomic modeset fixes for gpu reset on pre-g4x (Maarten&Ville)
- guc submission improvements (Dave Gordon)
- panel power sequence cleanup (Imre)
- better use of stolen and unmappable ggtt (Chris), plus prep work to make that
  happen
- rework of framebuffer offsets, prep for multi-plane framebuffers (Ville)
- fully partial ggtt vmaps, including fenced ones (Chris)
- move lots more of the gem tracking from the object to the vma (Chris)
- tune the command parser (Chris)
- allow fbc without fences on recent platforms (Chris)
- fbc frontbuffer tracking fixes (Chris)
- fast prefaulting using io-mapping.h pgprot caching (Chris)

* 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel: (141 commits)
  io-mapping: Fixup for different names of writecombine
  io-mapping.h: s/PAGE_KERNEL_IO/PAGE_KERNEL/
  drm/i915: Update DRIVER_DATE to 20160822
  drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
  drm/i915: Embed the io-mapping struct inside drm_i915_private
  io-mapping: Always create a struct to hold metadata about the io-mapping
  drm/i915/fbc: Allow on unfenced surfaces, for recent gen
  drm/i915/fbc: Don't set an illegal fence if unfenced
  drm/i915: Flush delayed fence releases after reset
  drm/i915: Reattach comment, complete type specification
  drm/i915/cmdparser: Accelerate copies from WC memory
  drm/i915/cmdparser: Use binary search for faster register lookup
  drm/i915/cmdparser: Check for SKIP descriptors first
  drm/i915/cmdparser: Compare against the previous command descriptor
  drm/i915/cmdparser: Improve hash function
  drm/i915/cmdparser: Only cache the dst vmap
  drm/i915/cmdparser: Use cached vmappings
  drm/i915/cmdparser: Add the TIMESTAMP register for the other engines
  drm/i915/cmdparser: Make initialisation failure non-fatal
  drm/i915: Stop discarding GTT cache-domain on unbind vma
  ...
Committed by Dave Airlie on 2016-08-25 12:36:36 +10:00
commit 51d6120792
53 changed files with 4313 additions and 3363 deletions


@ -317,16 +317,11 @@ static phys_addr_t __init i85x_stolen_base(int num, int slot, int func,
static phys_addr_t __init i865_stolen_base(int num, int slot, int func,
size_t stolen_size)
{
u16 toud;
u16 toud = 0;
/*
* FIXME is the graphics stolen memory region
* always at TOUD? Ie. is it always the last
* one to be allocated by the BIOS?
*/
toud = read_pci_config_16(0, 0, 0, I865_TOUD);
return (phys_addr_t)toud << 16;
return (phys_addr_t)(toud << 16) + i845_tseg_size();
}
static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,

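A worked example of the adjusted formula above: if the I865_TOUD register reads 0x0fe0 and i845_tseg_size() reports 1 MiB, the stolen base becomes (0x0fe0 << 16) + 0x100000 = 0x0ff00000 (register value and TSEG size chosen purely for illustration).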

@ -845,6 +845,8 @@ void intel_gtt_insert_page(dma_addr_t addr,
unsigned int flags)
{
intel_private.driver->write_entry(addr, pg, flags);
if (intel_private.driver->chipset_flush)
intel_private.driver->chipset_flush();
}
EXPORT_SYMBOL(intel_gtt_insert_page);


@ -47,7 +47,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/


@ -3,12 +3,16 @@
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
subdir-ccflags-y += \
$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
# Please keep these build lists sorted!
# core driver code
i915-y := i915_drv.o \
i915_irq.o \
i915_memcpy.o \
i915_mm.o \
i915_params.o \
i915_pci.o \
i915_suspend.o \
@ -110,6 +114,6 @@ i915-y += intel_gvt.o
include $(src)/gvt/Makefile
endif
obj-$(CONFIG_DRM_I915) += i915.o
obj-$(CONFIG_DRM_I915) += i915.o
CFLAGS_i915_trace_points.o := -I$(src)


@ -86,24 +86,25 @@
* general bitmasking mechanism.
*/
#define STD_MI_OPCODE_MASK 0xFF800000
#define STD_3D_OPCODE_MASK 0xFFFF0000
#define STD_2D_OPCODE_MASK 0xFFC00000
#define STD_MFX_OPCODE_MASK 0xFFFF0000
#define STD_MI_OPCODE_SHIFT (32 - 9)
#define STD_3D_OPCODE_SHIFT (32 - 16)
#define STD_2D_OPCODE_SHIFT (32 - 10)
#define STD_MFX_OPCODE_SHIFT (32 - 16)
#define MIN_OPCODE_SHIFT 16
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
.cmd = { (op), (opm) }, \
.cmd = { (op), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
/* Convenience macros to compress the tables */
#define SMI STD_MI_OPCODE_MASK
#define S3D STD_3D_OPCODE_MASK
#define S2D STD_2D_OPCODE_MASK
#define SMFX STD_MFX_OPCODE_MASK
#define SMI STD_MI_OPCODE_SHIFT
#define S3D STD_3D_OPCODE_SHIFT
#define S2D STD_2D_OPCODE_SHIFT
#define SMFX STD_MFX_OPCODE_SHIFT
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
@ -350,6 +351,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
#undef CMD
#undef SMI
#undef S3D
@ -458,6 +462,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG32(GEN7_GPGPU_DISPATCHDIMX),
REG32(GEN7_GPGPU_DISPATCHDIMY),
REG32(GEN7_GPGPU_DISPATCHDIMZ),
REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
@ -473,6 +478,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG32(GEN7_L3SQCREG1),
REG32(GEN7_L3CNTLREG2),
REG32(GEN7_L3CNTLREG3),
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
@ -502,7 +508,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
};
static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG32(BCS_SWCTRL),
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
@ -691,12 +700,26 @@ struct cmd_node {
* non-opcode bits being set. But if we don't include those bits, some 3D
* commands may hash to the same bucket due to not including opcode bits that
* make the command unique. For now, we will risk hashing to the same bucket.
*
* If we attempt to generate a perfect hash, we should be able to look at bits
* 31:29 of a command from a batch buffer and use the full mask for that
* client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
*/
#define CMD_HASH_MASK STD_MI_OPCODE_MASK
static inline u32 cmd_header_key(u32 x)
{
u32 shift;
switch (x >> INSTR_CLIENT_SHIFT) {
default:
case INSTR_MI_CLIENT:
shift = STD_MI_OPCODE_SHIFT;
break;
case INSTR_RC_CLIENT:
shift = STD_3D_OPCODE_SHIFT;
break;
case INSTR_BC_CLIENT:
shift = STD_2D_OPCODE_SHIFT;
break;
}
return x >> shift;
}
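
To make the hashing comment above concrete: MI_BATCH_BUFFER_END has header 0x05000000, its client bits 31:29 are 0 (MI), so it keys on bits 31:23 and its hash key is 0x0a; a render command such as PIPE_CONTROL (header 0x7a000002 for a 4-dword packet) has client 0x3 (RC) and keys on its full upper 16 bits, 0x7a00. Commands from different clients therefore land in distinct buckets rather than all sharing the old single CMD_HASH_MASK. (Header values are quoted here for illustration only.)
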
static int init_hash_table(struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
@ -720,7 +743,7 @@ static int init_hash_table(struct intel_engine_cs *engine,
desc_node->desc = desc;
hash_add(engine->cmd_hash, &desc_node->node,
desc->cmd.value & CMD_HASH_MASK);
cmd_header_key(desc->cmd.value));
}
}
@ -746,17 +769,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
* Optionally initializes fields related to batch buffer command parsing in the
* struct intel_engine_cs based on whether the platform requires software
* command parsing.
*
* Return: non-zero if initialization fails
*/
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
int ret;
if (!IS_GEN7(engine->i915))
return 0;
return;
switch (engine->id) {
case RCS:
@ -811,24 +832,27 @@ int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
break;
default:
MISSING_CASE(engine->id);
BUG();
return;
}
BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count));
BUG_ON(!validate_regs_sorted(engine));
WARN_ON(!hash_empty(engine->cmd_hash));
if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
DRM_ERROR("%s: command descriptions are not sorted\n",
engine->name);
return;
}
if (!validate_regs_sorted(engine)) {
DRM_ERROR("%s: registers are not sorted\n", engine->name);
return;
}
ret = init_hash_table(engine, cmd_tables, cmd_table_count);
if (ret) {
DRM_ERROR("CMD: cmd_parser_init failed!\n");
DRM_ERROR("%s: initialised failed!\n", engine->name);
fini_hash_table(engine);
return ret;
return;
}
engine->needs_cmd_parser = true;
return 0;
}
/**
@ -853,12 +877,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
struct cmd_node *desc_node;
hash_for_each_possible(engine->cmd_hash, desc_node, node,
cmd_header & CMD_HASH_MASK) {
cmd_header_key(cmd_header)) {
const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
u32 masked_cmd = desc->cmd.mask & cmd_header;
u32 masked_value = desc->cmd.value & desc->cmd.mask;
if (masked_cmd == masked_value)
if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
return desc;
}
@ -876,11 +897,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
u32 cmd_header,
const struct drm_i915_cmd_descriptor *desc,
struct drm_i915_cmd_descriptor *default_desc)
{
const struct drm_i915_cmd_descriptor *desc;
u32 mask;
if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
return desc;
desc = find_cmd_in_table(engine, cmd_header);
if (desc)
return desc;
@ -889,140 +913,127 @@ find_cmd(struct intel_engine_cs *engine,
if (!mask)
return NULL;
BUG_ON(!default_desc);
default_desc->flags = CMD_DESC_SKIP;
default_desc->cmd.value = cmd_header;
default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT;
default_desc->length.mask = mask;
default_desc->flags = CMD_DESC_SKIP;
return default_desc;
}
static const struct drm_i915_reg_descriptor *
find_reg(const struct drm_i915_reg_descriptor *table,
int count, u32 addr)
__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
{
int i;
for (i = 0; i < count; i++) {
if (i915_mmio_reg_offset(table[i].addr) == addr)
return &table[i];
int start = 0, end = count;
while (start < end) {
int mid = start + (end - start) / 2;
int ret = addr - i915_mmio_reg_offset(table[mid].addr);
if (ret < 0)
end = mid;
else if (ret > 0)
start = mid + 1;
else
return &table[mid];
}
return NULL;
}
static const struct drm_i915_reg_descriptor *
find_reg_in_tables(const struct drm_i915_reg_table *tables,
int count, bool is_master, u32 addr)
find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
{
int i;
const struct drm_i915_reg_table *table;
const struct drm_i915_reg_descriptor *reg;
const struct drm_i915_reg_table *table = engine->reg_tables;
int count = engine->reg_table_count;
for (i = 0; i < count; i++) {
table = &tables[i];
do {
if (!table->master || is_master) {
reg = find_reg(table->regs, table->num_regs,
addr);
const struct drm_i915_reg_descriptor *reg;
reg = __find_reg(table->regs, table->num_regs, addr);
if (reg != NULL)
return reg;
}
}
} while (table++, --count);
return NULL;
}
static u32 *vmap_batch(struct drm_i915_gem_object *obj,
unsigned start, unsigned len)
{
int i;
void *addr = NULL;
struct sg_page_iter sg_iter;
int first_page = start >> PAGE_SHIFT;
int last_page = (len + start + 4095) >> PAGE_SHIFT;
int npages = last_page - first_page;
struct page **pages;
pages = drm_malloc_ab(npages, sizeof(*pages));
if (pages == NULL) {
DRM_DEBUG_DRIVER("Failed to get space for pages\n");
goto finish;
}
i = 0;
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
pages[i++] = sg_page_iter_page(&sg_iter);
if (i == npages)
break;
}
addr = vmap(pages, i, 0, PAGE_KERNEL);
if (addr == NULL) {
DRM_DEBUG_DRIVER("Failed to vmap pages\n");
goto finish;
}
finish:
if (pages)
drm_free_large(pages);
return (u32*)addr;
}
/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
u32 batch_start_offset,
u32 batch_len)
u32 batch_len,
bool *needs_clflush_after)
{
int needs_clflush = 0;
void *src_base, *src;
void *dst = NULL;
unsigned int src_needs_clflush;
unsigned int dst_needs_clflush;
void *dst, *src;
int ret;
if (batch_len > dest_obj->base.size ||
batch_len + batch_start_offset > src_obj->base.size)
return ERR_PTR(-E2BIG);
if (WARN_ON(dest_obj->pages_pin_count == 0))
return ERR_PTR(-ENODEV);
ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
if (ret) {
DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
if (ret)
return ERR_PTR(ret);
}
src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
if (!src_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
ret = -ENOMEM;
ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
if (ret) {
dst = ERR_PTR(ret);
goto unpin_src;
}
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
goto unmap_src;
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
if (IS_ERR(dst))
goto unpin_dst;
src = ERR_PTR(-ENODEV);
if (src_needs_clflush &&
i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
i915_memcpy_from_wc(dst,
src + batch_start_offset,
ALIGN(batch_len, 16));
i915_gem_object_unpin_map(src_obj);
}
}
if (IS_ERR(src)) {
void *ptr;
int offset, n;
offset = offset_in_page(batch_start_offset);
/* We can avoid clflushing partial cachelines before the write
* if we only every write full cache-lines. Since we know that
* both the source and destination are in multiples of
* PAGE_SIZE, we can simply round up to the next cacheline.
* We don't care about copying too much here as we only
* validate up to the end of the batch.
*/
if (dst_needs_clflush & CLFLUSH_BEFORE)
batch_len = roundup(batch_len,
boot_cpu_data.x86_clflush_size);
ptr = dst;
for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
int len = min_t(int, batch_len, PAGE_SIZE - offset);
src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
if (src_needs_clflush)
drm_clflush_virt_range(src + offset, len);
memcpy(ptr, src + offset, len);
kunmap_atomic(src);
ptr += len;
batch_len -= len;
offset = 0;
}
}
dst = vmap_batch(dest_obj, 0, batch_len);
if (!dst) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
ret = -ENOMEM;
goto unmap_src;
}
/* dst_obj is returned with vmap pinned */
*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
src = src_base + offset_in_page(batch_start_offset);
if (needs_clflush)
drm_clflush_virt_range(src, batch_len);
memcpy(dst, src, batch_len);
unmap_src:
vunmap(src_base);
unpin_dst:
i915_gem_obj_finish_shmem_access(dst_obj);
unpin_src:
i915_gem_object_unpin_pages(src_obj);
return ret ? ERR_PTR(ret) : dst;
i915_gem_obj_finish_shmem_access(src_obj);
return dst;
}
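
A quick sanity check of the rounding described in the comment above: with a 64-byte cacheline, a 4007-byte batch is rounded up to 4032 bytes, so the copy always ends on a cacheline boundary and no clflush of a partially written line is needed before the copy (numbers illustrative).
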
/**
@ -1052,6 +1063,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
const bool is_master,
bool *oacontrol_set)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
if (desc->flags & CMD_DESC_REJECT) {
DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
return false;
@ -1076,10 +1090,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
find_reg_in_tables(engine->reg_tables,
engine->reg_table_count,
is_master,
reg_addr);
find_reg(engine, is_master, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
@ -1200,16 +1211,19 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 batch_len,
bool is_master)
{
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
u32 *cmd, *batch_end;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
bool needs_clflush_after = false;
int ret = 0;
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
cmd = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len,
&needs_clflush_after);
if (IS_ERR(cmd)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
return PTR_ERR(batch_base);
return PTR_ERR(cmd);
}
/*
@ -1217,17 +1231,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
* large or larger and copy_batch() will write MI_NOPs to the extra
* space. Parsing should be faster in some cases this way.
*/
batch_end = batch_base + (batch_len / sizeof(*batch_end));
cmd = batch_base;
batch_end = cmd + (batch_len / sizeof(*batch_end));
while (cmd < batch_end) {
const struct drm_i915_cmd_descriptor *desc;
u32 length;
if (*cmd == MI_BATCH_BUFFER_END)
break;
desc = find_cmd(engine, *cmd, &default_desc);
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
@ -1278,7 +1289,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
ret = -EINVAL;
}
vunmap(batch_base);
if (ret == 0 && needs_clflush_after)
drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}

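The register-whitelist lookup in this file now depends on the tables being sorted by MMIO offset (enforced by the validate_regs_sorted() check above) so that __find_reg() can binary-search them. A standalone, kernel-style sketch of that lookup pattern, using a simplified descriptor type rather than the driver's struct drm_i915_reg_descriptor:

/* Illustrative only -- a generic version of the __find_reg() loop above. */
struct reg_desc {
        u32 offset;                      /* MMIO offset; table sorted ascending */
};

static const struct reg_desc *
find_reg_desc(const struct reg_desc *table, unsigned int count, u32 addr)
{
        unsigned int start = 0, end = count;

        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (addr < table[mid].offset)
                        end = mid;
                else if (addr > table[mid].offset)
                        start = mid + 1;
                else
                        return &table[mid];
        }

        return NULL;                     /* not on the whitelist */
}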

@ -40,12 +40,6 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
enum {
ACTIVE_LIST,
INACTIVE_LIST,
PINNED_LIST,
};
/* As the drm_debugfs_init() routines are called before dev->dev_private is
* allocated we need to hook into the minor for release. */
static int
@ -111,7 +105,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
static char get_global_flag(struct drm_i915_gem_object *obj)
{
return i915_gem_obj_to_ggtt(obj) ? 'g' : ' ';
return i915_gem_object_to_ggtt(obj, NULL) ? 'g' : ' ';
}
static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
@ -158,11 +152,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
seq_printf(m, "%x ",
i915_gem_active_get_seqno(&obj->last_read[id],
&obj->base.dev->struct_mutex));
seq_printf(m, "] %x %x%s%s%s",
seq_printf(m, "] %x %s%s%s",
i915_gem_active_get_seqno(&obj->last_write,
&obj->base.dev->struct_mutex),
i915_gem_active_get_seqno(&obj->last_fence,
&obj->base.dev->struct_mutex),
i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
obj->dirty ? " dirty" : "",
obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@ -175,8 +167,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_display)
seq_printf(m, " (display)");
if (obj->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@ -186,6 +176,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
vma->node.start, vma->node.size);
if (i915_vma_is_ggtt(vma))
seq_printf(m, ", type: %u", vma->ggtt_view.type);
if (vma->fence)
seq_printf(m, " , fence: %d%s",
vma->fence->id,
i915_gem_active_isset(&vma->last_fence) ? "*" : "");
seq_puts(m, ")");
}
if (obj->stolen)
@ -210,53 +204,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
}
static int i915_gem_object_list_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
uintptr_t list = (uintptr_t) node->info_ent->data;
struct list_head *head;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_vma *vma;
u64 total_obj_size, total_gtt_size;
int count, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
/* FIXME: the user of this interface might want more than just GGTT */
switch (list) {
case ACTIVE_LIST:
seq_puts(m, "Active:\n");
head = &ggtt->base.active_list;
break;
case INACTIVE_LIST:
seq_puts(m, "Inactive:\n");
head = &ggtt->base.inactive_list;
break;
default:
mutex_unlock(&dev->struct_mutex);
return -EINVAL;
}
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(vma, head, vm_link) {
seq_printf(m, " ");
describe_obj(m, vma->obj);
seq_printf(m, "\n");
total_obj_size += vma->obj->base.size;
total_gtt_size += vma->node.size;
count++;
}
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
return 0;
}
static int obj_rank_by_stolen(void *priv,
struct list_head *A, struct list_head *B)
{
@ -322,17 +269,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
return 0;
}
#define count_objects(list, member) do { \
list_for_each_entry(obj, list, member) { \
size += i915_gem_obj_total_ggtt_size(obj); \
++count; \
if (obj->map_and_fenceable) { \
mappable_size += i915_gem_obj_ggtt_size(obj); \
++mappable_count; \
} \
} \
} while (0)
struct file_stats {
struct drm_i915_file_private *file_priv;
unsigned long count;
@ -418,9 +354,9 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
if (ctx->engine[n].state)
per_file_stats(0, ctx->engine[n].state, data);
per_file_stats(0, ctx->engine[n].state->obj, data);
if (ctx->engine[n].ring)
per_file_stats(0, ctx->engine[n].ring->obj, data);
per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
}
return 0;
@ -447,30 +383,16 @@ static void print_context_stats(struct seq_file *m,
print_file_stats(m, "[k]contexts", stats);
}
#define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_total_ggtt_size(vma->obj); \
++count; \
if (vma->obj->map_and_fenceable) { \
mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
++mappable_count; \
} \
} \
} while (0)
static int i915_gem_object_info(struct seq_file *m, void* data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
u32 count, mappable_count, purgeable_count;
u64 size, mappable_size, purgeable_size;
unsigned long pin_mapped_count = 0, pin_mapped_purgeable_count = 0;
u64 pin_mapped_size = 0, pin_mapped_purgeable_size = 0;
u32 count, mapped_count, purgeable_count, dpy_count;
u64 size, mapped_size, purgeable_size, dpy_size;
struct drm_i915_gem_object *obj;
struct drm_file *file;
struct i915_vma *vma;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
@ -481,70 +403,53 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
dev_priv->mm.object_count,
dev_priv->mm.object_memory);
size = count = mappable_size = mappable_count = 0;
count_objects(&dev_priv->mm.bound_list, global_list);
seq_printf(m, "%u [%u] objects, %llu [%llu] bytes in gtt\n",
count, mappable_count, size, mappable_size);
size = count = mappable_size = mappable_count = 0;
count_vmas(&ggtt->base.active_list, vm_link);
seq_printf(m, " %u [%u] active objects, %llu [%llu] bytes\n",
count, mappable_count, size, mappable_size);
size = count = mappable_size = mappable_count = 0;
count_vmas(&ggtt->base.inactive_list, vm_link);
seq_printf(m, " %u [%u] inactive objects, %llu [%llu] bytes\n",
count, mappable_count, size, mappable_size);
size = count = purgeable_size = purgeable_count = 0;
size = count = 0;
mapped_size = mapped_count = 0;
purgeable_size = purgeable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
size += obj->base.size, ++count;
if (obj->madv == I915_MADV_DONTNEED)
purgeable_size += obj->base.size, ++purgeable_count;
if (obj->mapping) {
pin_mapped_count++;
pin_mapped_size += obj->base.size;
if (obj->pages_pin_count == 0) {
pin_mapped_purgeable_count++;
pin_mapped_purgeable_size += obj->base.size;
}
}
}
seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
size += obj->base.size;
++count;
size = count = mappable_size = mappable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->fault_mappable) {
size += i915_gem_obj_ggtt_size(obj);
++count;
}
if (obj->pin_display) {
mappable_size += i915_gem_obj_ggtt_size(obj);
++mappable_count;
}
if (obj->madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
if (obj->mapping) {
pin_mapped_count++;
pin_mapped_size += obj->base.size;
if (obj->pages_pin_count == 0) {
pin_mapped_purgeable_count++;
pin_mapped_purgeable_size += obj->base.size;
}
mapped_count++;
mapped_size += obj->base.size;
}
}
seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
size = count = dpy_size = dpy_count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
size += obj->base.size;
++count;
if (obj->pin_display) {
dpy_size += obj->base.size;
++dpy_count;
}
if (obj->madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
if (obj->mapping) {
mapped_count++;
mapped_size += obj->base.size;
}
}
seq_printf(m, "%u bound objects, %llu bytes\n",
count, size);
seq_printf(m, "%u purgeable objects, %llu bytes\n",
purgeable_count, purgeable_size);
seq_printf(m, "%u pinned mappable objects, %llu bytes\n",
mappable_count, mappable_size);
seq_printf(m, "%u fault mappable objects, %llu bytes\n",
count, size);
seq_printf(m,
"%lu [%lu] pin mapped objects, %llu [%llu] bytes [purgeable]\n",
pin_mapped_count, pin_mapped_purgeable_count,
pin_mapped_size, pin_mapped_purgeable_size);
seq_printf(m, "%u mapped objects, %llu bytes\n",
mapped_count, mapped_size);
seq_printf(m, "%u display objects (pinned), %llu bytes\n",
dpy_count, dpy_size);
seq_printf(m, "%llu [%llu] gtt total\n",
ggtt->base.total, ggtt->mappable_end - ggtt->base.start);
@ -557,6 +462,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
print_context_stats(m, dev_priv);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_request *request;
struct task_struct *task;
memset(&stats, 0, sizeof(stats));
@ -570,10 +477,17 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
mutex_lock(&dev->struct_mutex);
request = list_first_entry_or_null(&file_priv->mm.request_list,
struct drm_i915_gem_request,
client_list);
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
task = pid_task(request && request->ctx->pid ?
request->ctx->pid : file->pid,
PIDTYPE_PID);
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
mutex_unlock(&dev->struct_mutex);
}
mutex_unlock(&dev->filelist_mutex);
@ -584,8 +498,8 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
uintptr_t list = (uintptr_t) node->info_ent->data;
struct drm_i915_private *dev_priv = to_i915(dev);
bool show_pin_display_only = !!data;
struct drm_i915_gem_object *obj;
u64 total_obj_size, total_gtt_size;
int count, ret;
@ -596,7 +510,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj))
if (show_pin_display_only && !obj->pin_display)
continue;
seq_puts(m, " ");
@ -755,12 +669,11 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
seq_printf(m, "%s requests: %d\n", engine->name, count);
list_for_each_entry(req, &engine->request_list, link) {
struct pid *pid = req->ctx->pid;
struct task_struct *task;
rcu_read_lock();
task = NULL;
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
seq_printf(m, " %x @ %d: %s [%d]\n",
req->fence.seqno,
(int) (jiffies - req->emitted_jiffies),
@ -787,8 +700,6 @@ static void i915_ring_seqno_info(struct seq_file *m,
seq_printf(m, "Current sequence (%s): %x\n",
engine->name, intel_engine_get_seqno(engine));
seq_printf(m, "Current user interrupts (%s): %lx\n",
engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
spin_lock(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@ -1027,14 +938,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
struct i915_vma *vma = dev_priv->fence_regs[i].vma;
seq_printf(m, "Fence %d, pin count = %d, object = ",
i, dev_priv->fence_regs[i].pin_count);
if (obj == NULL)
if (!vma)
seq_puts(m, "unused");
else
describe_obj(m, obj);
describe_obj(m, vma->obj);
seq_putc(m, '\n');
}
@ -1434,11 +1345,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
engine->hangcheck.seqno,
seqno[id],
engine->last_submitted_seqno);
seq_printf(m, "\twaiters? %d\n",
intel_engine_has_waiter(engine));
seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
engine->hangcheck.user_interrupts,
READ_ONCE(engine->breadcrumbs.irq_wakeups));
seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
yesno(intel_engine_has_waiter(engine)),
yesno(test_bit(engine->id,
&dev_priv->gpu_error.missed_irq_rings)));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
@ -2052,18 +1962,17 @@ static int i915_context_status(struct seq_file *m, void *unused)
list_for_each_entry(ctx, &dev_priv->context_list, link) {
seq_printf(m, "HW context %u ", ctx->hw_id);
if (IS_ERR(ctx->file_priv)) {
seq_puts(m, "(deleted) ");
} else if (ctx->file_priv) {
struct pid *pid = ctx->file_priv->file->pid;
if (ctx->pid) {
struct task_struct *task;
task = get_pid_task(pid, PIDTYPE_PID);
task = get_pid_task(ctx->pid, PIDTYPE_PID);
if (task) {
seq_printf(m, "(%s [%d]) ",
task->comm, task->pid);
put_task_struct(task);
}
} else if (IS_ERR(ctx->file_priv)) {
seq_puts(m, "(deleted) ");
} else {
seq_puts(m, "(kernel) ");
}
@ -2077,7 +1986,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_printf(m, "%s: ", engine->name);
seq_putc(m, ce->initialised ? 'I' : 'i');
if (ce->state)
describe_obj(m, ce->state);
describe_obj(m, ce->state->obj);
if (ce->ring)
describe_ctx_ring(m, ce->ring);
seq_putc(m, '\n');
@ -2095,36 +2004,34 @@ static void i915_dump_lrc_obj(struct seq_file *m,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
struct i915_vma *vma = ctx->engine[engine->id].state;
struct page *page;
uint32_t *reg_state;
int j;
unsigned long ggtt_offset = 0;
seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
if (ctx_obj == NULL) {
seq_puts(m, "\tNot allocated\n");
if (!vma) {
seq_puts(m, "\tFake context\n");
return;
}
if (!i915_gem_obj_ggtt_bound(ctx_obj))
seq_puts(m, "\tNot bound in GGTT\n");
else
ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
if (vma->flags & I915_VMA_GLOBAL_BIND)
seq_printf(m, "\tBound in GGTT at 0x%08x\n",
i915_ggtt_offset(vma));
if (i915_gem_object_get_pages(ctx_obj)) {
seq_puts(m, "\tFailed to get pages for context object\n");
if (i915_gem_object_get_pages(vma->obj)) {
seq_puts(m, "\tFailed to get pages for context object\n\n");
return;
}
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
if (!WARN_ON(page == NULL)) {
reg_state = kmap_atomic(page);
page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN);
if (page) {
u32 *reg_state = kmap_atomic(page);
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
ggtt_offset + 4096 + (j * 4),
seq_printf(m,
"\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
j * 4,
reg_state[j], reg_state[j + 1],
reg_state[j + 2], reg_state[j + 3]);
}
@ -2444,6 +2351,20 @@ static int count_irq_waiters(struct drm_i915_private *i915)
return count;
}
static const char *rps_power_to_str(unsigned int power)
{
static const char * const strings[] = {
[LOW_POWER] = "low power",
[BETWEEN] = "mixed",
[HIGH_POWER] = "high power",
};
if (power >= ARRAY_SIZE(strings) || !strings[power])
return "unknown";
return strings[power];
}
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
@ -2455,12 +2376,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
seq_printf(m, "GPU busy? %s [%x]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
seq_printf(m, "Frequency requested %d\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
mutex_lock(&dev->filelist_mutex);
spin_lock(&dev_priv->rps.client_lock);
@ -2481,6 +2407,31 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
spin_unlock(&dev_priv->rps.client_lock);
mutex_unlock(&dev->filelist_mutex);
if (INTEL_GEN(dev_priv) >= 6 &&
dev_priv->rps.enabled &&
dev_priv->gt.active_engines) {
u32 rpup, rpupei;
u32 rpdown, rpdownei;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
rpup = I915_READ_FW(GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
rpupei = I915_READ_FW(GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
rpdown = I915_READ_FW(GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
rpdownei = I915_READ_FW(GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
rps_power_to_str(dev_priv->rps.power));
seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
100 * rpup / rpupei,
dev_priv->rps.up_threshold);
seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
100 * rpdown / rpdownei,
dev_priv->rps.down_threshold);
} else {
seq_puts(m, "\nRPS Autotuning inactive\n");
}
return 0;
}
@ -2547,6 +2498,7 @@ static void i915_guc_client_info(struct seq_file *m,
struct i915_guc_client *client)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
uint64_t tot = 0;
seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@ -2557,15 +2509,14 @@ static void i915_guc_client_info(struct seq_file *m,
client->wq_size, client->wq_offset, client->wq_tail);
seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
seq_printf(m, "\tLast submission result: %d\n", client->retcode);
for_each_engine(engine, dev_priv) {
for_each_engine_id(engine, dev_priv, id) {
u64 submissions = client->submissions[id];
tot += submissions;
seq_printf(m, "\tSubmissions: %llu %s\n",
client->submissions[engine->id],
engine->name);
tot += client->submissions[engine->id];
submissions, engine->name);
}
seq_printf(m, "\tTotal: %llu\n", tot);
}
@ -2578,6 +2529,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
struct intel_guc guc;
struct i915_guc_client client = {};
struct intel_engine_cs *engine;
enum intel_engine_id id;
u64 total = 0;
if (!HAS_GUC_SCHED(dev_priv))
@ -2604,11 +2556,11 @@ static int i915_guc_info(struct seq_file *m, void *data)
seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
seq_printf(m, "\nGuC submissions:\n");
for_each_engine(engine, dev_priv) {
for_each_engine_id(engine, dev_priv, id) {
u64 submissions = guc.submissions[id];
total += submissions;
seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
engine->name, guc.submissions[engine->id],
guc.last_seqno[engine->id]);
total += guc.submissions[engine->id];
engine->name, submissions, guc.last_seqno[id]);
}
seq_printf(m, "\t%s: %llu\n", "Total", total);
@ -2625,15 +2577,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
u32 *log;
struct drm_i915_gem_object *obj;
int i = 0, pg;
if (!log_obj)
if (!dev_priv->guc.log_vma)
return 0;
for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
obj = dev_priv->guc.log_vma->obj;
for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
@ -3237,7 +3189,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
int num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
int num_rings = INTEL_INFO(dev)->num_rings;
enum intel_engine_id id;
int j, ret;
@ -3255,7 +3207,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
struct page *page;
uint64_t *seqno;
page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0);
seqno = (uint64_t *)kmap_atomic(page);
for_each_engine_id(engine, dev_priv, id) {
@ -5386,9 +5338,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_capabilities", i915_capabilities, 0},
{"i915_gem_objects", i915_gem_object_info, 0},
{"i915_gem_gtt", i915_gem_gtt_info, 0},
{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
{"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1},
{"i915_gem_stolen", i915_gem_stolen_list_info },
{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
{"i915_gem_request", i915_gem_request_info, 0},


@ -827,6 +827,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
mutex_init(&dev_priv->wm.wm_mutex);
mutex_init(&dev_priv->pps_mutex);
i915_memcpy_init_early(dev_priv);
ret = i915_workqueues_init(dev_priv);
if (ret < 0)
return ret;
@ -1560,6 +1562,7 @@ static int i915_drm_resume(struct drm_device *dev)
i915_gem_resume(dev);
i915_restore_state(dev);
intel_pps_unlock_regs_wa(dev_priv);
intel_opregion_setup(dev_priv);
intel_init_pch_refclk(dev);


@ -70,7 +70,7 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
#define DRIVER_DATE "20160808"
#define DRIVER_DATE "20160822"
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
@ -455,15 +455,21 @@ struct intel_opregion {
struct intel_overlay;
struct intel_overlay_error_state;
#define I915_FENCE_REG_NONE -1
#define I915_MAX_NUM_FENCES 32
/* 32 fences + sign bit for FENCE_REG_NONE */
#define I915_MAX_NUM_FENCE_BITS 6
struct drm_i915_fence_reg {
struct list_head lru_list;
struct drm_i915_gem_object *obj;
struct list_head link;
struct drm_i915_private *i915;
struct i915_vma *vma;
int pin_count;
int id;
/**
* Whether the tiling parameters for the currently
* associated fence register have changed. Note that
* for the purposes of tracking tiling changes we also
* treat the unfenced register, the register slot that
* the object occupies whilst it executes a fenced
* command (such as BLT on gen2/3), as a "fence".
*/
bool dirty;
};
struct sdvo_device_mapping {
@ -475,130 +481,6 @@ struct sdvo_device_mapping {
u8 ddc_pin;
};
struct intel_display_error_state;
struct drm_i915_error_state {
struct kref ref;
struct timeval time;
char error_msg[128];
bool simulated;
int iommu;
u32 reset_count;
u32 suspend_count;
/* Generic register state */
u32 eir;
u32 pgtbl_er;
u32 ier;
u32 gtier[4];
u32 ccid;
u32 derrmr;
u32 forcewake;
u32 error; /* gen6+ */
u32 err_int; /* gen7 */
u32 fault_data0; /* gen8, gen9 */
u32 fault_data1; /* gen8, gen9 */
u32 done_reg;
u32 gac_eco;
u32 gam_ecochk;
u32 gab_ctl;
u32 gfx_mode;
u32 extra_instdone[I915_NUM_INSTDONE_REG];
u64 fence[I915_MAX_NUM_FENCES];
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
struct drm_i915_error_object *semaphore_obj;
struct drm_i915_error_engine {
int engine_id;
/* Software tracked state */
bool waiting;
int num_waiters;
int hangcheck_score;
enum intel_engine_hangcheck_action hangcheck_action;
int num_requests;
/* our own tracking of ring head and tail */
u32 cpu_ring_head;
u32 cpu_ring_tail;
u32 last_seqno;
u32 semaphore_seqno[I915_NUM_ENGINES - 1];
/* Register state */
u32 start;
u32 tail;
u32 head;
u32 ctl;
u32 hws;
u32 ipeir;
u32 ipehr;
u32 instdone;
u32 bbstate;
u32 instpm;
u32 instps;
u32 seqno;
u64 bbaddr;
u64 acthd;
u32 fault_reg;
u64 faddr;
u32 rc_psmi; /* sleep state */
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
struct drm_i915_error_object {
int page_count;
u64 gtt_offset;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
struct drm_i915_error_object *wa_ctx;
struct drm_i915_error_request {
long jiffies;
u32 seqno;
u32 tail;
} *requests;
struct drm_i915_error_waiter {
char comm[TASK_COMM_LEN];
pid_t pid;
u32 seqno;
} *waiters;
struct {
u32 gfx_mode;
union {
u64 pdp[4];
u32 pp_dir_base;
};
} vm_info;
pid_t pid;
char comm[TASK_COMM_LEN];
} engine[I915_NUM_ENGINES];
struct drm_i915_error_buffer {
u32 size;
u32 name;
u32 rseqno[I915_NUM_ENGINES], wseqno;
u64 gtt_offset;
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
s32 pinned:2;
u32 tiling:2;
u32 dirty:1;
u32 purgeable:1;
u32 userptr:1;
s32 engine:4;
u32 cache_level:3;
} **active_bo, **pinned_bo;
u32 *active_bo_count, *pinned_bo_count;
u32 vm_count;
};
struct intel_connector;
struct intel_encoder;
struct intel_crtc_state;
@ -793,6 +675,7 @@ struct intel_device_info {
u8 gen;
u16 gen_mask;
u8 ring_mask; /* Rings supported by the HW */
u8 num_rings;
DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON);
/* Register offsets for the various display pipes and transcoders */
int pipe_offsets[I915_MAX_TRANSCODERS];
@ -822,6 +705,134 @@ struct intel_device_info {
#undef DEFINE_FLAG
#undef SEP_SEMICOLON
struct intel_display_error_state;
struct drm_i915_error_state {
struct kref ref;
struct timeval time;
char error_msg[128];
bool simulated;
int iommu;
u32 reset_count;
u32 suspend_count;
struct intel_device_info device_info;
/* Generic register state */
u32 eir;
u32 pgtbl_er;
u32 ier;
u32 gtier[4];
u32 ccid;
u32 derrmr;
u32 forcewake;
u32 error; /* gen6+ */
u32 err_int; /* gen7 */
u32 fault_data0; /* gen8, gen9 */
u32 fault_data1; /* gen8, gen9 */
u32 done_reg;
u32 gac_eco;
u32 gam_ecochk;
u32 gab_ctl;
u32 gfx_mode;
u32 extra_instdone[I915_NUM_INSTDONE_REG];
u64 fence[I915_MAX_NUM_FENCES];
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
struct drm_i915_error_object *semaphore;
struct drm_i915_error_engine {
int engine_id;
/* Software tracked state */
bool waiting;
int num_waiters;
int hangcheck_score;
enum intel_engine_hangcheck_action hangcheck_action;
struct i915_address_space *vm;
int num_requests;
/* our own tracking of ring head and tail */
u32 cpu_ring_head;
u32 cpu_ring_tail;
u32 last_seqno;
u32 semaphore_seqno[I915_NUM_ENGINES - 1];
/* Register state */
u32 start;
u32 tail;
u32 head;
u32 ctl;
u32 mode;
u32 hws;
u32 ipeir;
u32 ipehr;
u32 instdone;
u32 bbstate;
u32 instpm;
u32 instps;
u32 seqno;
u64 bbaddr;
u64 acthd;
u32 fault_reg;
u64 faddr;
u32 rc_psmi; /* sleep state */
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
struct drm_i915_error_object {
int page_count;
u64 gtt_offset;
u64 gtt_size;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
struct drm_i915_error_object *wa_ctx;
struct drm_i915_error_request {
long jiffies;
pid_t pid;
u32 seqno;
u32 head;
u32 tail;
} *requests;
struct drm_i915_error_waiter {
char comm[TASK_COMM_LEN];
pid_t pid;
u32 seqno;
} *waiters;
struct {
u32 gfx_mode;
union {
u64 pdp[4];
u32 pp_dir_base;
};
} vm_info;
pid_t pid;
char comm[TASK_COMM_LEN];
} engine[I915_NUM_ENGINES];
struct drm_i915_error_buffer {
u32 size;
u32 name;
u32 rseqno[I915_NUM_ENGINES], wseqno;
u64 gtt_offset;
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
u32 tiling:2;
u32 dirty:1;
u32 purgeable:1;
u32 userptr:1;
s32 engine:4;
u32 cache_level:3;
} *active_bo[I915_NUM_ENGINES], *pinned_bo;
u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
struct i915_address_space *active_vm[I915_NUM_ENGINES];
};
enum i915_cache_level {
I915_CACHE_NONE = 0,
I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
@ -878,22 +889,23 @@ struct i915_gem_context {
struct drm_i915_private *i915;
struct drm_i915_file_private *file_priv;
struct i915_hw_ppgtt *ppgtt;
struct pid *pid;
struct i915_ctx_hang_stats hang_stats;
/* Unique identifier for this context, used by the hw for tracking */
unsigned long flags;
#define CONTEXT_NO_ZEROMAP BIT(0)
#define CONTEXT_NO_ERROR_CAPTURE BIT(1)
unsigned hw_id;
/* Unique identifier for this context, used by the hw for tracking */
unsigned int hw_id;
u32 user_handle;
u32 ggtt_alignment;
struct intel_context {
struct drm_i915_gem_object *state;
struct i915_vma *state;
struct intel_ring *ring;
struct i915_vma *lrc_vma;
uint32_t *lrc_reg_state;
u64 lrc_desc;
int pin_count;
@ -1061,13 +1073,6 @@ struct intel_gmbus {
struct i915_suspend_saved_registers {
u32 saveDSPARB;
u32 saveLVDS;
u32 savePP_ON_DELAYS;
u32 savePP_OFF_DELAYS;
u32 savePP_ON;
u32 savePP_OFF;
u32 savePP_CONTROL;
u32 savePP_DIVISOR;
u32 saveFBC_CONTROL;
u32 saveCACHE_MODE_0;
u32 saveMI_ARB_STATE;
@ -1749,12 +1754,14 @@ struct drm_i915_private {
uint32_t psr_mmio_base;
uint32_t pps_mmio_base;
wait_queue_head_t gmbus_wait_queue;
struct pci_dev *bridge_dev;
struct i915_gem_context *kernel_context;
struct intel_engine_cs engine[I915_NUM_ENGINES];
struct drm_i915_gem_object *semaphore_obj;
struct i915_vma *semaphore;
u32 next_seqno;
struct drm_dma_handle *status_page_dmah;
@ -1840,6 +1847,7 @@ struct drm_i915_private {
enum modeset_restore modeset_restore;
struct mutex modeset_restore_lock;
struct drm_atomic_state *modeset_restore_state;
struct drm_modeset_acquire_ctx reset_ctx;
struct list_head vm_list; /* Global list of all address spaces */
struct i915_ggtt ggtt; /* VM representing the global address space */
@ -2170,33 +2178,11 @@ struct drm_i915_gem_object {
*/
unsigned int dirty:1;
/**
* Fence register bits (if any) for this object. Will be set
* as needed when mapped into the GTT.
* Protected by dev->struct_mutex.
*/
signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
/**
* Advice: are the backing pages purgeable?
*/
unsigned int madv:2;
/**
* Whether the tiling parameters for the currently associated fence
* register have changed. Note that for the purposes of tracking
* tiling changes we also treat the unfenced register, the register
* slot that the object occupies whilst it executes a fenced
* command (such as BLT on gen2/3), as a "fence".
*/
unsigned int fence_dirty:1;
/**
* Is the object at the current location in the gtt mappable and
* fenceable? Used to avoid costly recalculations.
*/
unsigned int map_and_fenceable:1;
/**
* Whether the current gtt mapping needs to be mappable (and isn't just
* mappable by accident). Track pin and fault separate for a more
@ -2213,6 +2199,7 @@ struct drm_i915_gem_object {
unsigned int cache_dirty:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
/** Current tiling stride for the object, if it's tiled. */
unsigned int tiling_and_stride;
@ -2220,7 +2207,6 @@ struct drm_i915_gem_object {
#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
#define STRIDE_MASK (~TILING_MASK)
unsigned int has_wc_mmap;
/** Count of VMA actually bound by this object */
unsigned int bind_count;
unsigned int pin_display;
@ -2245,7 +2231,6 @@ struct drm_i915_gem_object {
*/
struct i915_gem_active last_read[I915_NUM_ENGINES];
struct i915_gem_active last_write;
struct i915_gem_active last_fence;
/** References from framebuffers, locks out tiling changes. */
unsigned long framebuffer_references;
@ -2375,6 +2360,18 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
return obj->tiling_and_stride & STRIDE_MASK;
}
static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
{
i915_gem_object_get(vma->obj);
return vma;
}
static inline void i915_vma_put(struct i915_vma *vma)
{
lockdep_assert_held(&vma->vm->dev->struct_mutex);
i915_gem_object_put(vma->obj);
}
/*
* Optimised SGL iterator for GEM objects
*/
@ -3066,7 +3063,7 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
void i915_gem_free_object(struct drm_gem_object *obj);
int __must_check
struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size,
@ -3085,9 +3082,6 @@ int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
int *needs_clflush);
int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
static inline int __sg_page_count(struct scatterlist *sg)
@ -3147,13 +3141,20 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
obj->pages_pin_count--;
}
enum i915_map_type {
I915_MAP_WB = 0,
I915_MAP_WC,
};
/**
* i915_gem_object_pin_map - return a contiguous mapping of the entire object
* @obj - the object to map into kernel address space
* @type - the type of mapping, used to select pgprot_t
*
* Calls i915_gem_object_pin_pages() to prevent reaping of the object's
* pages and then returns a contiguous mapping of the backing storage into
* the kernel address space.
* the kernel address space. Based on the @type of mapping, the PTE will be
* set to either WriteBack or WriteCombine (via pgprot_t).
*
* The caller must hold the struct_mutex, and is responsible for calling
* i915_gem_object_unpin_map() when the mapping is no longer required.
@ -3161,7 +3162,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
* Returns the pointer through which to access the mapped object, or an
* ERR_PTR() on error.
*/
void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj);
void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
enum i915_map_type type);
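
As the kernel-doc above describes, i915_gem_object_pin_map() now takes an explicit mapping type so a caller can request a write-combined rather than write-back view of the pages. A minimal usage sketch, not taken from the diff (caller name hypothetical, struct_mutex assumed held):

/* Illustrative only -- not part of the diff above. */
static int fill_through_wc_map(struct drm_i915_gem_object *obj)
{
        void *vaddr;

        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        /* ... write contents through the write-combined mapping ... */

        i915_gem_object_unpin_map(obj);
        return 0;
}
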
/**
* i915_gem_object_unpin_map - releases an earlier mapping
@ -3180,6 +3182,20 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
i915_gem_object_unpin_pages(obj);
}
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
unsigned int *needs_clflush);
int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
unsigned int *needs_clflush);
#define CLFLUSH_BEFORE 0x1
#define CLFLUSH_AFTER 0x2
#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
static inline void
i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
{
i915_gem_object_unpin_pages(obj);
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct drm_i915_gem_request *to);
@ -3262,12 +3278,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
int __must_check
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
int __must_check
struct i915_vma * __must_check
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
u32 alignment,
const struct i915_ggtt_view *view);
void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
int align);
int i915_gem_open(struct drm_device *dev, struct drm_file *file);
@ -3287,71 +3302,81 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gem_obj, int flags);
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
const struct i915_ggtt_view *view);
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
struct i915_address_space *vm);
static inline u64
i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
{
return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
}
bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
const struct i915_ggtt_view *view);
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
struct i915_address_space *vm);
struct i915_vma *
i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
struct i915_address_space *vm);
struct i915_vma *
i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
struct i915_address_space *vm,
const struct i915_ggtt_view *view);
struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
struct i915_address_space *vm);
struct i915_vma *
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
struct i915_address_space *vm,
const struct i915_ggtt_view *view);
static inline struct i915_vma *
i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
{
return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal);
}
bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
/* Some GGTT VM helpers */
static inline struct i915_hw_ppgtt *
i915_vm_to_ppgtt(struct i915_address_space *vm)
{
return container_of(vm, struct i915_hw_ppgtt, base);
}
static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
static inline struct i915_vma *
i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view)
{
return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal);
return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view);
}
unsigned long
i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
static inline void
i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
static inline unsigned long
i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
const struct i915_ggtt_view *view)
{
i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view));
}
/* i915_gem_fence.c */
int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
int __must_check i915_vma_get_fence(struct i915_vma *vma);
int __must_check i915_vma_put_fence(struct i915_vma *vma);
bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
/**
* i915_vma_pin_fence - pin fencing state
* @vma: vma to pin fencing for
*
* This pins the fencing state (whether tiled or untiled) to make sure the
* vma (and its object) is ready to be used as a scanout target. Fencing
* status must be synchronize first by calling i915_vma_get_fence():
*
* The resulting fence pin reference must be released again with
* i915_vma_unpin_fence().
*
* Returns:
*
* True if the vma has a fence, false otherwise.
*/
static inline bool
i915_vma_pin_fence(struct i915_vma *vma)
{
if (vma->fence) {
vma->fence->pin_count++;
return true;
} else
return false;
}
/**
* i915_vma_unpin_fence - unpin fencing state
* @vma: vma to unpin fencing for
*
* This releases the fence pin reference acquired through
* i915_vma_pin_fence. It will handle both objects with and without an
* attached fence correctly, callers do not need to distinguish this.
*/
static inline void
i915_vma_unpin_fence(struct i915_vma *vma)
{
if (vma->fence) {
GEM_BUG_ON(vma->fence->pin_count <= 0);
vma->fence->pin_count--;
}
}
void i915_gem_restore_fences(struct drm_device *dev);
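
Per the kernel-doc above, fence register state is synchronized with i915_vma_get_fence() and the pin/unpin pair merely holds that state steady while it is being relied upon. A hedged sketch of the intended calling pattern, not taken from the diff (surrounding function hypothetical, error handling trimmed):

/* Illustrative only -- not part of the diff above. */
static int access_through_fence(struct i915_vma *vma)
{
        int ret;

        /* Bring the fence register state for this vma up to date first. */
        ret = i915_vma_get_fence(vma);
        if (ret)
                return ret;

        if (i915_vma_pin_fence(vma)) {
                /* ... fenced (detiling) GTT access goes here ... */
                i915_vma_unpin_fence(vma);
        } else {
                /* No fence attached: fall back to an unfenced path. */
        }

        return 0;
}
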
@ -3429,6 +3454,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
/* belongs in i915_gem_gtt.h */
static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
{
wmb();
if (INTEL_GEN(dev_priv) < 6)
intel_gtt_chipset_flush();
}
@ -3516,7 +3542,7 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
/* i915_cmd_parser.c */
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine);
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
@ -3848,7 +3874,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
* is woken.
*/
if (engine->irq_seqno_barrier &&
READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
struct task_struct *tsk;
@ -3873,7 +3899,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
* irq_posted == false but we are still running).
*/
rcu_read_lock();
tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
if (tsk && tsk != current)
/* Note that if the bottom-half is changed as we
* are sending the wake-up, the new bottom-half will
@ -3902,4 +3928,32 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
return false;
}
void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
/* i915_mm.c */
int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap);
#define ptr_mask_bits(ptr) ({ \
unsigned long __v = (unsigned long)(ptr); \
(typeof(ptr))(__v & PAGE_MASK); \
})
#define ptr_unpack_bits(ptr, bits) ({ \
unsigned long __v = (unsigned long)(ptr); \
(bits) = __v & ~PAGE_MASK; \
(typeof(ptr))(__v & PAGE_MASK); \
})
#define ptr_pack_bits(ptr, bits) \
((typeof(ptr))((unsigned long)(ptr) | (bits)))
#define fetch_and_zero(ptr) ({ \
typeof(*ptr) __T = *(ptr); \
*(ptr) = (typeof(*ptr))0; \
__T; \
})
#endif
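The pointer-packing helpers above exploit the fact that page-aligned (and suitably aligned kmalloc) pointers have zero low bits, so spare flag bits can ride along in the pointer value. The following is a self-contained GNU C illustration only, assuming 4 KiB pages; the macro bodies are copied from the header above, everything else is scaffolding:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

#define ptr_mask_bits(ptr) ({ \
        unsigned long __v = (unsigned long)(ptr); \
        (typeof(ptr))(__v & PAGE_MASK); \
})

#define ptr_unpack_bits(ptr, bits) ({ \
        unsigned long __v = (unsigned long)(ptr); \
        (bits) = __v & ~PAGE_MASK; \
        (typeof(ptr))(__v & PAGE_MASK); \
})

#define ptr_pack_bits(ptr, bits) \
        ((typeof(ptr))((unsigned long)(ptr) | (bits)))

#define fetch_and_zero(ptr) ({ \
        typeof(*ptr) __T = *(ptr); \
        *(ptr) = (typeof(*ptr))0; \
        __T; \
})

int main(void)
{
        void *page = aligned_alloc(PAGE_SIZE, PAGE_SIZE); /* low 12 bits are zero */
        unsigned int flags;

        assert(page);
        void *packed = ptr_pack_bits(page, 0x3);    /* stash two flag bits */
        void *clean  = ptr_unpack_bits(packed, flags);

        assert(clean == page && flags == 0x3);
        assert(ptr_mask_bits(packed) == page);

        void *owned = fetch_and_zero(&packed);      /* take the value, leave NULL */
        printf("flags=%u, packed now %p, owned %p\n", flags, packed, owned);

        free(page);
        return 0;
}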

File diff suppressed because it is too large.

View file

@ -155,9 +155,10 @@ void i915_gem_context_free(struct kref *ctx_ref)
if (ce->ring)
intel_ring_free(ce->ring);
i915_gem_object_put(ce->state);
i915_vma_put(ce->state);
}
put_pid(ctx->pid);
list_del(&ctx->link);
ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
@ -281,13 +282,24 @@ __create_hw_context(struct drm_device *dev,
ctx->ggtt_alignment = get_context_alignment(dev_priv);
if (dev_priv->hw_context_size) {
struct drm_i915_gem_object *obj =
i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = i915_gem_alloc_context_obj(dev,
dev_priv->hw_context_size);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_out;
}
ctx->engine[RCS].state = obj;
vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
ret = PTR_ERR(vma);
goto err_out;
}
ctx->engine[RCS].state = vma;
}
/* Default context will never have a file_priv */
@ -300,6 +312,9 @@ __create_hw_context(struct drm_device *dev,
ret = DEFAULT_CONTEXT_HANDLE;
ctx->file_priv = file_priv;
if (file_priv)
ctx->pid = get_task_pid(current, PIDTYPE_PID);
ctx->user_handle = ret;
/* NB: Mark all slices as needing a remap so that when the context first
* loads it will restore whatever remap state already exists. If there
@ -399,7 +414,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
struct intel_context *ce = &ctx->engine[engine->id];
if (ce->state)
i915_gem_object_ggtt_unpin(ce->state);
i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
@ -568,7 +583,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
INTEL_INFO(dev_priv)->num_rings - 1 :
0;
int len, ret;
@ -621,8 +636,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
intel_ring_emit(ring,
i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
flags);
i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@ -651,7 +665,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring, engine->scratch.gtt_offset);
intel_ring_emit(ring,
i915_ggtt_offset(engine->scratch));
intel_ring_emit(ring, MI_NOOP);
}
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
@ -755,6 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
struct i915_gem_context *to = req->ctx;
struct intel_engine_cs *engine = req->engine;
struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
struct i915_vma *vma = to->engine[RCS].state;
struct i915_gem_context *from;
u32 hw_flags;
int ret, i;
@ -762,9 +778,15 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
if (skip_rcs_switch(ppgtt, engine, to))
return 0;
/* Clear this page out of any CPU caches for coherent swap-in/out. */
if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
if (ret)
return ret;
}
/* Trying to pin first makes error handling easier. */
ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
to->ggtt_alignment, 0);
ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
if (ret)
return ret;
@ -777,18 +799,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
*/
from = engine->last_context;
/*
* Clear this page out of any CPU caches for coherent swap-in/out. Note
* that thanks to write = false in this call and us not setting any gpu
* write domains when putting a context object onto the active list
* (when switching away from it), this won't block.
*
* XXX: We need a real interface to do this instead of trickery.
*/
ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
if (ret)
goto unpin_out;
if (needs_pd_load_pre(ppgtt, engine, to)) {
/* Older GENs and non render rings still want the load first,
* "PP_DCLV followed by PP_DIR_BASE register through Load
@ -797,7 +807,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
trace_switch_mm(engine, to);
ret = ppgtt->switch_mm(ppgtt, req);
if (ret)
goto unpin_out;
goto err;
}
if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
@ -814,7 +824,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
ret = mi_set_context(req, hw_flags);
if (ret)
goto unpin_out;
goto err;
}
/* The backing object for the context is done after switching to the
@ -824,8 +834,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* MI_SET_CONTEXT instead of when the next seqno has completed.
*/
if (from != NULL) {
struct drm_i915_gem_object *obj = from->engine[RCS].state;
/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
* whole damn pipeline, we don't need to explicitly mark the
* object dirty. The only exception is that the context must be
@ -833,11 +841,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* able to defer doing this until we know the object would be
* swapped, but there is no way to do that yet.
*/
obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
/* obj is kept alive until the next request by its active ref */
i915_gem_object_ggtt_unpin(obj);
i915_vma_move_to_active(from->engine[RCS].state, req, 0);
/* state is kept alive until the next request */
i915_vma_unpin(from->engine[RCS].state);
i915_gem_context_put(from);
}
engine->last_context = i915_gem_context_get(to);
@ -882,8 +888,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
return 0;
unpin_out:
i915_gem_object_ggtt_unpin(to->engine[RCS].state);
err:
i915_vma_unpin(vma);
return ret;
}

View file

@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
if (ret)
return ERR_PTR(ret);
addr = i915_gem_object_pin_map(obj);
addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
mutex_unlock(&dev->struct_mutex);
return addr;

View file

@ -47,7 +47,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
}
static bool
mark_free(struct i915_vma *vma, struct list_head *unwind)
mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
{
if (i915_vma_is_pinned(vma))
return false;
@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
if (WARN_ON(!list_empty(&vma->exec_list)))
return false;
if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
return false;
list_add(&vma->exec_list, unwind);
return drm_mm_scan_add_block(&vma->node);
}
@ -129,7 +132,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
phase = phases;
do {
list_for_each_entry(vma, *phase, vm_link)
if (mark_free(vma, &eviction_list))
if (mark_free(vma, flags, &eviction_list))
goto found;
} while (*++phase);

View file

@ -39,6 +39,8 @@
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
@ -59,6 +61,7 @@ struct i915_execbuffer_params {
};
struct eb_vmas {
struct drm_i915_private *i915;
struct list_head vmas;
int and;
union {
@ -68,7 +71,8 @@ struct eb_vmas {
};
static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args)
eb_create(struct drm_i915_private *i915,
struct drm_i915_gem_execbuffer2 *args)
{
struct eb_vmas *eb = NULL;
@ -95,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
} else
eb->and = -args->buffer_count;
eb->i915 = i915;
INIT_LIST_HEAD(&eb->vmas);
return eb;
}
@ -180,8 +185,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
* from the (obj, vm) we don't run the risk of creating
* duplicated vmas for the same vm.
*/
vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma)) {
vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
if (unlikely(IS_ERR(vma))) {
DRM_DEBUG("Failed to lookup VMA\n");
ret = PTR_ERR(vma);
goto err;
@ -245,7 +250,6 @@ static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry;
struct drm_i915_gem_object *obj = vma->obj;
if (!drm_mm_node_allocated(&vma->node))
return;
@ -253,7 +257,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
entry = vma->exec_entry;
if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
i915_gem_object_unpin_fence(obj);
i915_vma_unpin_fence(vma);
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
__i915_vma_unpin(vma);
@ -271,13 +275,19 @@ static void eb_destroy(struct eb_vmas *eb)
exec_list);
list_del_init(&vma->exec_list);
i915_gem_execbuffer_unreserve_vma(vma);
i915_gem_object_put(vma->obj);
i915_vma_put(vma);
}
kfree(eb);
}
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
if (!i915_gem_object_has_struct_page(obj))
return false;
if (DBG_USE_CPU_RELOC)
return DBG_USE_CPU_RELOC > 0;
return (HAS_LLC(obj->base.dev) ||
obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
obj->cache_level != I915_CACHE_NONE);
@ -302,137 +312,243 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
}
static inline uint64_t
relocation_target(struct drm_i915_gem_relocation_entry *reloc,
relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
uint64_t target_offset)
{
return gen8_canonical_addr((int)reloc->delta + target_offset);
}
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
struct drm_i915_gem_relocation_entry *reloc,
uint64_t target_offset)
struct reloc_cache {
struct drm_i915_private *i915;
struct drm_mm_node node;
unsigned long vaddr;
unsigned int page;
bool use_64bit_reloc;
};
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
struct drm_device *dev = obj->base.dev;
uint32_t page_offset = offset_in_page(reloc->offset);
uint64_t delta = relocation_target(reloc, target_offset);
char *vaddr;
int ret;
cache->page = -1;
cache->vaddr = 0;
cache->i915 = i915;
cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
cache->node.allocated = false;
}
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
return ret;
static inline void *unmask_page(unsigned long p)
{
return (void *)(uintptr_t)(p & PAGE_MASK);
}
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
reloc->offset >> PAGE_SHIFT));
*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
static inline unsigned int unmask_flags(unsigned long p)
{
return p & ~PAGE_MASK;
}
if (INTEL_INFO(dev)->gen >= 8) {
page_offset = offset_in_page(page_offset + sizeof(uint32_t));
#define KMAP 0x4 /* after CLFLUSH_FLAGS */
if (page_offset == 0) {
kunmap_atomic(vaddr);
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
static void reloc_cache_fini(struct reloc_cache *cache)
{
void *vaddr;
if (!cache->vaddr)
return;
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP) {
if (cache->vaddr & CLFLUSH_AFTER)
mb();
kunmap_atomic(vaddr);
i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
} else {
wmb();
io_mapping_unmap_atomic((void __iomem *)vaddr);
if (cache->node.allocated) {
struct i915_ggtt *ggtt = &cache->i915->ggtt;
ggtt->base.clear_range(&ggtt->base,
cache->node.start,
cache->node.size,
true);
drm_mm_remove_node(&cache->node);
} else {
i915_vma_unpin((struct i915_vma *)cache->node.mm);
}
}
}
*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
static void *reloc_kmap(struct drm_i915_gem_object *obj,
struct reloc_cache *cache,
int page)
{
void *vaddr;
if (cache->vaddr) {
kunmap_atomic(unmask_page(cache->vaddr));
} else {
unsigned int flushes;
int ret;
ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
if (ret)
return ERR_PTR(ret);
BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
cache->vaddr = flushes | KMAP;
cache->node.mm = (void *)obj;
if (flushes)
mb();
}
kunmap_atomic(vaddr);
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
cache->page = page;
return 0;
return vaddr;
}
static void *reloc_iomap(struct drm_i915_gem_object *obj,
struct reloc_cache *cache,
int page)
{
struct i915_ggtt *ggtt = &cache->i915->ggtt;
unsigned long offset;
void *vaddr;
if (cache->node.allocated) {
wmb();
ggtt->base.insert_page(&ggtt->base,
i915_gem_object_get_dma_address(obj, page),
cache->node.start, I915_CACHE_NONE, 0);
cache->page = page;
return unmask_page(cache->vaddr);
}
if (cache->vaddr) {
io_mapping_unmap_atomic(unmask_page(cache->vaddr));
} else {
struct i915_vma *vma;
int ret;
if (use_cpu_reloc(obj))
return NULL;
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
return ERR_PTR(ret);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE | PIN_NONBLOCK);
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
ret = drm_mm_insert_node_in_range_generic
(&ggtt->base.mm, &cache->node,
4096, 0, 0,
0, ggtt->mappable_end,
DRM_MM_SEARCH_DEFAULT,
DRM_MM_CREATE_DEFAULT);
if (ret)
return ERR_PTR(ret);
} else {
ret = i915_vma_put_fence(vma);
if (ret) {
i915_vma_unpin(vma);
return ERR_PTR(ret);
}
cache->node.start = vma->node.start;
cache->node.mm = (void *)vma;
}
}
offset = cache->node.start;
if (cache->node.allocated) {
ggtt->base.insert_page(&ggtt->base,
i915_gem_object_get_dma_address(obj, page),
offset, I915_CACHE_NONE, 0);
} else {
offset += page << PAGE_SHIFT;
}
vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
cache->page = page;
cache->vaddr = (unsigned long)vaddr;
return vaddr;
}
static void *reloc_vaddr(struct drm_i915_gem_object *obj,
struct reloc_cache *cache,
int page)
{
void *vaddr;
if (cache->page == page) {
vaddr = unmask_page(cache->vaddr);
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
vaddr = reloc_iomap(obj, cache, page);
if (!vaddr)
vaddr = reloc_kmap(obj, cache, page);
}
return vaddr;
}
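reloc_vaddr() above is a one-entry cache: a hit on cache->page reuses the current mapping, a miss tears the old one down and maps the requested page (iomap first, kmap as fallback). A hedged userspace sketch of just the caching shape, with a plain array standing in for kmap_atomic/io_mapping_map_atomic_wc and all names below illustrative:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define PAGE_SIZE 4096

/* Illustrative stand-in for the object being relocated. */
struct object { uint8_t pages[8][PAGE_SIZE]; };

/* One-entry cache, mirroring the page/vaddr pair in struct reloc_cache. */
struct page_cache {
        struct object *obj;
        int page;               /* -1: nothing mapped yet */
        uint8_t *vaddr;
        int map_calls;          /* just to show the caching effect */
};

static uint8_t *cache_map(struct page_cache *c, int page)
{
        if (c->page == page)
                return c->vaddr;        /* hit: reuse the existing mapping */

        /* miss: "unmap" the old page and "map" the new one */
        c->vaddr = c->obj->pages[page];
        c->page = page;
        c->map_calls++;
        return c->vaddr;
}

static void write_u32(struct page_cache *c, size_t offset, uint32_t value)
{
        uint8_t *vaddr = cache_map(c, offset / PAGE_SIZE);
        memcpy(vaddr + offset % PAGE_SIZE, &value, sizeof(value));
}

int main(void)
{
        static struct object obj;
        struct page_cache cache = { .obj = &obj, .page = -1 };

        /* Many relocations landing in the same page cost one mapping. */
        for (size_t off = 0; off < 64; off += 8)
                write_u32(&cache, off, 0xdeadbeef);
        write_u32(&cache, 5 * PAGE_SIZE + 16, 0xcafef00d);

        printf("map calls: %d\n", cache.map_calls); /* 2, not 9 */
        return 0;
}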
static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
{
if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
if (flushes & CLFLUSH_BEFORE) {
clflushopt(addr);
mb();
}
*addr = value;
/* Writes to the same cacheline are serialised by the CPU
* (including clflush). On the write path, we only require
* that it hits memory in an orderly fashion and place
* mb barriers at the start and end of the relocation phase
* to ensure ordering of clflush wrt to the system.
*/
if (flushes & CLFLUSH_AFTER)
clflushopt(addr);
} else
*addr = value;
}
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
struct drm_i915_gem_relocation_entry *reloc,
uint64_t target_offset)
relocate_entry(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_relocation_entry *reloc,
struct reloc_cache *cache,
u64 target_offset)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint64_t delta = relocation_target(reloc, target_offset);
uint64_t offset;
void __iomem *reloc_page;
int ret;
u64 offset = reloc->offset;
bool wide = cache->use_64bit_reloc;
void *vaddr;
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
return ret;
target_offset = relocation_target(reloc, target_offset);
repeat:
vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
clflush_write32(vaddr + offset_in_page(offset),
lower_32_bits(target_offset),
cache->vaddr);
/* Map the page containing the relocation we're going to perform. */
offset = i915_gem_obj_ggtt_offset(obj);
offset += reloc->offset;
reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
offset & PAGE_MASK);
iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
if (INTEL_INFO(dev)->gen >= 8) {
offset += sizeof(uint32_t);
if (offset_in_page(offset) == 0) {
io_mapping_unmap_atomic(reloc_page);
reloc_page =
io_mapping_map_atomic_wc(ggtt->mappable,
offset);
}
iowrite32(upper_32_bits(delta),
reloc_page + offset_in_page(offset));
if (wide) {
offset += sizeof(u32);
target_offset >>= 32;
wide = false;
goto repeat;
}
io_mapping_unmap_atomic(reloc_page);
return 0;
}
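The repeat/wide flow in relocate_entry() above emits a 64-bit presumed offset as two 32-bit stores, so the second store re-resolves its page mapping when the relocation straddles a page boundary. A standalone sketch of that control flow, with plain buffer indexing in place of the per-page vaddr lookup (little-endian assumed):

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static void relocate(uint8_t *buf, uint64_t offset, uint64_t target,
                     bool use_64bit)
{
        bool wide = use_64bit;
        uint32_t lo;

repeat:
        lo = (uint32_t)target;                  /* lower_32_bits() */
        memcpy(buf + offset, &lo, sizeof(lo));  /* one 32-bit store */

        if (wide) {                             /* second pass for the upper half */
                offset += sizeof(uint32_t);
                target >>= 32;
                wide = false;
                goto repeat;
        }
}

int main(void)
{
        uint8_t buf[16] = { 0 };
        uint64_t value;

        relocate(buf, 4, 0x0000000123456789ull, true);
        memcpy(&value, buf + 4, sizeof(value));
        printf("wrote 0x%" PRIx64 "\n", value); /* 0x123456789 on little-endian */
        return 0;
}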
static void
clflush_write32(void *addr, uint32_t value)
{
/* This is not a fast path, so KISS. */
drm_clflush_virt_range(addr, sizeof(uint32_t));
*(uint32_t *)addr = value;
drm_clflush_virt_range(addr, sizeof(uint32_t));
}
static int
relocate_entry_clflush(struct drm_i915_gem_object *obj,
struct drm_i915_gem_relocation_entry *reloc,
uint64_t target_offset)
{
struct drm_device *dev = obj->base.dev;
uint32_t page_offset = offset_in_page(reloc->offset);
uint64_t delta = relocation_target(reloc, target_offset);
char *vaddr;
int ret;
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
return ret;
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
reloc->offset >> PAGE_SHIFT));
clflush_write32(vaddr + page_offset, lower_32_bits(delta));
if (INTEL_INFO(dev)->gen >= 8) {
page_offset = offset_in_page(page_offset + sizeof(uint32_t));
if (page_offset == 0) {
kunmap_atomic(vaddr);
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
}
clflush_write32(vaddr + page_offset, upper_32_bits(delta));
}
kunmap_atomic(vaddr);
return 0;
}
@ -453,7 +569,8 @@ static bool object_is_idle(struct drm_i915_gem_object *obj)
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_vmas *eb,
struct drm_i915_gem_relocation_entry *reloc)
struct drm_i915_gem_relocation_entry *reloc,
struct reloc_cache *cache)
{
struct drm_device *dev = obj->base.dev;
struct drm_gem_object *target_obj;
@ -516,7 +633,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
/* Check that the relocation address is valid... */
if (unlikely(reloc->offset >
obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
DRM_DEBUG("Relocation beyond object bounds: "
"obj %p target %d offset %d size %d.\n",
obj, reloc->target_handle,
@ -536,23 +653,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (pagefault_disabled() && !object_is_idle(obj))
return -EFAULT;
if (use_cpu_reloc(obj))
ret = relocate_entry_cpu(obj, reloc, target_offset);
else if (obj->map_and_fenceable)
ret = relocate_entry_gtt(obj, reloc, target_offset);
else if (static_cpu_has(X86_FEATURE_CLFLUSH))
ret = relocate_entry_clflush(obj, reloc, target_offset);
else {
WARN_ONCE(1, "Impossible case in relocation handling\n");
ret = -ENODEV;
}
ret = relocate_entry(obj, reloc, cache, target_offset);
if (ret)
return ret;
/* and update the user's relocation entry */
reloc->presumed_offset = target_offset;
return 0;
}
@ -564,9 +670,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
struct drm_i915_gem_relocation_entry __user *user_relocs;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
int remain, ret;
struct reloc_cache cache;
int remain, ret = 0;
user_relocs = u64_to_user_ptr(entry->relocs_ptr);
reloc_cache_init(&cache, eb->i915);
remain = entry->relocation_count;
while (remain) {
@ -576,19 +684,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
count = ARRAY_SIZE(stack_reloc);
remain -= count;
if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
return -EFAULT;
if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
ret = -EFAULT;
goto out;
}
do {
u64 offset = r->presumed_offset;
ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
if (ret)
return ret;
goto out;
if (r->presumed_offset != offset &&
__put_user(r->presumed_offset, &user_relocs->presumed_offset)) {
return -EFAULT;
__put_user(r->presumed_offset,
&user_relocs->presumed_offset)) {
ret = -EFAULT;
goto out;
}
user_relocs++;
@ -596,7 +708,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
} while (--count);
}
return 0;
out:
reloc_cache_fini(&cache);
return ret;
#undef N_RELOC
}
@ -606,15 +720,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
struct drm_i915_gem_relocation_entry *relocs)
{
const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
int i, ret;
struct reloc_cache cache;
int i, ret = 0;
reloc_cache_init(&cache, eb->i915);
for (i = 0; i < entry->relocation_count; i++) {
ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
if (ret)
return ret;
break;
}
reloc_cache_fini(&cache);
return 0;
return ret;
}
static int
@ -693,11 +810,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
entry->flags |= __EXEC_OBJECT_HAS_PIN;
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
ret = i915_gem_object_get_fence(obj);
ret = i915_vma_get_fence(vma);
if (ret)
return ret;
if (i915_gem_object_pin_fence(obj))
if (i915_vma_pin_fence(vma))
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
}
@ -739,7 +856,6 @@ static bool
eb_vma_misplaced(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_vma_is_ggtt(vma));
@ -760,7 +876,8 @@ eb_vma_misplaced(struct i915_vma *vma)
return true;
/* avoid costly ping-pong once a batch bo ended up non-mappable */
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_vma_is_map_and_fenceable(vma))
return !only_mappable_for_reloc(entry->flags);
if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
@ -900,7 +1017,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
list_del_init(&vma->exec_list);
i915_gem_execbuffer_unreserve_vma(vma);
i915_gem_object_put(vma->obj);
i915_vma_put(vma);
}
mutex_unlock(&dev->struct_mutex);
@ -1010,8 +1127,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
{
const unsigned int other_rings = eb_other_engines(req);
struct i915_vma *vma;
uint32_t flush_domains = 0;
bool flush_chipset = false;
int ret;
list_for_each_entry(vma, vmas, exec_list) {
@ -1024,16 +1139,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
}
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
flush_chipset |= i915_gem_clflush_object(obj, false);
flush_domains |= obj->base.write_domain;
i915_gem_clflush_object(obj, false);
}
if (flush_chipset)
i915_gem_chipset_flush(req->engine->i915);
if (flush_domains & I915_GEM_DOMAIN_GTT)
wmb();
/* Unconditionally flush any chipset caches (for streaming writes). */
i915_gem_chipset_flush(req->engine->i915);
/* Unconditionally invalidate GPU caches and TLBs. */
return req->engine->emit_flush(req, EMIT_INVALIDATE);
@ -1194,15 +1304,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
}
if (flags & EXEC_OBJECT_NEEDS_FENCE) {
i915_gem_active_set(&obj->last_fence, req);
if (flags & __EXEC_OBJECT_HAS_FENCE) {
struct drm_i915_private *dev_priv = req->i915;
list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
&dev_priv->mm.fence_list);
}
}
if (flags & EXEC_OBJECT_NEEDS_FENCE)
i915_gem_active_set(&vma->last_fence, req);
i915_vma_set_active(vma, idx);
i915_gem_active_set(&vma->last_read[idx], req);
@ -1281,7 +1384,7 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
return 0;
}
static struct i915_vma*
static struct i915_vma *
i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
struct drm_i915_gem_exec_object2 *shadow_exec_entry,
struct drm_i915_gem_object *batch_obj,
@ -1305,31 +1408,28 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
batch_start_offset,
batch_len,
is_master);
if (ret)
goto err;
if (ret) {
if (ret == -EACCES) /* unhandled chained batch */
vma = NULL;
else
vma = ERR_PTR(ret);
goto out;
}
ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
if (ret)
goto err;
i915_gem_object_unpin_pages(shadow_batch_obj);
vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
if (IS_ERR(vma))
goto out;
memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = shadow_exec_entry;
vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
i915_gem_object_get(shadow_batch_obj);
list_add_tail(&vma->exec_list, &eb->vmas);
return vma;
err:
out:
i915_gem_object_unpin_pages(shadow_batch_obj);
if (ret == -EACCES) /* unhandled chained batch */
return NULL;
else
return ERR_PTR(ret);
return vma;
}
static int
@ -1412,7 +1512,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
params->args_batch_start_offset;
if (exec_len == 0)
exec_len = params->batch->size;
exec_len = params->batch->size - params->args_batch_start_offset;
ret = params->engine->emit_bb_start(params->request,
exec_start, exec_len,
@ -1595,7 +1695,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
memset(&params_master, 0x00, sizeof(params_master));
eb = eb_create(args);
eb = eb_create(dev_priv, args);
if (eb == NULL) {
i915_gem_context_put(ctx);
mutex_unlock(&dev->struct_mutex);
@ -1638,6 +1738,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = -EINVAL;
goto err;
}
if (args->batch_start_offset > params->batch->size ||
args->batch_len > params->batch->size - args->batch_start_offset) {
DRM_DEBUG("Attempting to use out-of-bounds batch\n");
ret = -EINVAL;
goto err;
}
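The bounds check just added is deliberately phrased as start > size || len > size - start rather than start + len > size: with the 32-bit offset and length fields of the execbuffer2 ABI, the naive sum can wrap and accept an out-of-bounds batch. A small standalone demonstration (the values are arbitrary):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Naive form: start + len is computed in 32 bits and can wrap to a small
 * value, so an out-of-bounds request slips through. */
static bool in_bounds_naive(uint32_t start, uint32_t len, uint64_t size)
{
        return start + len <= size;
}

/* Subtraction form used in the driver: no intermediate sum, no wrap. */
static bool in_bounds_safe(uint32_t start, uint32_t len, uint64_t size)
{
        return start <= size && len <= size - start;
}

int main(void)
{
        uint64_t size = 0x10000;                /* 64 KiB batch object */
        uint32_t start = 0x8000;
        uint32_t len = 0xffffc000u;             /* start + len wraps to 0x4000 */

        printf("naive: %d  safe: %d\n",
               in_bounds_naive(start, len, size),   /* 1 -- wrongly accepted */
               in_bounds_safe(start, len, size));   /* 0 -- rejected */
        return 0;
}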
params->args_batch_start_offset = args->batch_start_offset;
if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
@ -1677,6 +1783,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* hsw should have this fixed, but bdw mucks it up again. */
if (dispatch_flags & I915_DISPATCH_SECURE) {
struct drm_i915_gem_object *obj = params->batch->obj;
struct i915_vma *vma;
/*
* So on first glance it looks freaky that we pin the batch here
@ -1688,11 +1795,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* fitting due to fragmentation.
* So this is actually safe.
*/
ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
if (ret)
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
}
params->batch = i915_gem_obj_to_ggtt(obj);
params->batch = vma;
}
/* Allocate a request for this batch buffer nice and early. */
@ -1702,6 +1811,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err_batch_unpin;
}
/* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
* request is retired will the batch_obj be moved onto the
* inactive_list and lose its active reference. Hence we do not need
* to explicitly hold another reference here.
*/
params->request->batch = params->batch;
ret = i915_gem_request_add_to_client(params->request, file);
if (ret)
goto err_request;
@ -1720,7 +1837,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = execbuf_submit(params, args, &eb->vmas);
err_request:
__i915_add_request(params->request, params->batch->obj, ret == 0);
__i915_add_request(params->request, ret == 0);
err_batch_unpin:
/*

View file

@ -55,87 +55,85 @@
* CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
*/
static void i965_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
#define pipelined 0
static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
u64 val;
if (INTEL_INFO(dev)->gen >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(reg);
fence_reg_hi = FENCE_REG_GEN6_HI(reg);
if (INTEL_INFO(fence->i915)->gen >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
} else {
fence_reg_lo = FENCE_REG_965_LO(reg);
fence_reg_hi = FENCE_REG_965_HI(reg);
fence_reg_lo = FENCE_REG_965_LO(fence->id);
fence_reg_hi = FENCE_REG_965_HI(fence->id);
fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
}
/* To w/a incoherency with non-atomic 64-bit register updates,
* we split the 64-bit update into two 32-bit writes. In order
* for a partial fence not to be evaluated between writes, we
* precede the update with write to turn off the fence register,
* and only enable the fence as the last step.
*
* For extra levels of paranoia, we make sure each step lands
* before applying the next step.
*/
I915_WRITE(fence_reg_lo, 0);
POSTING_READ(fence_reg_lo);
val = 0;
if (vma) {
unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
bool is_y_tiled = tiling == I915_TILING_Y;
unsigned int stride = i915_gem_object_get_stride(vma->obj);
u32 row_size = stride * (is_y_tiled ? 32 : 8);
u32 size = rounddown((u32)vma->node.size, row_size);
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint64_t val;
/* Adjust fence size to match tiled area */
if (tiling != I915_TILING_NONE) {
uint32_t row_size = stride *
(tiling == I915_TILING_Y ? 32 : 8);
size = (size / row_size) * row_size;
}
val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
0xfffff000) << 32;
val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
if (tiling == I915_TILING_Y)
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
val |= vma->node.start & 0xfffff000;
val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
if (is_y_tiled)
val |= BIT(I965_FENCE_TILING_Y_SHIFT);
val |= I965_FENCE_REG_VALID;
}
I915_WRITE(fence_reg_hi, val >> 32);
POSTING_READ(fence_reg_hi);
if (!pipelined) {
struct drm_i915_private *dev_priv = fence->i915;
I915_WRITE(fence_reg_lo, val);
/* To w/a incoherency with non-atomic 64-bit register updates,
* we split the 64-bit update into two 32-bit writes. In order
* for a partial fence not to be evaluated between writes, we
* precede the update with write to turn off the fence register,
* and only enable the fence as the last step.
*
* For extra levels of paranoia, we make sure each step lands
* before applying the next step.
*/
I915_WRITE(fence_reg_lo, 0);
POSTING_READ(fence_reg_lo);
I915_WRITE(fence_reg_hi, upper_32_bits(val));
I915_WRITE(fence_reg_lo, lower_32_bits(val));
POSTING_READ(fence_reg_lo);
} else {
I915_WRITE(fence_reg_hi, 0);
POSTING_READ(fence_reg_hi);
}
}
static void i915_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
u32 val;
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
val = 0;
if (vma) {
unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
bool is_y_tiled = tiling == I915_TILING_Y;
unsigned int stride = i915_gem_object_get_stride(vma->obj);
int pitch_val;
int tile_width;
WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
(size & -size) != size ||
(i915_gem_obj_ggtt_offset(obj) & (size - 1)),
"object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
!is_power_of_2(vma->node.size) ||
(vma->node.start & (vma->node.size - 1)),
"object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
vma->node.start,
i915_vma_is_map_and_fenceable(vma),
vma->node.size);
if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
tile_width = 128;
else
tile_width = 512;
@ -144,139 +142,141 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
pitch_val = stride / tile_width;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I915_FENCE_SIZE_BITS(size);
val = vma->node.start;
if (is_y_tiled)
val |= BIT(I830_FENCE_TILING_Y_SHIFT);
val |= I915_FENCE_SIZE_BITS(vma->node.size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
} else
val = 0;
}
I915_WRITE(FENCE_REG(reg), val);
POSTING_READ(FENCE_REG(reg));
if (!pipelined) {
struct drm_i915_private *dev_priv = fence->i915;
i915_reg_t reg = FENCE_REG(fence->id);
I915_WRITE(reg, val);
POSTING_READ(reg);
}
}
static void i830_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
uint32_t val;
u32 val;
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint32_t pitch_val;
val = 0;
if (vma) {
unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
bool is_y_tiled = tiling == I915_TILING_Y;
unsigned int stride = i915_gem_object_get_stride(vma->obj);
u32 pitch_val;
WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
(size & -size) != size ||
(i915_gem_obj_ggtt_offset(obj) & (size - 1)),
"object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
i915_gem_obj_ggtt_offset(obj), size);
WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
!is_power_of_2(vma->node.size) ||
(vma->node.start & (vma->node.size - 1)),
"object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
vma->node.start, vma->node.size);
pitch_val = stride / 128;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I830_FENCE_SIZE_BITS(size);
val = vma->node.start;
if (is_y_tiled)
val |= BIT(I830_FENCE_TILING_Y_SHIFT);
val |= I830_FENCE_SIZE_BITS(vma->node.size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
} else
val = 0;
I915_WRITE(FENCE_REG(reg), val);
POSTING_READ(FENCE_REG(reg));
}
inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
{
return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
}
static void i915_gem_write_fence(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(dev);
/* Ensure that all CPU reads are completed before installing a fence
* and all writes before removing the fence.
*/
if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
mb();
WARN(obj &&
(!i915_gem_object_get_stride(obj) ||
!i915_gem_object_get_tiling(obj)),
"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
i915_gem_object_get_stride(obj),
i915_gem_object_get_tiling(obj));
if (IS_GEN2(dev))
i830_write_fence_reg(dev, reg, obj);
else if (IS_GEN3(dev))
i915_write_fence_reg(dev, reg, obj);
else if (INTEL_INFO(dev)->gen >= 4)
i965_write_fence_reg(dev, reg, obj);
/* And similarly be paranoid that no direct access to this region
* is reordered to before the fence is installed.
*/
if (i915_gem_object_needs_mb(obj))
mb();
}
static inline int fence_number(struct drm_i915_private *dev_priv,
struct drm_i915_fence_reg *fence)
{
return fence - dev_priv->fence_regs;
}
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
struct drm_i915_fence_reg *fence,
bool enable)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
int reg = fence_number(dev_priv, fence);
i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
if (enable) {
obj->fence_reg = reg;
fence->obj = obj;
list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
} else {
obj->fence_reg = I915_FENCE_REG_NONE;
fence->obj = NULL;
list_del_init(&fence->lru_list);
}
obj->fence_dirty = false;
if (!pipelined) {
struct drm_i915_private *dev_priv = fence->i915;
i915_reg_t reg = FENCE_REG(fence->id);
I915_WRITE(reg, val);
POSTING_READ(reg);
}
}
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
static void fence_write(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
if (i915_gem_object_is_tiled(obj))
i915_gem_release_mmap(obj);
/* As we do not have an associated fence register, we will force
* a tiling change if we ever need to acquire one.
/* Previous access through the fence register is marshalled by
* the mb() inside the fault handlers (i915_gem_release_mmaps)
* and explicitly managed for internal users.
*/
obj->fence_dirty = false;
obj->fence_reg = I915_FENCE_REG_NONE;
if (IS_GEN2(fence->i915))
i830_write_fence_reg(fence, vma);
else if (IS_GEN3(fence->i915))
i915_write_fence_reg(fence, vma);
else
i965_write_fence_reg(fence, vma);
/* Access through the fenced region afterwards is
* ordered by the posting reads whilst writing the registers.
*/
fence->dirty = false;
}
static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
static int fence_update(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
return i915_gem_active_retire(&obj->last_fence,
&obj->base.dev->struct_mutex);
int ret;
if (vma) {
if (!i915_vma_is_map_and_fenceable(vma))
return -EINVAL;
if (WARN(!i915_gem_object_get_stride(vma->obj) ||
!i915_gem_object_get_tiling(vma->obj),
"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
i915_gem_object_get_stride(vma->obj),
i915_gem_object_get_tiling(vma->obj)))
return -EINVAL;
ret = i915_gem_active_retire(&vma->last_fence,
&vma->obj->base.dev->struct_mutex);
if (ret)
return ret;
}
if (fence->vma) {
ret = i915_gem_active_retire(&fence->vma->last_fence,
&fence->vma->obj->base.dev->struct_mutex);
if (ret)
return ret;
}
if (fence->vma && fence->vma != vma) {
/* Ensure that all userspace CPU access is completed before
* stealing the fence.
*/
i915_gem_release_mmap(fence->vma->obj);
fence->vma->fence = NULL;
fence->vma = NULL;
list_move(&fence->link, &fence->i915->mm.fence_list);
}
fence_write(fence, vma);
if (vma) {
if (fence->vma != vma) {
vma->fence = fence;
fence->vma = vma;
}
list_move_tail(&fence->link, &fence->i915->mm.fence_list);
}
return 0;
}
/**
* i915_gem_object_put_fence - force-remove fence for an object
* @obj: object to map through a fence reg
* i915_vma_put_fence - force-remove fence for a VMA
* @vma: vma to map linearly (not through a fence reg)
*
* This function force-removes any fence from the given object, which is useful
* if the kernel wants to do untiled GTT access.
@ -286,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
* 0 on success, negative error code on failure.
*/
int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
i915_vma_put_fence(struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct drm_i915_fence_reg *fence;
int ret;
struct drm_i915_fence_reg *fence = vma->fence;
ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
if (obj->fence_reg == I915_FENCE_REG_NONE)
if (!fence)
return 0;
fence = &dev_priv->fence_regs[obj->fence_reg];
if (WARN_ON(fence->pin_count))
if (fence->pin_count)
return -EBUSY;
i915_gem_object_fence_lost(obj);
i915_gem_object_update_fence(obj, fence, false);
return 0;
return fence_update(fence, NULL);
}
static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_fence_reg *reg, *avail;
int i;
struct drm_i915_fence_reg *fence;
/* First try to find a free reg */
avail = NULL;
for (i = 0; i < dev_priv->num_fence_regs; i++) {
reg = &dev_priv->fence_regs[i];
if (!reg->obj)
return reg;
if (!reg->pin_count)
avail = reg;
}
if (avail == NULL)
goto deadlock;
/* None available, try to steal one or wait for a user to finish */
list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
if (reg->pin_count)
list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
if (fence->pin_count)
continue;
return reg;
return fence;
}
deadlock:
/* Wait for completion of pending flips which consume fences */
if (intel_has_pending_fb_unpin(dev))
if (intel_has_pending_fb_unpin(&dev_priv->drm))
return ERR_PTR(-EAGAIN);
return ERR_PTR(-EDEADLK);
}
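fence_find() above picks the first unpinned register in LRU order; whether it is still bound to a vma no longer matters here, since stealing a bound-but-unpinned register is handled by fence_update(). A rough array-based sketch of the selection policy, where array order stands in for the LRU list and the struct is illustrative:

#include <errno.h>
#include <stdio.h>

#define NUM_FENCES 16

struct fence_slot {
        int pin_count;
        int in_use;     /* stands in for fence->vma != NULL */
};

/* Return the first fence that is not pinned, oldest first; -EDEADLK if
 * every register is pinned (the caller then waits or fails). */
static int fence_find(const struct fence_slot *fences, int count)
{
        for (int i = 0; i < count; i++)
                if (!fences[i].pin_count)
                        return i;
        return -EDEADLK;
}

int main(void)
{
        static struct fence_slot fences[NUM_FENCES];

        fences[0].pin_count = 1;        /* scanout is using this one */
        fences[1].in_use = 1;           /* bound, but unpinned: may be stolen */

        int id = fence_find(fences, NUM_FENCES);
        if (id >= 0)
                printf("picked fence %d (in_use=%d)\n", id, fences[id].in_use);
        return 0;
}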
/**
* i915_gem_object_get_fence - set up fencing for an object
* @obj: object to map through a fence reg
* i915_vma_get_fence - set up fencing for a vma
* @vma: vma to map through a fence reg
*
* When mapping objects through the GTT, userspace wants to be able to write
* to them without having to worry about swizzling if the object is tiled.
@ -366,103 +336,27 @@ i915_find_fence_reg(struct drm_device *dev)
* 0 on success, negative error code on failure.
*/
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
i915_vma_get_fence(struct i915_vma *vma)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
bool enable = i915_gem_object_is_tiled(obj);
struct drm_i915_fence_reg *reg;
int ret;
/* Have we updated the tiling parameters upon the object and so
* will need to serialise the write to the associated fence register?
*/
if (obj->fence_dirty) {
ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
}
struct drm_i915_fence_reg *fence;
struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
/* Just update our place in the LRU if our fence is getting reused. */
if (obj->fence_reg != I915_FENCE_REG_NONE) {
reg = &dev_priv->fence_regs[obj->fence_reg];
if (!obj->fence_dirty) {
list_move_tail(&reg->lru_list,
&dev_priv->mm.fence_list);
if (vma->fence) {
fence = vma->fence;
if (!fence->dirty) {
list_move_tail(&fence->link,
&fence->i915->mm.fence_list);
return 0;
}
} else if (enable) {
if (WARN_ON(!obj->map_and_fenceable))
return -EINVAL;
reg = i915_find_fence_reg(dev);
if (IS_ERR(reg))
return PTR_ERR(reg);
if (reg->obj) {
struct drm_i915_gem_object *old = reg->obj;
ret = i915_gem_object_wait_fence(old);
if (ret)
return ret;
i915_gem_object_fence_lost(old);
}
} else if (set) {
fence = fence_find(to_i915(vma->vm->dev));
if (IS_ERR(fence))
return PTR_ERR(fence);
} else
return 0;
i915_gem_object_update_fence(obj, reg, enable);
return 0;
}
/**
* i915_gem_object_pin_fence - pin fencing state
* @obj: object to pin fencing for
*
* This pins the fencing state (whether tiled or untiled) to make sure the
* object is ready to be used as a scanout target. Fencing status must be
* synchronized first by calling i915_gem_object_get_fence().
*
* The resulting fence pin reference must be released again with
* i915_gem_object_unpin_fence().
*
* Returns:
*
* True if the object has a fence, false otherwise.
*/
bool
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
{
if (obj->fence_reg != I915_FENCE_REG_NONE) {
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
WARN_ON(!ggtt_vma ||
dev_priv->fence_regs[obj->fence_reg].pin_count >
i915_vma_pin_count(ggtt_vma));
dev_priv->fence_regs[obj->fence_reg].pin_count++;
return true;
} else
return false;
}
/**
* i915_gem_object_unpin_fence - unpin fencing state
* @obj: object to unpin fencing for
*
* This releases the fence pin reference acquired through
* i915_gem_object_pin_fence. It will handle both objects with and without an
* attached fence correctly, callers do not need to distinguish this.
*/
void
i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
{
if (obj->fence_reg != I915_FENCE_REG_NONE) {
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
dev_priv->fence_regs[obj->fence_reg].pin_count--;
}
return fence_update(fence, set);
}
/**
@ -479,17 +373,16 @@ void i915_gem_restore_fences(struct drm_device *dev)
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
struct i915_vma *vma = reg->vma;
/*
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
if (reg->obj) {
i915_gem_object_update_fence(reg->obj, reg,
i915_gem_object_get_tiling(reg->obj));
} else {
i915_gem_write_fence(dev, i, NULL);
}
if (vma && !i915_gem_object_is_tiled(vma->obj))
vma = NULL;
fence_update(reg, vma);
}
}

View file

@ -170,11 +170,13 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
{
u32 pte_flags = 0;
vma->pages = vma->obj->pages;
/* Currently applicable only to VLV */
if (vma->obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
cache_level, pte_flags);
return 0;
@ -2618,8 +2620,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
if (obj->gt_ro)
pte_flags |= PTE_READ_ONLY;
vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
vma->node.start,
vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
cache_level, pte_flags);
/*
@ -2651,8 +2652,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
if (flags & I915_VMA_GLOBAL_BIND) {
vma->vm->insert_entries(vma->vm,
vma->ggtt_view.pages,
vma->node.start,
vma->pages, vma->node.start,
cache_level, pte_flags);
}
@ -2660,8 +2660,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
struct i915_hw_ppgtt *appgtt =
to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
appgtt->base.insert_entries(&appgtt->base,
vma->ggtt_view.pages,
vma->node.start,
vma->pages, vma->node.start,
cache_level, pte_flags);
}
@ -2795,7 +2794,6 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
ppgtt->base.cleanup(&ppgtt->base);
kfree(ppgtt);
}
@ -2812,7 +2810,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
ggtt->base.cleanup(&ggtt->base);
arch_phys_wc_del(ggtt->mtrr);
io_mapping_free(ggtt->mappable);
io_mapping_fini(&ggtt->mappable);
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@ -3210,9 +3208,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
if (!HAS_LLC(dev_priv))
ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
ggtt->mappable =
io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end);
if (!ggtt->mappable) {
if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
dev_priv->ggtt.mappable_base,
dev_priv->ggtt.mappable_end)) {
ret = -EIO;
goto out_gtt_cleanup;
}
@ -3323,6 +3321,7 @@ void i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(i915_vma_is_active(vma));
GEM_BUG_ON(!i915_vma_is_closed(vma));
GEM_BUG_ON(vma->fence);
list_del(&vma->vm_link);
if (!i915_vma_is_ggtt(vma))
@ -3342,33 +3341,29 @@ void i915_vma_close(struct i915_vma *vma)
}
static struct i915_vma *
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
__i915_vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
struct i915_vma *vma;
int i;
GEM_BUG_ON(vm->closed);
if (WARN_ON(i915_is_ggtt(vm) != !!view))
return ERR_PTR(-EINVAL);
vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
if (vma == NULL)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&vma->obj_link);
INIT_LIST_HEAD(&vma->exec_list);
for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
init_request_active(&vma->last_read[i], i915_vma_retire);
init_request_active(&vma->last_fence, NULL);
list_add(&vma->vm_link, &vm->unbound_list);
vma->vm = vm;
vma->obj = obj;
vma->size = obj->base.size;
if (i915_is_ggtt(vm)) {
vma->flags |= I915_VMA_GGTT;
if (view) {
vma->ggtt_view = *view;
if (view->type == I915_GGTT_VIEW_PARTIAL) {
vma->size = view->params.partial.size;
@ -3378,46 +3373,79 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
intel_rotation_info_size(&view->params.rotated);
vma->size <<= PAGE_SHIFT;
}
}
if (i915_is_ggtt(vm)) {
vma->flags |= I915_VMA_GGTT;
} else {
i915_ppgtt_get(i915_vm_to_ppgtt(vm));
}
list_add_tail(&vma->obj_link, &obj->vma_list);
return vma;
}
static inline bool vma_matches(struct i915_vma *vma,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
if (vma->vm != vm)
return false;
if (!i915_vma_is_ggtt(vma))
return true;
if (!view)
return vma->ggtt_view.type == 0;
if (vma->ggtt_view.type != view->type)
return false;
return memcmp(&vma->ggtt_view.params,
&view->params,
sizeof(view->params)) == 0;
}
struct i915_vma *
i915_vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
GEM_BUG_ON(view && !i915_is_ggtt(vm));
GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
return __i915_vma_create(obj, vm, view);
}
struct i915_vma *
i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
struct i915_vma *vma;
list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
if (vma_matches(vma, vm, view))
return vma;
return NULL;
}
struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
struct i915_address_space *vm)
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
struct i915_vma *vma;
vma = i915_gem_obj_to_vma(obj, vm);
GEM_BUG_ON(view && !i915_is_ggtt(vm));
vma = i915_gem_obj_to_vma(obj, vm, view);
if (!vma)
vma = __i915_gem_vma_create(obj, vm,
i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
return vma;
}
struct i915_vma *
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
GEM_BUG_ON(!view);
if (!vma)
vma = __i915_gem_vma_create(obj, &ggtt->base, view);
vma = __i915_vma_create(obj, vm, view);
GEM_BUG_ON(i915_vma_is_closed(vma));
return vma;
}
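i915_gem_obj_lookup_or_create_vma() is the usual lookup-or-create pattern keyed on (vm, view), with vma_matches() scanning the object's vma list. A compact userspace sketch of the same shape over a singly linked list, with the key reduced to two integers purely for illustration:

#include <stdio.h>
#include <stdlib.h>

struct vma_stub {
        int vm_id;              /* which address space */
        int view_type;          /* 0 == normal view */
        struct vma_stub *next;
};

struct object_stub { struct vma_stub *vma_list; };

static struct vma_stub *lookup_vma(struct object_stub *obj, int vm_id, int view_type)
{
        for (struct vma_stub *vma = obj->vma_list; vma; vma = vma->next)
                if (vma->vm_id == vm_id && vma->view_type == view_type)
                        return vma;
        return NULL;
}

static struct vma_stub *lookup_or_create_vma(struct object_stub *obj,
                                             int vm_id, int view_type)
{
        struct vma_stub *vma = lookup_vma(obj, vm_id, view_type);

        if (!vma) {
                vma = calloc(1, sizeof(*vma));
                vma->vm_id = vm_id;
                vma->view_type = view_type;
                vma->next = obj->vma_list;      /* newest first, like list_add() */
                obj->vma_list = vma;
        }
        return vma;
}

int main(void)
{
        struct object_stub obj = { NULL };

        struct vma_stub *a = lookup_or_create_vma(&obj, 1, 0);
        struct vma_stub *b = lookup_or_create_vma(&obj, 1, 0);  /* same key */
        struct vma_stub *c = lookup_or_create_vma(&obj, 2, 0);  /* new vm */

        printf("a==b: %d, a==c: %d\n", a == b, a == c);         /* 1, 0 */
        return 0;
}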
static struct scatterlist *
@ -3449,18 +3477,16 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
}
static struct sg_table *
intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
struct drm_i915_gem_object *obj)
{
const size_t n_pages = obj->base.size / PAGE_SIZE;
unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
unsigned int size_pages_uv;
unsigned int size = intel_rotation_info_size(rot_info);
struct sgt_iter sgt_iter;
dma_addr_t dma_addr;
unsigned long i;
dma_addr_t *page_addr_list;
struct sg_table *st;
unsigned int uv_start_page;
struct scatterlist *sg;
int ret = -ENOMEM;
@ -3471,18 +3497,12 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
if (!page_addr_list)
return ERR_PTR(ret);
/* Account for UV plane with NV12. */
if (rot_info->pixel_format == DRM_FORMAT_NV12)
size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
else
size_pages_uv = 0;
/* Allocate target SG list. */
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto err_st_alloc;
ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
ret = sg_alloc_table(st, size, GFP_KERNEL);
if (ret)
goto err_sg_alloc;
@ -3495,32 +3515,14 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
st->nents = 0;
sg = st->sgl;
/* Rotate the pages. */
sg = rotate_pages(page_addr_list, 0,
rot_info->plane[0].width, rot_info->plane[0].height,
rot_info->plane[0].width,
st, sg);
/* Append the UV plane if NV12. */
if (rot_info->pixel_format == DRM_FORMAT_NV12) {
uv_start_page = size_pages;
/* Check for tile-row un-alignment. */
if (offset_in_page(rot_info->uv_offset))
uv_start_page--;
rot_info->uv_start_page = uv_start_page;
sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
rot_info->plane[1].width, rot_info->plane[1].height,
rot_info->plane[1].width,
st, sg);
for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
rot_info->plane[i].width, rot_info->plane[i].height,
rot_info->plane[i].stride, st, sg);
}
DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n",
obj->base.size, rot_info->plane[0].width,
rot_info->plane[0].height, size_pages + size_pages_uv,
size_pages);
DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
drm_free_large(page_addr_list);
@ -3531,10 +3533,9 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
err_st_alloc:
drm_free_large(page_addr_list);
DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n",
obj->base.size, ret, rot_info->plane[0].width,
rot_info->plane[0].height, size_pages + size_pages_uv,
size_pages);
DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
return ERR_PTR(ret);
}
@ -3584,28 +3585,27 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
int ret = 0;
if (vma->ggtt_view.pages)
if (vma->pages)
return 0;
if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
vma->ggtt_view.pages = vma->obj->pages;
vma->pages = vma->obj->pages;
else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
vma->ggtt_view.pages =
vma->pages =
intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
vma->ggtt_view.pages =
intel_partial_pages(&vma->ggtt_view, vma->obj);
vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
else
WARN_ONCE(1, "GGTT view %u not implemented!\n",
vma->ggtt_view.type);
if (!vma->ggtt_view.pages) {
if (!vma->pages) {
DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
vma->ggtt_view.type);
ret = -EINVAL;
} else if (IS_ERR(vma->ggtt_view.pages)) {
ret = PTR_ERR(vma->ggtt_view.pages);
vma->ggtt_view.pages = NULL;
} else if (IS_ERR(vma->pages)) {
ret = PTR_ERR(vma->pages);
vma->pages = NULL;
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
vma->ggtt_view.type, ret);
}
@ -3668,8 +3668,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
{
void __iomem *ptr;
/* Access through the GTT requires the device to be awake. */
assert_rpm_wakelock_held(to_i915(vma->vm->dev));
lockdep_assert_held(&vma->vm->dev->struct_mutex);
if (WARN_ON(!vma->obj->map_and_fenceable))
if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
return IO_ERR_PTR(-ENODEV);
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
@ -3677,7 +3680,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
ptr = vma->iomap;
if (ptr == NULL) {
ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable,
ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
vma->node.start,
vma->node.size);
if (ptr == NULL)
@ -3689,3 +3692,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
__i915_vma_pin(vma);
return ptr;
}
void i915_vma_unpin_and_release(struct i915_vma **p_vma)
{
struct i915_vma *vma;
vma = fetch_and_zero(p_vma);
if (!vma)
return;
i915_vma_unpin(vma);
i915_vma_put(vma);
}

View file

@ -38,7 +38,13 @@
#include "i915_gem_request.h"
#define I915_FENCE_REG_NONE -1
#define I915_MAX_NUM_FENCES 32
/* 32 fences + sign bit for FENCE_REG_NONE */
#define I915_MAX_NUM_FENCE_BITS 6
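/*
 * Illustration (not from this patch): a signed 6-bit bitfield spans -32..31,
 * i.e. fence ids 0..31 plus I915_FENCE_REG_NONE (-1), so the error-state
 * code can pack a fence register id as something like:
 *
 *	s32 fence_reg : I915_MAX_NUM_FENCE_BITS;
 */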
struct drm_i915_file_private;
struct drm_i915_fence_reg;
typedef uint32_t gen6_pte_t;
typedef uint64_t gen8_pte_t;
@ -139,12 +145,9 @@ enum i915_ggtt_view_type {
};
struct intel_rotation_info {
unsigned int uv_offset;
uint32_t pixel_format;
unsigned int uv_start_page;
struct {
/* tiles */
unsigned int width, height;
unsigned int width, height, stride, offset;
} plane[2];
};
@ -158,8 +161,6 @@ struct i915_ggtt_view {
} partial;
struct intel_rotation_info rotated;
} params;
struct sg_table *pages;
};
extern const struct i915_ggtt_view i915_ggtt_view_normal;
@ -179,8 +180,11 @@ struct i915_vma {
struct drm_mm_node node;
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
struct drm_i915_fence_reg *fence;
struct sg_table *pages;
void __iomem *iomap;
u64 size;
u64 display_alignment;
unsigned int flags;
/**
@ -201,11 +205,13 @@ struct i915_vma {
#define I915_VMA_LOCAL_BIND BIT(7)
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
#define I915_VMA_GGTT BIT(8)
#define I915_VMA_CLOSED BIT(9)
#define I915_VMA_GGTT BIT(8)
#define I915_VMA_CAN_FENCE BIT(9)
#define I915_VMA_CLOSED BIT(10)
unsigned int active;
struct i915_gem_active last_read[I915_NUM_ENGINES];
struct i915_gem_active last_fence;
/**
* Support different GGTT views into the same object.
@ -232,11 +238,22 @@ struct i915_vma {
struct drm_i915_gem_exec_object2 *exec_entry;
};
struct i915_vma *
i915_vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view);
void i915_vma_unpin_and_release(struct i915_vma **p_vma);
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_GGTT;
}
static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_CAN_FENCE;
}
static inline bool i915_vma_is_closed(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_CLOSED;
@ -270,6 +287,15 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
return vma->active & BIT(engine);
}
static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
{
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
GEM_BUG_ON(!vma->node.allocated);
GEM_BUG_ON(upper_32_bits(vma->node.start));
GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
return lower_32_bits(vma->node.start);
}
struct i915_page_dma {
struct page *page;
union {
@ -413,13 +439,13 @@ struct i915_address_space {
*/
struct i915_ggtt {
struct i915_address_space base;
struct io_mapping mappable; /* Mapping to our CPU mappable region */
size_t stolen_size; /* Total size of stolen memory */
size_t stolen_usable_size; /* Total size minus BIOS reserved */
size_t stolen_reserved_base;
size_t stolen_reserved_size;
u64 mappable_end; /* End offset that we can CPU map */
struct io_mapping *mappable; /* Mapping to our CPU mappable region */
phys_addr_t mappable_base; /* PA of our GMADR */
/** "Graphics Stolen Memory" holds the global PTEs */
@ -608,24 +634,11 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev);
int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
static inline bool
i915_ggtt_view_equal(const struct i915_ggtt_view *a,
const struct i915_ggtt_view *b)
{
if (WARN_ON(!a || !b))
return false;
if (a->type != b->type)
return false;
if (a->type != I915_GGTT_VIEW_NORMAL)
return !memcmp(&a->params, &b->params, sizeof(a->params));
return true;
}
/* Flags used by pin/bind&friends. */
#define PIN_NONBLOCK BIT(0)
#define PIN_MAPPABLE BIT(1)
#define PIN_ZONE_4G BIT(2)
#define PIN_NONFAULT BIT(3)
#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
@ -715,4 +728,10 @@ static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
i915_vma_unpin(vma);
}
static inline struct page *i915_vma_first_page(struct i915_vma *vma)
{
GEM_BUG_ON(!vma->pages);
return sg_page(vma->pages->sgl);
}
#endif

View file

@ -30,8 +30,7 @@
struct render_state {
const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
struct i915_vma *vma;
u32 aux_batch_size;
u32 aux_batch_offset;
};
@ -73,7 +72,7 @@ render_state_get_rodata(const struct drm_i915_gem_request *req)
static int render_state_setup(struct render_state *so)
{
struct drm_device *dev = so->obj->base.dev;
struct drm_device *dev = so->vma->vm->dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
unsigned int i = 0, reloc_index = 0;
@ -81,18 +80,18 @@ static int render_state_setup(struct render_state *so)
u32 *d;
int ret;
ret = i915_gem_object_set_to_cpu_domain(so->obj, true);
ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
if (ret)
return ret;
page = i915_gem_object_get_dirty_page(so->obj, 0);
page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
d = kmap(page);
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
if (i * 4 == rodata->reloc[reloc_index]) {
u64 r = s + so->ggtt_offset;
u64 r = s + so->vma->node.start;
s = lower_32_bits(r);
if (has_64bit_reloc) {
if (i + 1 >= rodata->batch_items ||
@ -154,7 +153,7 @@ static int render_state_setup(struct render_state *so)
kunmap(page);
ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
if (ret)
return ret;
@ -175,6 +174,7 @@ static int render_state_setup(struct render_state *so)
int i915_gem_render_state_init(struct drm_i915_gem_request *req)
{
struct render_state so;
struct drm_i915_gem_object *obj;
int ret;
if (WARN_ON(req->engine->id != RCS))
@ -187,21 +187,25 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
if (so.rodata->batch_items * 4 > 4096)
return -EINVAL;
so.obj = i915_gem_object_create(&req->i915->drm, 4096);
if (IS_ERR(so.obj))
return PTR_ERR(so.obj);
obj = i915_gem_object_create(&req->i915->drm, 4096);
if (IS_ERR(obj))
return PTR_ERR(obj);
ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0);
so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
if (IS_ERR(so.vma)) {
ret = PTR_ERR(so.vma);
goto err_obj;
}
ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
if (ret)
goto err_obj;
so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj);
ret = render_state_setup(&so);
if (ret)
goto err_unpin;
ret = req->engine->emit_bb_start(req, so.ggtt_offset,
ret = req->engine->emit_bb_start(req, so.vma->node.start,
so.rodata->batch_items * 4,
I915_DISPATCH_SECURE);
if (ret)
@ -209,7 +213,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
if (so.aux_batch_size > 8) {
ret = req->engine->emit_bb_start(req,
(so.ggtt_offset +
(so.vma->node.start +
so.aux_batch_offset),
so.aux_batch_size,
I915_DISPATCH_SECURE);
@ -217,10 +221,10 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
goto err_unpin;
}
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
i915_vma_move_to_active(so.vma, req, 0);
err_unpin:
i915_gem_object_ggtt_unpin(so.obj);
i915_vma_unpin(so.vma);
err_obj:
i915_gem_object_put(so.obj);
i915_gem_object_put(obj);
return ret;
}

View file

@ -24,7 +24,7 @@
#ifndef _I915_GEM_RENDER_STATE_H_
#define _I915_GEM_RENDER_STATE_H_
#include <linux/types.h>
struct drm_i915_gem_request;
int i915_gem_render_state_init(struct drm_i915_gem_request *req);

View file

@ -137,8 +137,6 @@ int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
list_add_tail(&req->client_list, &file_priv->mm.request_list);
spin_unlock(&file_priv->mm.lock);
req->pid = get_pid(task_pid(current));
return 0;
}
@ -154,9 +152,6 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
list_del(&request->client_list);
request->file_priv = NULL;
spin_unlock(&file_priv->mm.lock);
put_pid(request->pid);
request->pid = NULL;
}
void i915_gem_retire_noop(struct i915_gem_active *active,
@ -355,7 +350,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
if (req && i915_gem_request_completed(req))
i915_gem_request_retire(req);
req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
/* Beware: Dragons be flying overhead.
*
* We use RCU to look up requests in flight. The lookups may
* race with the request being allocated from the slab freelist.
* That is, the request we are writing to here may be in the process
* of being read by __i915_gem_active_get_rcu(). As such,
* we have to be very careful when overwriting the contents. During
* the RCU lookup, we chase the request->engine pointer,
* read the request->fence.seqno and increment the reference count.
*
* The reference count is incremented atomically. If it is zero,
* the lookup knows the request is unallocated and complete. Otherwise,
* it is either still in use, or has been reallocated and reset
* with fence_init(). This increment is safe for release as we check
* that the request we have a reference to matches the active
* request.
*
* Before we increment the refcount, we chase the request->engine
* pointer. We must not call kmem_cache_zalloc() or else we set
* that pointer to NULL and cause a crash during the lookup. If
* we see the request is completed (based on the value of the
* old engine and seqno), the lookup is complete and reports NULL.
* If we decide the request is not completed (new engine or seqno),
* then we grab a reference and double check that it is still the
* active request - which it will not be, and so we restart the lookup.
*
* Do not use kmem_cache_zalloc() here!
*/
req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
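/*
 * Illustrative sketch of the lookup side described in the comment above,
 * condensed from __i915_gem_active_get_rcu() (parts of which appear further
 * down in this diff). sketch_lookup_request_rcu() is a made-up name; the
 * caller must hold rcu_read_lock().
 */
static struct drm_i915_gem_request *
sketch_lookup_request_rcu(const struct i915_gem_active *active)
{
	do {
		struct drm_i915_gem_request *request;

		request = rcu_dereference(active->request);
		if (!request || i915_gem_request_completed(request))
			return NULL;

		/* May fail, or may succeed against a recycled request,
		 * since the slab can reuse this memory at any time. */
		request = i915_gem_request_get_rcu(request);

		/* Keep the reference only if this is still the request the
		 * tracker points at; otherwise drop it and retry. */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

		i915_gem_request_put(request);
	} while (1);
}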
@ -375,6 +398,12 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->engine = engine;
req->ctx = i915_gem_context_get(ctx);
/* No zalloc, must clear what we need by hand */
req->previous_context = NULL;
req->file_priv = NULL;
req->batch = NULL;
req->elsp_submitted = 0;
/*
* Reserve space in the ring buffer for all the commands required to
* eventually emit this request. This is to guarantee that the
@ -391,6 +420,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
if (ret)
goto err_ctx;
/* Record the position of the start of the request so that
* should we detect the updated seqno part-way through the
* GPU processing the request, we never over-estimate the
* position of the head.
*/
req->head = req->ring->tail;
return req;
err_ctx:
@ -426,22 +462,14 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
* request is not being tracked for completion but the work itself is
* going to happen on the hardware. This would be a Bad Thing(tm).
*/
void __i915_add_request(struct drm_i915_gem_request *request,
struct drm_i915_gem_object *obj,
bool flush_caches)
void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
{
struct intel_engine_cs *engine;
struct intel_ring *ring;
struct intel_engine_cs *engine = request->engine;
struct intel_ring *ring = request->ring;
u32 request_start;
u32 reserved_tail;
int ret;
if (WARN_ON(!request))
return;
engine = request->engine;
ring = request->ring;
/*
* To ensure that this call will not fail, space for its emissions
* should already have been reserved in the ring buffer. Let the ring
@ -467,16 +495,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
trace_i915_gem_request_add(request);
request->head = request_start;
/* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
* request is retired will the batch_obj be moved onto the
* inactive_list and lose its active reference. Hence we do not need
* to explicitly hold another reference here.
*/
request->batch_obj = obj;
/* Seal the request and mark it as pending execution. Note that
* we may inspect this state, without holding any locks, during
* hangcheck. Hence we apply the barrier to ensure that we do not
@ -489,10 +507,10 @@ void __i915_add_request(struct drm_i915_gem_request *request,
list_add_tail(&request->link, &engine->request_list);
list_add_tail(&request->ring_link, &ring->request_list);
/* Record the position of the start of the request so that
/* Record the position of the start of the breadcrumb so that
* should we detect the updated seqno part-way through the
* GPU processing the request, we never over-estimate the
* position of the head.
* position of the ring's HEAD.
*/
request->postfix = ring->tail;

View file

@ -51,6 +51,13 @@ struct intel_signal_node {
* emission time to be associated with the request for tracking how far ahead
* of the GPU the submission is.
*
* When modifying this structure be very aware that we perform a lockless
* RCU lookup of it that may race against reallocation of the struct
* from the slab freelist. We intentionally do not zero the structure on
* allocation so that the lookup can use the dangling pointers (and is
* cognisant that those pointers may be wrong). Instead, everything that
* needs to be initialised must be done so explicitly.
*
* The requests are reference counted.
*/
struct drm_i915_gem_request {
@ -111,7 +118,7 @@ struct drm_i915_gem_request {
/** Batch buffer related to this request if any (used for
* error state dump only).
*/
struct drm_i915_gem_object *batch_obj;
struct i915_vma *batch;
struct list_head active_list;
/** Time at which this request was emitted, in jiffies. */
@ -127,9 +134,6 @@ struct drm_i915_gem_request {
/** file_priv list entry for this request */
struct list_head client_list;
/** process identifier submitting this request */
struct pid *pid;
/**
* The ELSP only accepts two elements at a time, so we queue
* context/tail pairs on a given queue (ring->execlist_queue) until the
@ -218,13 +222,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
void __i915_add_request(struct drm_i915_gem_request *req,
struct drm_i915_gem_object *batch_obj,
bool flush_caches);
void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
__i915_add_request(req, NULL, true)
__i915_add_request(req, true)
#define i915_add_request_no_flush(req) \
__i915_add_request(req, NULL, false)
__i915_add_request(req, false)
struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
@ -359,6 +361,21 @@ __i915_gem_active_peek(const struct i915_gem_active *active)
return rcu_dereference_protected(active->request, 1);
}
/**
* i915_gem_active_raw - return the active request
* @active - the active tracker
*
* i915_gem_active_raw() returns the current request being tracked, or NULL.
* It does not obtain a reference on the request for the caller, so the caller
* must hold struct_mutex.
*/
static inline struct drm_i915_gem_request *
i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
{
return rcu_dereference_protected(active->request,
lockdep_is_held(mutex));
}
/**
* i915_gem_active_peek - report the active request being monitored
* @active - the active tracker
@ -372,29 +389,7 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
{
struct drm_i915_gem_request *request;
request = rcu_dereference_protected(active->request,
lockdep_is_held(mutex));
if (!request || i915_gem_request_completed(request))
return NULL;
return request;
}
/**
* i915_gem_active_peek_rcu - report the active request being monitored
* @active - the active tracker
*
* i915_gem_active_peek_rcu() returns the current request being tracked if
* still active, or NULL. It does not obtain a reference on the request
* for the caller, and inspection of the request is only valid under
* the RCU lock.
*/
static inline struct drm_i915_gem_request *
i915_gem_active_peek_rcu(const struct i915_gem_active *active)
{
struct drm_i915_gem_request *request;
request = rcu_dereference(active->request);
request = i915_gem_active_raw(active, mutex);
if (!request || i915_gem_request_completed(request))
return NULL;
@ -465,6 +460,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* just report the active tracker is idle. If the new request is
* incomplete, then we acquire a reference on it and check that
* it remained the active request.
*
* It is then imperative that we do not zero the request on
* reallocation, so that we can chase the dangling pointers!
* See i915_gem_request_alloc().
*/
do {
struct drm_i915_gem_request *request;
@ -497,6 +496,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* incremented) then the following read for rcu_access_pointer()
* must occur after the atomic operation and so confirm
* that this request is the one currently being tracked.
*
* The corresponding write barrier is part of
* rcu_assign_pointer().
*/
if (!request || request == rcu_access_pointer(active->request))
return rcu_pointer_handoff(request);
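/*
 * Note (not part of the patch): chasing the possibly-dangling pointer above
 * is only safe because the dev_priv->requests cache is expected to be
 * created with SLAB_DESTROY_BY_RCU, so the memory remains a valid struct
 * drm_i915_gem_request for the duration of the rcu_read_lock() section even
 * if the request is freed and immediately reused.
 */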
@ -635,8 +637,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
struct drm_i915_gem_request *request;
int ret;
request = rcu_dereference_protected(active->request,
lockdep_is_held(mutex));
request = i915_gem_active_raw(active, mutex);
if (!request)
return 0;

View file

@ -115,17 +115,28 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
base = bsm & INTEL_BSM_MASK;
} else if (IS_I865G(dev)) {
u32 tseg_size = 0;
u16 toud = 0;
u8 tmp;
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0),
I845_ESMRAMC, &tmp);
if (tmp & TSEG_ENABLE) {
switch (tmp & I845_TSEG_SIZE_MASK) {
case I845_TSEG_SIZE_512K:
tseg_size = KB(512);
break;
case I845_TSEG_SIZE_1M:
tseg_size = MB(1);
break;
}
}
/*
* FIXME is the graphics stolen memory region
* always at TOUD? Ie. is it always the last
* one to be allocated by the BIOS?
*/
pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0),
I865_TOUD, &toud);
base = toud << 16;
base = (toud << 16) + tseg_size;
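/*
 * Worked example (hypothetical values): if the BIOS programmed
 * TOUD = 0x0fe0 (usable DRAM ending at 0x0fe00000) and enabled a 1MiB TSEG,
 * stolen memory now starts at
 *	(0x0fe0 << 16) + 0x100000 = 0x0ff00000
 * rather than at 0x0fe00000 as computed before this change.
 */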
} else if (IS_I85X(dev)) {
u32 tseg_size = 0;
u32 tom;
@ -685,7 +696,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
if (gtt_offset == I915_GTT_OFFSET_NONE)
return obj;
vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base);
vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
@ -705,6 +716,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
goto err;
}
vma->pages = obj->pages;
vma->flags |= I915_VMA_GLOBAL_BIND;
__i915_vma_set_map_and_fenceable(vma);
list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);

View file

@ -116,37 +116,58 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
return true;
}
/* Is the current GTT allocation valid for the change in tiling? */
static bool
i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
u32 size;
if (tiling_mode == I915_TILING_NONE)
if (!i915_vma_is_map_and_fenceable(vma))
return true;
if (INTEL_GEN(dev_priv) >= 4)
return true;
if (IS_GEN3(dev_priv)) {
if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK)
if (INTEL_GEN(dev_priv) == 3) {
if (vma->node.start & ~I915_FENCE_START_MASK)
return false;
} else {
if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK)
if (vma->node.start & ~I830_FENCE_START_MASK)
return false;
}
size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode);
if (i915_gem_obj_ggtt_size(obj) != size)
size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
if (vma->node.size < size)
return false;
if (i915_gem_obj_ggtt_offset(obj) & (size - 1))
if (vma->node.start & (size - 1))
return false;
return true;
}
/* Make the current GTT allocation valid for the change in tiling. */
static int
i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
if (tiling_mode == I915_TILING_NONE)
return 0;
if (INTEL_GEN(dev_priv) >= 4)
return 0;
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (i915_vma_fence_prepare(vma, tiling_mode))
continue;
ret = i915_vma_unbind(vma);
if (ret)
return ret;
}
return 0;
}
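/*
 * Worked example (hypothetical size): suppose i915_gem_get_ggtt_size()
 * reports a 512K fence region for a tiled object on gen3. The existing
 * binding is reusable only if
 *	vma->node.size >= 512K,
 *	(vma->node.start & (512K - 1)) == 0, and
 *	(vma->node.start & ~I915_FENCE_START_MASK) == 0;
 * otherwise i915_gem_object_fence_prepare() unbinds the vma so that the
 * next bind can pick an offset a fence register is able to cover.
 */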
/**
* i915_gem_set_tiling - IOCTL handler to set tiling mode
* @dev: DRM device
@ -168,7 +189,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
struct drm_i915_gem_set_tiling *args = data;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
int ret = 0;
int err = 0;
/* Make sure we don't cross-contaminate obj->tiling_and_stride */
BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
@ -187,7 +208,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
mutex_lock(&dev->struct_mutex);
if (obj->pin_display || obj->framebuffer_references) {
ret = -EBUSY;
err = -EBUSY;
goto err;
}
@ -234,11 +255,11 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
* has to also include the unfenced register the GPU uses
* whilst executing a fenced command for an untiled object.
*/
if (obj->map_and_fenceable &&
!i915_gem_object_fence_ok(obj, args->tiling_mode))
ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
if (ret == 0) {
err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
if (!err) {
struct i915_vma *vma;
if (obj->pages &&
obj->madv == I915_MADV_WILLNEED &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@ -248,11 +269,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
i915_gem_object_pin_pages(obj);
}
obj->fence_dirty =
!i915_gem_active_is_idle(&obj->last_fence,
&dev->struct_mutex) ||
obj->fence_reg != I915_FENCE_REG_NONE;
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!vma->fence)
continue;
vma->fence->dirty = true;
}
obj->tiling_and_stride =
args->stride | args->tiling_mode;
@ -281,7 +303,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
intel_runtime_pm_put(dev_priv);
return ret;
return err;
}
/**

View file

@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
}
}
obj->userptr.work = ERR_PTR(ret);
if (ret)
__i915_gem_userptr_set_active(obj, false);
}
obj->userptr.workers--;
@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
* to the vma (discard or cloning) which should prevent the more
* egregious cases from causing harm.
*/
if (IS_ERR(obj->userptr.work)) {
/* active flag will have been dropped already by the worker */
ret = PTR_ERR(obj->userptr.work);
obj->userptr.work = NULL;
return ret;
}
if (obj->userptr.work)
if (obj->userptr.work) {
/* active flag should still be held for the pending work */
return -EAGAIN;
if (IS_ERR(obj->userptr.work))
return PTR_ERR(obj->userptr.work);
else
return -EAGAIN;
}
/* Let the mmu-notifier know that we have begun and need cancellation */
ret = __i915_gem_userptr_set_active(obj, true);

View file

@ -42,16 +42,6 @@ static const char *engine_str(int engine)
}
}
static const char *pin_flag(int pinned)
{
if (pinned > 0)
return " P";
else if (pinned < 0)
return " p";
else
return "";
}
static const char *tiling_flag(int tiling)
{
switch (tiling) {
@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
{
int i;
err_printf(m, " %s [%d]:\n", name, count);
err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
err_printf(m, " %08x_%08x %8u %02x %02x [ ",
@ -202,7 +192,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
err_printf(m, "%02x ", err->rseqno[i]);
err_printf(m, "] %02x", err->wseqno);
err_puts(m, pin_flag(err->pinned));
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@ -247,14 +236,23 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
err_printf(m, " HEAD: 0x%08x\n", ee->head);
err_printf(m, " TAIL: 0x%08x\n", ee->tail);
err_printf(m, " CTL: 0x%08x\n", ee->ctl);
err_printf(m, " MODE: 0x%08x\n", ee->mode);
err_printf(m, " HWS: 0x%08x\n", ee->hws);
err_printf(m, " ACTHD: 0x%08x %08x\n",
(u32)(ee->acthd>>32), (u32)ee->acthd);
err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir);
err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr);
err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone);
if (ee->batchbuffer) {
u64 start = ee->batchbuffer->gtt_offset;
u64 end = start + ee->batchbuffer->gtt_size;
err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n",
upper_32_bits(start), lower_32_bits(start),
upper_32_bits(end), lower_32_bits(end));
}
if (INTEL_GEN(m->i915) >= 4) {
err_printf(m, " BBADDR: 0x%08x %08x\n",
err_printf(m, " BBADDR: 0x%08x_%08x\n",
(u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate);
err_printf(m, " INSTPS: 0x%08x\n", ee->instps);
@ -323,6 +321,16 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
}
}
static void err_print_capabilities(struct drm_i915_error_state_buf *m,
const struct intel_device_info *info)
{
#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x))
#define SEP_SEMICOLON ;
DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON);
#undef PRINT_FLAG
#undef SEP_SEMICOLON
}
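/*
 * Sketch of the X-macro pattern used by err_print_capabilities() above.
 * FOR_EACH_EXAMPLE_FLAG and its entries are made up for illustration; the
 * real list is DEV_INFO_FOR_EACH_FLAG() in i915_drv.h. Expanding it with
 * PRINT_FLAG/SEP_SEMICOLON turns the function body into one err_printf()
 * per capability flag.
 */
#define FOR_EACH_EXAMPLE_FLAG(func, sep) \
	func(has_llc) sep \
	func(has_fbc) sep \
	func(has_csr)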
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
const struct i915_error_state_file_priv *error_priv)
{
@ -342,6 +350,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
error->time.tv_usec);
err_printf(m, "Kernel: " UTS_RELEASE "\n");
err_print_capabilities(m, &error->device_info);
max_hangcheck_score = 0;
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
if (error->engine[i].hangcheck_score > max_hangcheck_score)
@ -414,18 +423,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
error_print_engine(m, &error->engine[i]);
}
for (i = 0; i < error->vm_count; i++) {
err_printf(m, "vm[%d]\n", i);
for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
char buf[128];
int len, first = 1;
print_error_buffers(m, "Active",
if (!error->active_vm[i])
break;
len = scnprintf(buf, sizeof(buf), "Active (");
for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
if (error->engine[j].vm != error->active_vm[i])
continue;
len += scnprintf(buf + len, sizeof(buf), "%s%s",
first ? "" : ", ",
dev_priv->engine[j].name);
first = 0;
}
scnprintf(buf + len, sizeof(buf), ")");
print_error_buffers(m, buf,
error->active_bo[i],
error->active_bo_count[i]);
print_error_buffers(m, "Pinned",
error->pinned_bo[i],
error->pinned_bo_count[i]);
}
print_error_buffers(m, "Pinned (global)",
error->pinned_bo,
error->pinned_bo_count);
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
struct drm_i915_error_engine *ee = &error->engine[i];
@ -455,9 +479,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
dev_priv->engine[i].name,
ee->num_requests);
for (j = 0; j < ee->num_requests; j++) {
err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n",
err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x, tail 0x%08x\n",
ee->requests[j].pid,
ee->requests[j].seqno,
ee->requests[j].jiffies,
ee->requests[j].head,
ee->requests[j].tail);
}
}
@ -533,7 +559,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
}
}
if ((obj = error->semaphore_obj)) {
if ((obj = error->semaphore)) {
err_printf(m, "Semaphore page = 0x%08x\n",
lower_32_bits(obj->gtt_offset));
for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
@ -624,15 +650,12 @@ static void i915_error_state_free(struct kref *error_ref)
kfree(ee->waiters);
}
i915_error_object_free(error->semaphore_obj);
i915_error_object_free(error->semaphore);
for (i = 0; i < error->vm_count; i++)
for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
kfree(error->active_bo);
kfree(error->active_bo_count);
kfree(error->pinned_bo);
kfree(error->pinned_bo_count);
kfree(error->overlay);
kfree(error->display);
kfree(error);
@ -640,46 +663,45 @@ static void i915_error_state_free(struct kref *error_ref)
static struct drm_i915_error_object *
i915_error_object_create(struct drm_i915_private *dev_priv,
struct drm_i915_gem_object *src,
struct i915_address_space *vm)
struct i915_vma *vma)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_i915_gem_object *src;
struct drm_i915_error_object *dst;
struct i915_vma *vma = NULL;
int num_pages;
bool use_ggtt;
int i = 0;
u64 reloc_offset;
if (src == NULL || src->pages == NULL)
if (!vma)
return NULL;
src = vma->obj;
if (!src->pages)
return NULL;
num_pages = src->base.size >> PAGE_SHIFT;
dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
if (dst == NULL)
if (!dst)
return NULL;
if (i915_gem_obj_bound(src, vm))
dst->gtt_offset = i915_gem_obj_offset(src, vm);
else
dst->gtt_offset = -1;
dst->gtt_offset = vma->node.start;
dst->gtt_size = vma->node.size;
reloc_offset = dst->gtt_offset;
if (i915_is_ggtt(vm))
vma = i915_gem_obj_to_ggtt(src);
use_ggtt = (src->cache_level == I915_CACHE_NONE &&
vma && (vma->flags & I915_VMA_GLOBAL_BIND) &&
(vma->flags & I915_VMA_GLOBAL_BIND) &&
reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
/* Cannot access stolen address directly, try to use the aperture */
if (src->stolen) {
use_ggtt = true;
if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND))
if (!(vma->flags & I915_VMA_GLOBAL_BIND))
goto unwind;
reloc_offset = i915_gem_obj_ggtt_offset(src);
reloc_offset = vma->node.start;
if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
goto unwind;
}
@ -707,7 +729,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
* captures what the GPU read.
*/
s = io_mapping_map_atomic_wc(ggtt->mappable,
s = io_mapping_map_atomic_wc(&ggtt->mappable,
reloc_offset);
memcpy_fromio(d, s, PAGE_SIZE);
io_mapping_unmap_atomic(s);
@ -739,8 +761,6 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
kfree(dst);
return NULL;
}
#define i915_error_ggtt_object_create(dev_priv, src) \
i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base)
/* The error capture is special as tries to run underneath the normal
* locking rules - so we use the raw version of the i915_gem_active lookup.
@ -777,10 +797,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->gtt_offset = vma->node.start;
err->read_domains = obj->base.read_domains;
err->write_domain = obj->base.write_domain;
err->fence_reg = obj->fence_reg;
err->pinned = 0;
if (i915_gem_obj_is_pinned(obj))
err->pinned = 1;
err->fence_reg = vma->fence ? vma->fence->id : -1;
err->tiling = i915_gem_object_get_tiling(obj);
err->dirty = obj->dirty;
err->purgeable = obj->madv != I915_MADV_WILLNEED;
@ -788,13 +805,17 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->cache_level = obj->cache_level;
}
static u32 capture_active_bo(struct drm_i915_error_buffer *err,
int count, struct list_head *head)
static u32 capture_error_bo(struct drm_i915_error_buffer *err,
int count, struct list_head *head,
bool pinned_only)
{
struct i915_vma *vma;
int i = 0;
list_for_each_entry(vma, head, vm_link) {
if (pinned_only && !i915_vma_is_pinned(vma))
continue;
capture_bo(err++, vma);
if (++i == count)
break;
@ -803,28 +824,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
return i;
}
static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
int count, struct list_head *head,
struct i915_address_space *vm)
{
struct drm_i915_gem_object *obj;
struct drm_i915_error_buffer * const first = err;
struct drm_i915_error_buffer * const last = err + count;
list_for_each_entry(obj, head, global_list) {
struct i915_vma *vma;
if (err == last)
break;
list_for_each_entry(vma, &obj->vma_list, obj_link)
if (vma->vm == vm && i915_vma_is_pinned(vma))
capture_bo(err++, vma);
}
return err - first;
}
/* Generate a semi-unique error code. The code is not meant to have meaning; the
* code's only purpose is to try to prevent false duplicated bug reports by
* grossly estimating a GPU error state.
@ -884,7 +883,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
struct intel_engine_cs *to;
enum intel_engine_id id;
if (!error->semaphore_obj)
if (!error->semaphore)
return;
for_each_engine_id(to, dev_priv, id) {
@ -897,7 +896,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
signal_offset =
(GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
tmp = error->semaphore_obj->pages[0];
tmp = error->semaphore->pages[0];
idx = intel_engine_sync_index(engine, to);
ee->semaphore_mboxes[idx] = tmp[signal_offset];
@ -1007,6 +1006,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
ee->head = I915_READ_HEAD(engine);
ee->tail = I915_READ_TAIL(engine);
ee->ctl = I915_READ_CTL(engine);
if (INTEL_GEN(dev_priv) > 2)
ee->mode = I915_READ_MODE(engine);
if (I915_NEED_GFX_HWS(dev_priv)) {
i915_reg_t mmio;
@ -1062,45 +1063,76 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
}
}
static void i915_gem_record_active_context(struct intel_engine_cs *engine,
struct drm_i915_error_state *error,
struct drm_i915_error_engine *ee)
static void engine_record_requests(struct intel_engine_cs *engine,
struct drm_i915_gem_request *first,
struct drm_i915_error_engine *ee)
{
struct drm_i915_private *dev_priv = engine->i915;
struct drm_i915_gem_object *obj;
struct drm_i915_gem_request *request;
int count;
/* Currently render ring is the only HW context user */
if (engine->id != RCS || !error->ccid)
count = 0;
request = first;
list_for_each_entry_from(request, &engine->request_list, link)
count++;
if (!count)
return;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (!i915_gem_obj_ggtt_bound(obj))
continue;
ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
if (!ee->requests)
return;
if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
ee->ctx = i915_error_ggtt_object_create(dev_priv, obj);
ee->num_requests = count;
count = 0;
request = first;
list_for_each_entry_from(request, &engine->request_list, link) {
struct drm_i915_error_request *erq;
if (count >= ee->num_requests) {
/*
* If the ring request list was changed in
* between the point where the error request
* list was created and dimensioned and this
* point then just exit early to avoid crashes.
*
* We don't need to communicate that the
* request list changed state during error
* state capture and that the error state is
* slightly incorrect as a consequence since we
* are typically only interested in the request
* list state at the point of error state
* capture, not in any changes happening during
* the capture.
*/
break;
}
erq = &ee->requests[count++];
erq->seqno = request->fence.seqno;
erq->jiffies = request->emitted_jiffies;
erq->head = request->head;
erq->tail = request->tail;
rcu_read_lock();
erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
rcu_read_unlock();
}
ee->num_requests = count;
}
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_i915_gem_request *request;
int i, count;
int i;
if (dev_priv->semaphore_obj) {
error->semaphore_obj =
i915_error_ggtt_object_create(dev_priv,
dev_priv->semaphore_obj);
}
error->semaphore =
i915_error_object_create(dev_priv, dev_priv->semaphore);
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = &dev_priv->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
struct drm_i915_gem_request *request;
ee->pid = -1;
ee->engine_id = -1;
@ -1115,10 +1147,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
request = i915_gem_find_active_request(engine);
if (request) {
struct i915_address_space *vm;
struct intel_ring *ring;
struct pid *pid;
vm = request->ctx->ppgtt ?
ee->vm = request->ctx->ppgtt ?
&request->ctx->ppgtt->base : &ggtt->base;
/* We need to copy these to an anonymous buffer
@ -1127,19 +1159,23 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
*/
ee->batchbuffer =
i915_error_object_create(dev_priv,
request->batch_obj,
vm);
request->batch);
if (HAS_BROKEN_CS_TLB(dev_priv))
ee->wa_batchbuffer =
i915_error_ggtt_object_create(dev_priv,
engine->scratch.obj);
i915_error_object_create(dev_priv,
engine->scratch);
if (request->pid) {
ee->ctx =
i915_error_object_create(dev_priv,
request->ctx->engine[i].state);
pid = request->ctx->pid;
if (pid) {
struct task_struct *task;
rcu_read_lock();
task = pid_task(request->pid, PIDTYPE_PID);
task = pid_task(pid, PIDTYPE_PID);
if (task) {
strcpy(ee->comm, task->comm);
ee->pid = task->pid;
@ -1154,145 +1190,102 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->cpu_ring_head = ring->head;
ee->cpu_ring_tail = ring->tail;
ee->ringbuffer =
i915_error_ggtt_object_create(dev_priv,
ring->obj);
i915_error_object_create(dev_priv, ring->vma);
engine_record_requests(engine, request, ee);
}
ee->hws_page =
i915_error_ggtt_object_create(dev_priv,
engine->status_page.obj);
i915_error_object_create(dev_priv,
engine->status_page.vma);
ee->wa_ctx = i915_error_ggtt_object_create(dev_priv,
engine->wa_ctx.obj);
i915_gem_record_active_context(engine, error, ee);
count = 0;
list_for_each_entry(request, &engine->request_list, link)
count++;
ee->num_requests = count;
ee->requests =
kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
if (!ee->requests) {
ee->num_requests = 0;
continue;
}
count = 0;
list_for_each_entry(request, &engine->request_list, link) {
struct drm_i915_error_request *erq;
if (count >= ee->num_requests) {
/*
* If the ring request list was changed in
* between the point where the error request
* list was created and dimensioned and this
* point then just exit early to avoid crashes.
*
* We don't need to communicate that the
* request list changed state during error
* state capture and that the error state is
* slightly incorrect as a consequence since we
* are typically only interested in the request
* list state at the point of error state
* capture, not in any changes happening during
* the capture.
*/
break;
}
erq = &ee->requests[count++];
erq->seqno = request->fence.seqno;
erq->jiffies = request->emitted_jiffies;
erq->tail = request->postfix;
}
ee->wa_ctx =
i915_error_object_create(dev_priv, engine->wa_ctx.vma);
}
}
/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
* VM.
*/
static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error,
struct i915_address_space *vm,
const int ndx)
int idx)
{
struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
struct drm_i915_gem_object *obj;
struct drm_i915_error_buffer *active_bo;
struct i915_vma *vma;
int i;
int count;
i = 0;
count = 0;
list_for_each_entry(vma, &vm->active_list, vm_link)
i++;
error->active_bo_count[ndx] = i;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
list_for_each_entry(vma, &obj->vma_list, obj_link)
if (vma->vm == vm && i915_vma_is_pinned(vma))
i++;
}
error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
if (i) {
active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC);
if (active_bo)
pinned_bo = active_bo + error->active_bo_count[ndx];
}
count++;
active_bo = NULL;
if (count)
active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
if (active_bo)
error->active_bo_count[ndx] =
capture_active_bo(active_bo,
error->active_bo_count[ndx],
&vm->active_list);
count = capture_error_bo(active_bo, count, &vm->active_list, false);
else
count = 0;
if (pinned_bo)
error->pinned_bo_count[ndx] =
capture_pinned_bo(pinned_bo,
error->pinned_bo_count[ndx],
&dev_priv->mm.bound_list, vm);
error->active_bo[ndx] = active_bo;
error->pinned_bo[ndx] = pinned_bo;
error->active_vm[idx] = vm;
error->active_bo[idx] = active_bo;
error->active_bo_count[idx] = count;
}
static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
struct i915_address_space *vm;
int cnt = 0, i = 0;
int cnt = 0, i, j;
list_for_each_entry(vm, &dev_priv->vm_list, global_link)
cnt++;
BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
GFP_ATOMIC);
error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
GFP_ATOMIC);
/* Scan each engine looking for unique active contexts/vm */
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
struct drm_i915_error_engine *ee = &error->engine[i];
bool found;
if (error->active_bo == NULL ||
error->pinned_bo == NULL ||
error->active_bo_count == NULL ||
error->pinned_bo_count == NULL) {
kfree(error->active_bo);
kfree(error->active_bo_count);
kfree(error->pinned_bo);
kfree(error->pinned_bo_count);
if (!ee->vm)
continue;
error->active_bo = NULL;
error->active_bo_count = NULL;
error->pinned_bo = NULL;
error->pinned_bo_count = NULL;
} else {
list_for_each_entry(vm, &dev_priv->vm_list, global_link)
i915_gem_capture_vm(dev_priv, error, vm, i++);
error->vm_count = cnt;
found = false;
for (j = 0; j < i && !found; j++)
found = error->engine[j].vm == ee->vm;
if (!found)
i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
}
}
static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
struct i915_address_space *vm = &dev_priv->ggtt.base;
struct drm_i915_error_buffer *bo;
struct i915_vma *vma;
int count_inactive, count_active;
count_inactive = 0;
list_for_each_entry(vma, &vm->active_list, vm_link)
count_inactive++;
count_active = 0;
list_for_each_entry(vma, &vm->inactive_list, vm_link)
count_active++;
bo = NULL;
if (count_inactive + count_active)
bo = kcalloc(count_inactive + count_active,
sizeof(*bo), GFP_ATOMIC);
if (!bo)
return;
count_inactive = capture_error_bo(bo, count_inactive,
&vm->active_list, true);
count_active = capture_error_bo(bo + count_inactive, count_active,
&vm->inactive_list, true);
error->pinned_bo_count = count_inactive + count_active;
error->pinned_bo = bo;
}
/* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
@ -1403,6 +1396,10 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
#endif
error->reset_count = i915_reset_count(&dev_priv->gpu_error);
error->suspend_count = dev_priv->suspend_count;
memcpy(&error->device_info,
INTEL_INFO(dev_priv),
sizeof(error->device_info));
}
/**
@ -1436,9 +1433,10 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
i915_capture_gen_state(dev_priv, error);
i915_capture_reg_state(dev_priv, error);
i915_gem_capture_buffers(dev_priv, error);
i915_gem_record_fences(dev_priv, error);
i915_gem_record_rings(dev_priv, error);
i915_capture_active_buffers(dev_priv, error);
i915_capture_pinned_buffers(dev_priv, error);
do_gettimeofday(&error->time);

View file

@ -183,7 +183,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc,
struct i915_guc_client *client,
u16 new_id)
{
struct sg_table *sg = guc->ctx_pool_obj->pages;
struct sg_table *sg = guc->ctx_pool_vma->pages;
void *doorbell_bitmap = guc->doorbell_bitmap;
struct guc_doorbell_info *doorbell;
struct guc_context_desc desc;
@ -325,7 +325,6 @@ static void guc_init_proc_desc(struct intel_guc *guc,
static void guc_init_ctx_desc(struct intel_guc *guc,
struct i915_guc_client *client)
{
struct drm_i915_gem_object *client_obj = client->client_obj;
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct intel_engine_cs *engine;
struct i915_gem_context *ctx = client->owner;
@ -340,10 +339,10 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
desc.priority = client->priority;
desc.db_id = client->doorbell_id;
for_each_engine(engine, dev_priv) {
for_each_engine_masked(engine, dev_priv, client->engines) {
struct intel_context *ce = &ctx->engine[engine->id];
struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
struct drm_i915_gem_object *obj;
uint32_t guc_engine_id = engine->guc_id;
struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
/* TODO: We have a design issue to be solved here. Only when we
* receive the first batch, we know which engine is used by the
@ -358,30 +357,29 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->context_desc = lower_32_bits(ce->lrc_desc);
/* The state page is after PPHWSP */
gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
lrc->ring_lcra =
i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
obj = ce->ring->obj;
gfx_addr = i915_gem_obj_ggtt_offset(obj);
lrc->ring_begin = gfx_addr;
lrc->ring_end = gfx_addr + obj->base.size - 1;
lrc->ring_next_free_location = gfx_addr;
lrc->ring_begin = i915_ggtt_offset(ce->ring->vma);
lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
lrc->ring_next_free_location = lrc->ring_begin;
lrc->ring_current_tail_pointer_value = 0;
desc.engines_used |= (1 << engine->guc_id);
desc.engines_used |= (1 << guc_engine_id);
}
DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
client->engines, desc.engines_used);
WARN_ON(desc.engines_used == 0);
/*
* The doorbell, process descriptor, and workqueue are all parts
* of the client object, which the GuC will reference via the GGTT
*/
gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
gfx_addr = i915_ggtt_offset(client->vma);
desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
client->doorbell_offset;
desc.db_trigger_cpu = (uintptr_t)client->client_base +
client->doorbell_offset;
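/*
 * Illustrative layout of the client buffer referenced above (page counts
 * per the comment in guc_client_alloc(); offsets follow GUC_DB_SIZE):
 *
 *	+0		doorbell + process descriptor (1 page, GUC_DB_SIZE)
 *	+GUC_DB_SIZE	work queue (2 pages, GUC_WQ_SIZE)
 *
 * db_trigger_phy/db_trigger_cpu point at client->doorbell_offset within the
 * first page, while guc_add_workqueue_item() maps work queue pages starting
 * at (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT.
 */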
@ -397,7 +395,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
desc.desc_private = (uintptr_t)client;
/* Pool context is pinned already */
sg = guc->ctx_pool_obj->pages;
sg = guc->ctx_pool_vma->pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
sizeof(desc) * client->ctx_index);
}
@ -410,7 +408,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
memset(&desc, 0, sizeof(desc));
sg = guc->ctx_pool_obj->pages;
sg = guc->ctx_pool_vma->pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
sizeof(desc) * client->ctx_index);
}
@ -457,6 +455,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
/* wqi_len is in DWords, and does not include the one-word header */
const size_t wqi_size = sizeof(struct guc_wq_item);
const u32 wqi_len = wqi_size/sizeof(u32) - 1;
struct intel_engine_cs *engine = rq->engine;
struct guc_process_desc *desc;
struct guc_wq_item *wqi;
void *base;
@ -492,18 +491,17 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
/* WQ starts from the page after doorbell / process_desc */
wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
wq_off &= PAGE_SIZE - 1;
base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page));
wqi = (struct guc_wq_item *)((char *)base + wq_off);
/* Now fill in the 4-word work queue item */
wqi->header = WQ_TYPE_INORDER |
(wqi_len << WQ_LEN_SHIFT) |
(rq->engine->guc_id << WQ_TARGET_SHIFT) |
(engine->guc_id << WQ_TARGET_SHIFT) |
WQ_NO_WCFLUSH_WAIT;
/* The GuC wants only the low-order word of the context descriptor */
wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
rq->engine);
wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
wqi->fence_id = rq->fence.seqno;
@ -611,55 +609,48 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
*/
/**
* gem_allocate_guc_obj() - Allocate gem object for GuC usage
* @dev_priv: driver private data structure
* @size: size of object
* guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
* @guc: the guc
* @size: size of area to allocate (both virtual space and memory)
*
* This is a wrapper to create a gem obj. In order to use it inside GuC, the
* object needs to be pinned for its lifetime. Also we must pin it to gtt space other
* than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC.
* This is a wrapper to create an object for use with the GuC. In order to
* use it inside the GuC, an object needs to be pinned for its lifetime, so we allocate
* both some backing storage and a range inside the Global GTT. We must pin
* it in the GGTT somewhere other than [0, GUC_WOPCM_TOP) because that
* range is reserved inside GuC.
*
* Return: A drm_i915_gem_object if successful, otherwise NULL.
* Return: An i915_vma if successful, otherwise an ERR_PTR.
*/
static struct drm_i915_gem_object *
gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int ret;
obj = i915_gem_object_create(&dev_priv->drm, size);
if (IS_ERR(obj))
return NULL;
return ERR_CAST(obj);
if (i915_gem_object_get_pages(obj)) {
i915_gem_object_put(obj);
return NULL;
}
vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
if (IS_ERR(vma))
goto err;
if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
i915_gem_object_put(obj);
return NULL;
ret = i915_vma_pin(vma, 0, PAGE_SIZE,
PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
if (ret) {
vma = ERR_PTR(ret);
goto err;
}
/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
return obj;
}
/**
* gem_release_guc_obj() - Release gem object allocated for GuC usage
* @obj: gem obj to be released
*/
static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
{
if (!obj)
return;
if (i915_gem_obj_is_pinned(obj))
i915_gem_object_ggtt_unpin(obj);
return vma;
err:
i915_gem_object_put(obj);
return vma;
}
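/*
 * Usage sketch (not part of the patch): callers such as guc_create_log()
 * and i915_guc_submission_init() follow this pattern, pairing the
 * allocation with i915_vma_unpin_and_release() on teardown.
 * sketch_guc_buffer_create() is a made-up name and the size is arbitrary.
 */
static int sketch_guc_buffer_create(struct intel_guc *guc,
				    struct i915_vma **out)
{
	struct i915_vma *vma;

	vma = guc_allocate_vma(guc, 8 * PAGE_SIZE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	*out = vma;	/* teardown: i915_vma_unpin_and_release(out) */
	return 0;
}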
static void
@ -686,7 +677,7 @@ guc_client_free(struct drm_i915_private *dev_priv,
kunmap(kmap_to_page(client->client_base));
}
gem_release_guc_obj(client->client_obj);
i915_vma_unpin_and_release(&client->vma);
if (client->ctx_index != GUC_INVALID_CTX_ID) {
guc_fini_ctx_desc(guc, client);
@ -696,29 +687,47 @@ guc_client_free(struct drm_i915_private *dev_priv,
kfree(client);
}
/* Check that a doorbell register is in the expected state */
static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
i915_reg_t drbreg = GEN8_DRBREGL(db_id);
uint32_t value = I915_READ(drbreg);
bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
bool expected = test_bit(db_id, guc->doorbell_bitmap);
if (enabled == expected)
return true;
DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
db_id, drbreg.reg, value,
expected ? "active" : "inactive");
return false;
}
/*
* Borrow the first client to set up & tear down every doorbell
* Borrow the first client to set up & tear down each unused doorbell
* in turn, to ensure that all doorbell h/w is (re)initialised.
*/
static void guc_init_doorbell_hw(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct i915_guc_client *client = guc->execbuf_client;
uint16_t db_id, i;
int err;
uint16_t db_id;
int i, err;
/* Save client's original doorbell selection */
db_id = client->doorbell_id;
for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
i915_reg_t drbreg = GEN8_DRBREGL(i);
u32 value = I915_READ(drbreg);
/* Skip if doorbell is OK */
if (guc_doorbell_check(guc, i))
continue;
err = guc_update_doorbell_id(guc, client, i);
/* Report update failure or unexpectedly active doorbell */
if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED)))
DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n",
i, drbreg.reg, value, err);
if (err)
DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
i, err);
}
/* Restore to original value */
@ -727,20 +736,15 @@ static void guc_init_doorbell_hw(struct intel_guc *guc)
DRM_ERROR("Failed to restore doorbell to %d, err %d\n",
db_id, err);
for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
i915_reg_t drbreg = GEN8_DRBREGL(i);
u32 value = I915_READ(drbreg);
if (i != db_id && (value & GUC_DOORBELL_ENABLED))
DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n",
i, drbreg.reg, value);
}
/* Read back & verify all doorbell registers */
for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
(void)guc_doorbell_check(guc, i);
}
/**
* guc_client_alloc() - Allocate an i915_guc_client
* @dev_priv: driver private data structure
* @engines: The set of engines to enable for this client
* @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
* The kernel client to replace ExecList submission is created with
* NORMAL priority. Priority of a client for scheduler can be HIGH,
@ -752,22 +756,24 @@ static void guc_init_doorbell_hw(struct intel_guc *guc)
*/
static struct i915_guc_client *
guc_client_alloc(struct drm_i915_private *dev_priv,
uint32_t engines,
uint32_t priority,
struct i915_gem_context *ctx)
{
struct i915_guc_client *client;
struct intel_guc *guc = &dev_priv->guc;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
uint16_t db_id;
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client)
return NULL;
client->doorbell_id = GUC_INVALID_DOORBELL_ID;
client->priority = priority;
client->owner = ctx;
client->guc = guc;
client->engines = engines;
client->priority = priority;
client->doorbell_id = GUC_INVALID_DOORBELL_ID;
client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
@ -777,13 +783,13 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
}
/* The first page is doorbell/proc_desc. The two following pages are the wq. */
obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE);
if (!obj)
vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
if (IS_ERR(vma))
goto err;
/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
client->client_obj = obj;
client->client_base = kmap(i915_gem_object_get_page(obj, 0));
client->vma = vma;
client->client_base = kmap(i915_vma_first_page(vma));
client->wq_offset = GUC_DB_SIZE;
client->wq_size = GUC_WQ_SIZE;
@ -809,8 +815,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
if (guc_init_doorbell(guc, client, db_id))
goto err;
DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n",
priority, client, client->ctx_index);
DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
priority, client, client->engines, client->ctx_index);
DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
client->doorbell_id, client->doorbell_offset);
@ -825,8 +831,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
static void guc_create_log(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
unsigned long offset;
uint32_t size, flags;
@ -842,16 +847,16 @@ static void guc_create_log(struct intel_guc *guc)
GUC_LOG_ISR_PAGES + 1 +
GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
obj = guc->log_obj;
if (!obj) {
obj = gem_allocate_guc_obj(dev_priv, size);
if (!obj) {
vma = guc->log_vma;
if (!vma) {
vma = guc_allocate_vma(guc, size);
if (IS_ERR(vma)) {
/* logging will be off */
i915.guc_log_level = -1;
return;
}
guc->log_obj = obj;
guc->log_vma = vma;
}
/* each allocated unit is a page */
@ -860,7 +865,7 @@ static void guc_create_log(struct intel_guc *guc)
(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */
offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
}
@ -889,7 +894,7 @@ static void init_guc_policies(struct guc_policies *policies)
static void guc_create_ads(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
struct guc_ads *ads;
struct guc_policies *policies;
struct guc_mmio_reg_state *reg_state;
@ -902,16 +907,16 @@ static void guc_create_ads(struct intel_guc *guc)
sizeof(struct guc_mmio_reg_state) +
GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
obj = guc->ads_obj;
if (!obj) {
obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size));
if (!obj)
vma = guc->ads_vma;
if (!vma) {
vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
if (IS_ERR(vma))
return;
guc->ads_obj = obj;
guc->ads_vma = vma;
}
page = i915_gem_object_get_page(obj, 0);
page = i915_vma_first_page(vma);
ads = kmap(page);
/*
@ -922,7 +927,7 @@ static void guc_create_ads(struct intel_guc *guc)
* to find it.
*/
engine = &dev_priv->engine[RCS];
ads->golden_context_lrca = engine->status_page.gfx_addr;
ads->golden_context_lrca = engine->status_page.ggtt_offset;
for_each_engine(engine, dev_priv)
ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine);
@ -931,8 +936,8 @@ static void guc_create_ads(struct intel_guc *guc)
policies = (void *)ads + sizeof(struct guc_ads);
init_guc_policies(policies);
ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) +
sizeof(struct guc_ads);
ads->scheduler_policies =
i915_ggtt_offset(vma) + sizeof(struct guc_ads);
/* MMIO reg state */
reg_state = (void *)policies + sizeof(struct guc_policies);
@ -960,10 +965,9 @@ static void guc_create_ads(struct intel_guc *guc)
*/
int i915_guc_submission_init(struct drm_i915_private *dev_priv)
{
const size_t ctxsize = sizeof(struct guc_context_desc);
const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
const size_t gemsize = round_up(poolsize, PAGE_SIZE);
struct intel_guc *guc = &dev_priv->guc;
struct i915_vma *vma;
u32 size;
/* Wipe bitmap & delete client in case of reinitialisation */
bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
@ -972,13 +976,15 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
if (!i915.enable_guc_submission)
return 0; /* not enabled */
if (guc->ctx_pool_obj)
if (guc->ctx_pool_vma)
return 0; /* already allocated */
guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize);
if (!guc->ctx_pool_obj)
return -ENOMEM;
size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS * sizeof(struct guc_context_desc));
vma = guc_allocate_vma(guc, size);
if (IS_ERR(vma))
return PTR_ERR(vma);
guc->ctx_pool_vma = vma;
ida_init(&guc->ctx_ids);
guc_create_log(guc);
guc_create_ads(guc);
@ -994,6 +1000,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
/* client for execbuf submission */
client = guc_client_alloc(dev_priv,
INTEL_INFO(dev_priv)->ring_mask,
GUC_CTX_PRIORITY_KMD_NORMAL,
dev_priv->kernel_context);
if (!client) {
@ -1030,16 +1037,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
gem_release_guc_obj(dev_priv->guc.ads_obj);
guc->ads_obj = NULL;
i915_vma_unpin_and_release(&guc->ads_vma);
i915_vma_unpin_and_release(&guc->log_vma);
gem_release_guc_obj(dev_priv->guc.log_obj);
guc->log_obj = NULL;
if (guc->ctx_pool_obj)
if (guc->ctx_pool_vma)
ida_destroy(&guc->ctx_ids);
gem_release_guc_obj(guc->ctx_pool_obj);
guc->ctx_pool_obj = NULL;
i915_vma_unpin_and_release(&guc->ctx_pool_vma);
}
/**
@ -1062,7 +1065,7 @@ int intel_guc_suspend(struct drm_device *dev)
/* any value greater than GUC_POWER_D0 */
data[1] = GUC_POWER_D1;
/* first page is shared data with GuC */
data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
data[2] = i915_ggtt_offset(ctx->engine[RCS].state);
return host2guc_action(guc, data, ARRAY_SIZE(data));
}
@ -1087,7 +1090,7 @@ int intel_guc_resume(struct drm_device *dev)
data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
data[1] = GUC_POWER_D0;
/* first page is shared data with GuC */
data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state);
data[2] = i915_ggtt_offset(ctx->engine[RCS].state);
return host2guc_action(guc, data, ARRAY_SIZE(data));
}

View file

@ -972,10 +972,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
static void notify_ring(struct intel_engine_cs *engine)
{
smp_store_mb(engine->breadcrumbs.irq_posted, true);
if (intel_engine_wakeup(engine)) {
if (intel_engine_wakeup(engine))
trace_i915_gem_request_notify(engine);
engine->breadcrumbs.irq_wakeups++;
}
}
static void vlv_c0_read(struct drm_i915_private *dev_priv,
@ -3044,22 +3042,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
return HANGCHECK_HUNG;
}
static unsigned long kick_waiters(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
if (engine->hangcheck.user_interrupts == irq_count &&
!test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
engine->name);
intel_engine_enable_fake_irq(engine);
}
return irq_count;
}
/*
* This is called when the chip hasn't reported back with completed
* batchbuffers in a long time. We keep track per ring seqno progress and
@ -3097,7 +3079,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
bool busy = intel_engine_has_waiter(engine);
u64 acthd;
u32 seqno;
unsigned user_interrupts;
semaphore_clear_deadlocks(dev_priv);
@ -3114,15 +3095,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
acthd = intel_engine_get_active_head(engine);
seqno = intel_engine_get_seqno(engine);
/* Reset stuck interrupts between batch advances */
user_interrupts = 0;
if (engine->hangcheck.seqno == seqno) {
if (!intel_engine_is_active(engine)) {
engine->hangcheck.action = HANGCHECK_IDLE;
if (busy) {
/* Safeguard against driver failure */
user_interrupts = kick_waiters(engine);
engine->hangcheck.score += BUSY;
}
} else {
@ -3185,7 +3162,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
engine->hangcheck.seqno = seqno;
engine->hangcheck.acthd = acthd;
engine->hangcheck.user_interrupts = user_interrupts;
busy_count += busy;
}

View file

@ -0,0 +1,101 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include <linux/kernel.h>
#include <asm/fpu/api.h>
#include "i915_drv.h"
static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
#ifdef CONFIG_AS_MOVNTDQA
static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
{
kernel_fpu_begin();
len >>= 4;
while (len >= 4) {
asm("movntdqa (%0), %%xmm0\n"
"movntdqa 16(%0), %%xmm1\n"
"movntdqa 32(%0), %%xmm2\n"
"movntdqa 48(%0), %%xmm3\n"
"movaps %%xmm0, (%1)\n"
"movaps %%xmm1, 16(%1)\n"
"movaps %%xmm2, 32(%1)\n"
"movaps %%xmm3, 48(%1)\n"
:: "r" (src), "r" (dst) : "memory");
src += 64;
dst += 64;
len -= 4;
}
while (len--) {
asm("movntdqa (%0), %%xmm0\n"
"movaps %%xmm0, (%1)\n"
:: "r" (src), "r" (dst) : "memory");
src += 16;
dst += 16;
}
kernel_fpu_end();
}
#endif
/**
* i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
* @dst: destination pointer
* @src: source pointer
* @len: how many bytes to copy
*
* i915_memcpy_from_wc copies @len bytes from @src to @dst using
* non-temporal instructions where available. Note that all arguments
* (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
* of 16.
*
* To test whether accelerated reads from WC are supported, use
* i915_memcpy_from_wc(NULL, NULL, 0);
*
* Returns true if the copy was successful, false if the preconditions
* are not met.
*/
bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
return false;
#ifdef CONFIG_AS_MOVNTDQA
if (static_branch_likely(&has_movntdqa)) {
if (likely(len))
__memcpy_ntdqa(dst, src, len);
return true;
}
#endif
return false;
}
void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
{
if (static_cpu_has(X86_FEATURE_XMM4_1))
static_branch_enable(&has_movntdqa);
}
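As an illustration of the interface documented above, here is a minimal caller sketch; the helper name read_back_wc() and the memcpy() fallback are assumptions made for the example, not part of this patch, and the prototype is assumed to come from i915_drv.h. A caller can also probe support up front with i915_memcpy_from_wc(NULL, NULL, 0), as the kernel-doc notes.
#include <linux/string.h>
#include "i915_drv.h"
/* Hypothetical caller, for illustration only. */
static void read_back_wc(void *dst, const void *wc_src, unsigned long len)
{
        /*
         * dst, wc_src and len must all honour the 16-byte alignment and
         * size rules above; if they do not, or movntdqa is unavailable,
         * i915_memcpy_from_wc() returns false and we fall back to a
         * plain (much slower) read from WC memory.
         */
        if (!i915_memcpy_from_wc(dst, wc_src, len))
                memcpy(dst, wc_src, len);
}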

View file

@ -0,0 +1,84 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include <linux/mm.h>
#include <linux/io-mapping.h>
#include <asm/pgtable.h>
#include "i915_drv.h"
struct remap_pfn {
struct mm_struct *mm;
unsigned long pfn;
pgprot_t prot;
};
static int remap_pfn(pte_t *pte, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_pfn *r = data;
/* Special PTEs are not associated with any struct page */
set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot)));
r->pfn++;
return 0;
}
/**
* remap_io_mapping - remap an IO mapping to userspace
* @vma: user vma to map to
* @addr: target user address to start at
* @pfn: physical address of kernel memory
* @size: size of map area
* @iomap: the source io_mapping
*
* Note: this is only safe if the mm semaphore is held when called.
*/
int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap)
{
struct remap_pfn r;
int err;
GEM_BUG_ON((vma->vm_flags &
(VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) !=
(VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP));
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
r.mm = vma->vm_mm;
r.pfn = pfn;
r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
(pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
if (unlikely(err)) {
zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
return err;
}
return 0;
}
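To show how the helper above might be wired up, a hedged sketch of a driver mmap handler follows; my_mmap(), my_iomap and my_pfn_base are invented names for the example, and the flag setup mirrors what the GEM_BUG_ON above requires.
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/io-mapping.h>
#include "i915_drv.h"
/* Assumed to be initialised elsewhere, e.g. via io_mapping_create_wc(). */
static struct io_mapping *my_iomap;
static unsigned long my_pfn_base;
static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
        unsigned long size = vma->vm_end - vma->vm_start;
        /* remap_io_mapping() expects exactly these flags to be set. */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        /* ->mmap runs with the mm semaphore held, per the note above. */
        return remap_io_mapping(vma, vma->vm_start, my_pfn_base, size,
                                my_iomap);
}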

View file

@ -45,6 +45,7 @@ struct i915_params i915 __read_mostly = {
.fastboot = 0,
.prefault_disable = 0,
.load_detect_test = 0,
.force_reset_modeset_test = 0,
.reset = true,
.invert_brightness = 0,
.disable_display = 0,
@ -161,6 +162,11 @@ MODULE_PARM_DESC(load_detect_test,
"Force-enable the VGA load detect code for testing (default:false). "
"For developers only.");
module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600);
MODULE_PARM_DESC(force_reset_modeset_test,
"Force a modeset during gpu reset for testing (default:false). "
"For developers only.");
module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600);
MODULE_PARM_DESC(invert_brightness,
"Invert backlight brightness "

View file

@ -57,6 +57,7 @@ struct i915_params {
bool fastboot;
bool prefault_disable;
bool load_detect_test;
bool force_reset_modeset_test;
bool reset;
bool disable_display;
bool verbose_state_checks;

View file

@ -3660,8 +3660,17 @@ enum {
#define VIDEO_DIP_ENABLE_SPD_HSW (1 << 0)
/* Panel power sequencing */
#define PP_STATUS _MMIO(0x61200)
#define PP_ON (1 << 31)
#define PPS_BASE 0x61200
#define VLV_PPS_BASE (VLV_DISPLAY_BASE + PPS_BASE)
#define PCH_PPS_BASE 0xC7200
#define _MMIO_PPS(pps_idx, reg) _MMIO(dev_priv->pps_mmio_base - \
PPS_BASE + (reg) + \
(pps_idx) * 0x100)
#define _PP_STATUS 0x61200
#define PP_STATUS(pps_idx) _MMIO_PPS(pps_idx, _PP_STATUS)
#define PP_ON (1 << 31)
/*
* Indicates that all dependencies of the panel are on:
*
@ -3669,14 +3678,14 @@ enum {
* - pipe enabled
* - LVDS/DVOB/DVOC on
*/
#define PP_READY (1 << 30)
#define PP_SEQUENCE_NONE (0 << 28)
#define PP_SEQUENCE_POWER_UP (1 << 28)
#define PP_SEQUENCE_POWER_DOWN (2 << 28)
#define PP_SEQUENCE_MASK (3 << 28)
#define PP_SEQUENCE_SHIFT 28
#define PP_CYCLE_DELAY_ACTIVE (1 << 27)
#define PP_SEQUENCE_STATE_MASK 0x0000000f
#define PP_READY (1 << 30)
#define PP_SEQUENCE_NONE (0 << 28)
#define PP_SEQUENCE_POWER_UP (1 << 28)
#define PP_SEQUENCE_POWER_DOWN (2 << 28)
#define PP_SEQUENCE_MASK (3 << 28)
#define PP_SEQUENCE_SHIFT 28
#define PP_CYCLE_DELAY_ACTIVE (1 << 27)
#define PP_SEQUENCE_STATE_MASK 0x0000000f
#define PP_SEQUENCE_STATE_OFF_IDLE (0x0 << 0)
#define PP_SEQUENCE_STATE_OFF_S0_1 (0x1 << 0)
#define PP_SEQUENCE_STATE_OFF_S0_2 (0x2 << 0)
@ -3686,11 +3695,46 @@ enum {
#define PP_SEQUENCE_STATE_ON_S1_2 (0xa << 0)
#define PP_SEQUENCE_STATE_ON_S1_3 (0xb << 0)
#define PP_SEQUENCE_STATE_RESET (0xf << 0)
#define PP_CONTROL _MMIO(0x61204)
#define POWER_TARGET_ON (1 << 0)
#define PP_ON_DELAYS _MMIO(0x61208)
#define PP_OFF_DELAYS _MMIO(0x6120c)
#define PP_DIVISOR _MMIO(0x61210)
#define _PP_CONTROL 0x61204
#define PP_CONTROL(pps_idx) _MMIO_PPS(pps_idx, _PP_CONTROL)
#define PANEL_UNLOCK_REGS (0xabcd << 16)
#define PANEL_UNLOCK_MASK (0xffff << 16)
#define BXT_POWER_CYCLE_DELAY_MASK 0x1f0
#define BXT_POWER_CYCLE_DELAY_SHIFT 4
#define EDP_FORCE_VDD (1 << 3)
#define EDP_BLC_ENABLE (1 << 2)
#define PANEL_POWER_RESET (1 << 1)
#define PANEL_POWER_OFF (0 << 0)
#define PANEL_POWER_ON (1 << 0)
#define _PP_ON_DELAYS 0x61208
#define PP_ON_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_ON_DELAYS)
#define PANEL_PORT_SELECT_SHIFT 30
#define PANEL_PORT_SELECT_MASK (3 << 30)
#define PANEL_PORT_SELECT_LVDS (0 << 30)
#define PANEL_PORT_SELECT_DPA (1 << 30)
#define PANEL_PORT_SELECT_DPC (2 << 30)
#define PANEL_PORT_SELECT_DPD (3 << 30)
#define PANEL_PORT_SELECT_VLV(port) ((port) << 30)
#define PANEL_POWER_UP_DELAY_MASK 0x1fff0000
#define PANEL_POWER_UP_DELAY_SHIFT 16
#define PANEL_LIGHT_ON_DELAY_MASK 0x1fff
#define PANEL_LIGHT_ON_DELAY_SHIFT 0
#define _PP_OFF_DELAYS 0x6120C
#define PP_OFF_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_OFF_DELAYS)
#define PANEL_POWER_DOWN_DELAY_MASK 0x1fff0000
#define PANEL_POWER_DOWN_DELAY_SHIFT 16
#define PANEL_LIGHT_OFF_DELAY_MASK 0x1fff
#define PANEL_LIGHT_OFF_DELAY_SHIFT 0
#define _PP_DIVISOR 0x61210
#define PP_DIVISOR(pps_idx) _MMIO_PPS(pps_idx, _PP_DIVISOR)
#define PP_REFERENCE_DIVIDER_MASK 0xffffff00
#define PP_REFERENCE_DIVIDER_SHIFT 8
#define PANEL_POWER_CYCLE_DELAY_MASK 0x1f
#define PANEL_POWER_CYCLE_DELAY_SHIFT 0
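A worked expansion may help here (illustrative only, using the defines above together with the old VLV addresses removed further down): with dev_priv->pps_mmio_base set to VLV_PPS_BASE, sequencer index 1 resolves to the address the old _PIPEB_PP_STATUS define carried.
/*
 * Example expansion, assuming dev_priv->pps_mmio_base == VLV_PPS_BASE:
 *
 *   PP_STATUS(1) = _MMIO_PPS(1, _PP_STATUS)
 *                = _MMIO(VLV_PPS_BASE - PPS_BASE + 0x61200 + 1 * 0x100)
 *                = _MMIO(VLV_DISPLAY_BASE + 0x61300)
 */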
/* Panel fitting */
#define PFIT_CONTROL _MMIO(dev_priv->info.display_mmio_offset + 0x61230)
@ -6750,77 +6794,6 @@ enum {
#define PCH_LVDS _MMIO(0xe1180)
#define LVDS_DETECTED (1 << 1)
/* vlv has 2 sets of panel control regs. */
#define _PIPEA_PP_STATUS (VLV_DISPLAY_BASE + 0x61200)
#define _PIPEA_PP_CONTROL (VLV_DISPLAY_BASE + 0x61204)
#define _PIPEA_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61208)
#define PANEL_PORT_SELECT_VLV(port) ((port) << 30)
#define _PIPEA_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6120c)
#define _PIPEA_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61210)
#define _PIPEB_PP_STATUS (VLV_DISPLAY_BASE + 0x61300)
#define _PIPEB_PP_CONTROL (VLV_DISPLAY_BASE + 0x61304)
#define _PIPEB_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61308)
#define _PIPEB_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6130c)
#define _PIPEB_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61310)
#define VLV_PIPE_PP_STATUS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS)
#define VLV_PIPE_PP_CONTROL(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL)
#define VLV_PIPE_PP_ON_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS)
#define VLV_PIPE_PP_OFF_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS)
#define VLV_PIPE_PP_DIVISOR(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR)
#define _PCH_PP_STATUS 0xc7200
#define _PCH_PP_CONTROL 0xc7204
#define PANEL_UNLOCK_REGS (0xabcd << 16)
#define PANEL_UNLOCK_MASK (0xffff << 16)
#define BXT_POWER_CYCLE_DELAY_MASK (0x1f0)
#define BXT_POWER_CYCLE_DELAY_SHIFT 4
#define EDP_FORCE_VDD (1 << 3)
#define EDP_BLC_ENABLE (1 << 2)
#define PANEL_POWER_RESET (1 << 1)
#define PANEL_POWER_OFF (0 << 0)
#define PANEL_POWER_ON (1 << 0)
#define _PCH_PP_ON_DELAYS 0xc7208
#define PANEL_PORT_SELECT_MASK (3 << 30)
#define PANEL_PORT_SELECT_LVDS (0 << 30)
#define PANEL_PORT_SELECT_DPA (1 << 30)
#define PANEL_PORT_SELECT_DPC (2 << 30)
#define PANEL_PORT_SELECT_DPD (3 << 30)
#define PANEL_POWER_UP_DELAY_MASK (0x1fff0000)
#define PANEL_POWER_UP_DELAY_SHIFT 16
#define PANEL_LIGHT_ON_DELAY_MASK (0x1fff)
#define PANEL_LIGHT_ON_DELAY_SHIFT 0
#define _PCH_PP_OFF_DELAYS 0xc720c
#define PANEL_POWER_DOWN_DELAY_MASK (0x1fff0000)
#define PANEL_POWER_DOWN_DELAY_SHIFT 16
#define PANEL_LIGHT_OFF_DELAY_MASK (0x1fff)
#define PANEL_LIGHT_OFF_DELAY_SHIFT 0
#define _PCH_PP_DIVISOR 0xc7210
#define PP_REFERENCE_DIVIDER_MASK (0xffffff00)
#define PP_REFERENCE_DIVIDER_SHIFT 8
#define PANEL_POWER_CYCLE_DELAY_MASK (0x1f)
#define PANEL_POWER_CYCLE_DELAY_SHIFT 0
#define PCH_PP_STATUS _MMIO(_PCH_PP_STATUS)
#define PCH_PP_CONTROL _MMIO(_PCH_PP_CONTROL)
#define PCH_PP_ON_DELAYS _MMIO(_PCH_PP_ON_DELAYS)
#define PCH_PP_OFF_DELAYS _MMIO(_PCH_PP_OFF_DELAYS)
#define PCH_PP_DIVISOR _MMIO(_PCH_PP_DIVISOR)
/* BXT PPS changes - 2nd set of PPS registers */
#define _BXT_PP_STATUS2 0xc7300
#define _BXT_PP_CONTROL2 0xc7304
#define _BXT_PP_ON_DELAYS2 0xc7308
#define _BXT_PP_OFF_DELAYS2 0xc730c
#define BXT_PP_STATUS(n) _MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2)
#define BXT_PP_CONTROL(n) _MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2)
#define BXT_PP_ON_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
#define BXT_PP_OFF_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
#define _PCH_DP_B 0xe4100
#define PCH_DP_B _MMIO(_PCH_DP_B)
#define _PCH_DPB_AUX_CH_CTL 0xe4110
@ -7063,12 +7036,13 @@ enum {
#define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C)
#define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030)
#define GEN6_RP_CUR_UP_EI _MMIO(0xA050)
#define GEN6_CURICONT_MASK 0xffffff
#define GEN6_RP_EI_MASK 0xffffff
#define GEN6_CURICONT_MASK GEN6_RP_EI_MASK
#define GEN6_RP_CUR_UP _MMIO(0xA054)
#define GEN6_CURBSYTAVG_MASK 0xffffff
#define GEN6_CURBSYTAVG_MASK GEN6_RP_EI_MASK
#define GEN6_RP_PREV_UP _MMIO(0xA058)
#define GEN6_RP_CUR_DOWN_EI _MMIO(0xA05C)
#define GEN6_CURIAVG_MASK 0xffffff
#define GEN6_CURIAVG_MASK GEN6_RP_EI_MASK
#define GEN6_RP_CUR_DOWN _MMIO(0xA060)
#define GEN6_RP_PREV_DOWN _MMIO(0xA064)
#define GEN6_RP_UP_EI _MMIO(0xA068)

View file

@ -37,25 +37,6 @@ static void i915_save_display(struct drm_device *dev)
if (INTEL_INFO(dev)->gen <= 4)
dev_priv->regfile.saveDSPARB = I915_READ(DSPARB);
/* LVDS state */
if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
dev_priv->regfile.saveLVDS = I915_READ(PCH_LVDS);
else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev))
dev_priv->regfile.saveLVDS = I915_READ(LVDS);
/* Panel power sequencer */
if (HAS_PCH_SPLIT(dev)) {
dev_priv->regfile.savePP_CONTROL = I915_READ(PCH_PP_CONTROL);
dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
} else if (INTEL_INFO(dev)->gen <= 4) {
dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL);
dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
dev_priv->regfile.savePP_DIVISOR = I915_READ(PP_DIVISOR);
}
/* save FBC interval */
if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev))
dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL);
@ -64,33 +45,11 @@ static void i915_save_display(struct drm_device *dev)
static void i915_restore_display(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
u32 mask = 0xffffffff;
/* Display arbitration */
if (INTEL_INFO(dev)->gen <= 4)
I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB);
mask = ~LVDS_PORT_EN;
/* LVDS state */
if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
I915_WRITE(PCH_LVDS, dev_priv->regfile.saveLVDS & mask);
else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev))
I915_WRITE(LVDS, dev_priv->regfile.saveLVDS & mask);
/* Panel power sequencer */
if (HAS_PCH_SPLIT(dev)) {
I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
} else if (INTEL_INFO(dev)->gen <= 4) {
I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
I915_WRITE(PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
}
/* only restore FBC info on platforms that support FBC */
intel_fbc_global_disable(dev_priv);

View file

@ -26,6 +26,40 @@
#include "i915_drv.h"
static void intel_breadcrumbs_hangcheck(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
if (!b->irq_enabled)
return;
if (time_before(jiffies, b->timeout)) {
mod_timer(&b->hangcheck, b->timeout);
return;
}
DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
/* Ensure that even if the GPU hangs, we get woken up.
*
* However, note that if no one is waiting, we never notice
* a gpu hang. Eventually, we will have to wait for a resource
* held by the GPU and so trigger a hangcheck. In the most
* pathological case, this will be upon memory starvation! To
* prevent this, we also queue the hangcheck from the retire
* worker.
*/
i915_queue_hangcheck(engine->i915);
}
static unsigned long wait_timeout(void)
{
return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
}
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@ -37,10 +71,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
* every jiffie in order to kick the oldest waiter to do the
* coherent seqno check.
*/
rcu_read_lock();
if (intel_engine_wakeup(engine))
mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
rcu_read_unlock();
}
static void irq_enable(struct intel_engine_cs *engine)
@ -51,13 +83,6 @@ static void irq_enable(struct intel_engine_cs *engine)
*/
engine->breadcrumbs.irq_posted = true;
/* Make sure the current hangcheck doesn't falsely accuse a just
* started irq handler from missing an interrupt (because the
* interrupt count still matches the stale value from when
* the irq handler was disabled, many hangchecks ago).
*/
engine->breadcrumbs.irq_wakeups++;
spin_lock_irq(&engine->i915->irq_lock);
engine->irq_enable(engine);
spin_unlock_irq(&engine->i915->irq_lock);
@ -98,17 +123,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
}
if (!b->irq_enabled ||
test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
mod_timer(&b->fake_irq, jiffies + 1);
/* Ensure that even if the GPU hangs, we get woken up.
*
* However, note that if no one is waiting, we never notice
* a gpu hang. Eventually, we will have to wait for a resource
* held by the GPU and so trigger a hangcheck. In the most
* pathological case, this will be upon memory starvation!
*/
i915_queue_hangcheck(i915);
} else {
/* Ensure we never sleep indefinitely */
GEM_BUG_ON(!time_after(b->timeout, jiffies));
mod_timer(&b->hangcheck, b->timeout);
}
}
static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
@ -211,7 +232,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
}
rb_link_node(&wait->node, parent, p);
rb_insert_color(&wait->node, &b->waiters);
GEM_BUG_ON(!first && !b->irq_seqno_bh);
GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
if (completed) {
struct rb_node *next = rb_next(completed);
@ -219,8 +240,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
GEM_BUG_ON(!next && !first);
if (next && next != &wait->node) {
GEM_BUG_ON(first);
b->timeout = wait_timeout();
b->first_wait = to_wait(next);
smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
* seqno, processing the completed tasks and selecting
* the next waiter, we may have missed the interrupt
@ -245,8 +267,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
b->timeout = wait_timeout();
b->first_wait = wait;
smp_store_mb(b->irq_seqno_bh, wait->tsk);
rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
* perform a cursory check to prevent a missed interrupt.
* Either we miss the interrupt whilst programming the hardware,
@ -257,7 +280,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
*/
__intel_breadcrumbs_enable_irq(b);
}
GEM_BUG_ON(!b->irq_seqno_bh);
GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
GEM_BUG_ON(!b->first_wait);
GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
@ -277,11 +300,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
return first;
}
void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
{
mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
return rb && to_wait(rb)->tsk->prio <= priority;
@ -317,7 +335,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
const int priority = wakeup_priority(b, wait->tsk);
struct rb_node *next;
GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
/* We are the current bottom-half. Find the next candidate,
* the first waiter in the queue on the remaining oldest
@ -359,14 +377,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
* the interrupt, or if we have to handle an
* exception rather than a seqno completion.
*/
b->timeout = wait_timeout();
b->first_wait = to_wait(next);
smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
__intel_breadcrumbs_enable_irq(b);
wake_up_process(b->irq_seqno_bh);
wake_up_process(b->first_wait->tsk);
} else {
b->first_wait = NULL;
WRITE_ONCE(b->irq_seqno_bh, NULL);
rcu_assign_pointer(b->irq_seqno_bh, NULL);
__intel_breadcrumbs_disable_irq(b);
}
} else {
@ -380,7 +399,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
GEM_BUG_ON(b->first_wait == wait);
GEM_BUG_ON(rb_first(&b->waiters) !=
(b->first_wait ? &b->first_wait->node : NULL));
GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
spin_unlock(&b->lock);
}
@ -536,6 +555,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
setup_timer(&b->fake_irq,
intel_breadcrumbs_fake_irq,
(unsigned long)engine);
setup_timer(&b->hangcheck,
intel_breadcrumbs_hangcheck,
(unsigned long)engine);
/* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current
@ -560,6 +582,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler);
del_timer_sync(&b->hangcheck);
del_timer_sync(&b->fake_irq);
}
@ -573,11 +596,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915)
* RCU lock, i.e. as we call wake_up_process() we must be holding the
* rcu_read_lock().
*/
rcu_read_lock();
for_each_engine(engine, i915)
if (unlikely(intel_engine_wakeup(engine)))
mask |= intel_engine_flag(engine);
rcu_read_unlock();
return mask;
}

File diff suppressed because it is too large

View file

@ -256,6 +256,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev,
static void
intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev,
struct intel_dp *intel_dp);
static void
intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp);
static void pps_lock(struct intel_dp *intel_dp)
{
@ -463,13 +465,13 @@ typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv,
static bool vlv_pipe_has_pp_on(struct drm_i915_private *dev_priv,
enum pipe pipe)
{
return I915_READ(VLV_PIPE_PP_STATUS(pipe)) & PP_ON;
return I915_READ(PP_STATUS(pipe)) & PP_ON;
}
static bool vlv_pipe_has_vdd_on(struct drm_i915_private *dev_priv,
enum pipe pipe)
{
return I915_READ(VLV_PIPE_PP_CONTROL(pipe)) & EDP_FORCE_VDD;
return I915_READ(PP_CONTROL(pipe)) & EDP_FORCE_VDD;
}
static bool vlv_pipe_any(struct drm_i915_private *dev_priv,
@ -486,7 +488,7 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv,
enum pipe pipe;
for (pipe = PIPE_A; pipe <= PIPE_B; pipe++) {
u32 port_sel = I915_READ(VLV_PIPE_PP_ON_DELAYS(pipe)) &
u32 port_sel = I915_READ(PP_ON_DELAYS(pipe)) &
PANEL_PORT_SELECT_MASK;
if (port_sel != PANEL_PORT_SELECT_VLV(port))
@ -583,30 +585,21 @@ static void intel_pps_get_registers(struct drm_i915_private *dev_priv,
struct intel_dp *intel_dp,
struct pps_registers *regs)
{
int pps_idx = 0;
memset(regs, 0, sizeof(*regs));
if (IS_BROXTON(dev_priv)) {
int idx = bxt_power_sequencer_idx(intel_dp);
if (IS_BROXTON(dev_priv))
pps_idx = bxt_power_sequencer_idx(intel_dp);
else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
pps_idx = vlv_power_sequencer_pipe(intel_dp);
regs->pp_ctrl = BXT_PP_CONTROL(idx);
regs->pp_stat = BXT_PP_STATUS(idx);
regs->pp_on = BXT_PP_ON_DELAYS(idx);
regs->pp_off = BXT_PP_OFF_DELAYS(idx);
} else if (HAS_PCH_SPLIT(dev_priv)) {
regs->pp_ctrl = PCH_PP_CONTROL;
regs->pp_stat = PCH_PP_STATUS;
regs->pp_on = PCH_PP_ON_DELAYS;
regs->pp_off = PCH_PP_OFF_DELAYS;
regs->pp_div = PCH_PP_DIVISOR;
} else {
enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
regs->pp_ctrl = VLV_PIPE_PP_CONTROL(pipe);
regs->pp_stat = VLV_PIPE_PP_STATUS(pipe);
regs->pp_on = VLV_PIPE_PP_ON_DELAYS(pipe);
regs->pp_off = VLV_PIPE_PP_OFF_DELAYS(pipe);
regs->pp_div = VLV_PIPE_PP_DIVISOR(pipe);
}
regs->pp_ctrl = PP_CONTROL(pps_idx);
regs->pp_stat = PP_STATUS(pps_idx);
regs->pp_on = PP_ON_DELAYS(pps_idx);
regs->pp_off = PP_OFF_DELAYS(pps_idx);
if (!IS_BROXTON(dev_priv))
regs->pp_div = PP_DIVISOR(pps_idx);
}
static i915_reg_t
@ -651,8 +644,8 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code,
i915_reg_t pp_ctrl_reg, pp_div_reg;
u32 pp_div;
pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe);
pp_div_reg = VLV_PIPE_PP_DIVISOR(pipe);
pp_ctrl_reg = PP_CONTROL(pipe);
pp_div_reg = PP_DIVISOR(pipe);
pp_div = I915_READ(pp_div_reg);
pp_div &= PP_REFERENCE_DIVIDER_MASK;
@ -1836,7 +1829,8 @@ static u32 ironlake_get_pp_control(struct intel_dp *intel_dp)
lockdep_assert_held(&dev_priv->pps_mutex);
control = I915_READ(_pp_ctrl_reg(intel_dp));
if (!IS_BROXTON(dev)) {
if (WARN_ON(!HAS_DDI(dev_priv) &&
(control & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS)) {
control &= ~PANEL_UNLOCK_MASK;
control |= PANEL_UNLOCK_REGS;
}
@ -1957,7 +1951,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
DRM_DEBUG_KMS("PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n",
I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg));
if ((pp & POWER_TARGET_ON) == 0)
if ((pp & PANEL_POWER_ON) == 0)
intel_dp->panel_power_off_time = ktime_get_boottime();
power_domain = intel_display_port_aux_power_domain(intel_encoder);
@ -2044,7 +2038,7 @@ static void edp_panel_on(struct intel_dp *intel_dp)
POSTING_READ(pp_ctrl_reg);
}
pp |= POWER_TARGET_ON;
pp |= PANEL_POWER_ON;
if (!IS_GEN5(dev))
pp |= PANEL_POWER_RESET;
@ -2096,7 +2090,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
pp = ironlake_get_pp_control(intel_dp);
/* We need to switch off panel power _and_ force vdd, for otherwise some
* panels get very unhappy and cease to work. */
pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
EDP_BLC_ENABLE);
pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
@ -2729,7 +2723,7 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
enum pipe pipe = intel_dp->pps_pipe;
i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe);
i915_reg_t pp_on_reg = PP_ON_DELAYS(pipe);
edp_panel_vdd_off_sync(intel_dp);
@ -4666,13 +4660,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
pps_lock(intel_dp);
/*
* Read out the current power sequencer assignment,
* in case the BIOS did something with it.
*/
if (IS_VALLEYVIEW(encoder->dev) || IS_CHERRYVIEW(encoder->dev))
vlv_initial_power_sequencer_setup(intel_dp);
/* Reinit the power sequencer, in case BIOS did something with it. */
intel_dp_pps_init(encoder->dev, intel_dp);
intel_edp_panel_vdd_sanitize(intel_dp);
pps_unlock(intel_dp);
@ -5020,6 +5009,17 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev,
I915_READ(regs.pp_div));
}
static void intel_dp_pps_init(struct drm_device *dev,
struct intel_dp *intel_dp)
{
if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
vlv_initial_power_sequencer_setup(intel_dp);
} else {
intel_dp_init_panel_power_sequencer(dev, intel_dp);
intel_dp_init_panel_power_sequencer_registers(dev, intel_dp);
}
}
/**
* intel_dp_set_drrs_state - program registers for RR switch to take effect
* @dev: DRM device
@ -5434,14 +5434,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
pps_lock(intel_dp);
intel_dp_init_panel_power_timestamps(intel_dp);
if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
vlv_initial_power_sequencer_setup(intel_dp);
} else {
intel_dp_init_panel_power_sequencer(dev, intel_dp);
intel_dp_init_panel_power_sequencer_registers(dev, intel_dp);
}
intel_dp_pps_init(dev, intel_dp);
intel_edp_panel_vdd_sanitize(intel_dp);
pps_unlock(intel_dp);

View file

@ -178,11 +178,22 @@ struct intel_framebuffer {
struct drm_framebuffer base;
struct drm_i915_gem_object *obj;
struct intel_rotation_info rot_info;
/* for each plane in the normal GTT view */
struct {
unsigned int x, y;
} normal[2];
/* for each plane in the rotated GTT view */
struct {
unsigned int x, y;
unsigned int pitch; /* pixels */
} rotated[2];
};
struct intel_fbdev {
struct drm_fb_helper helper;
struct intel_framebuffer *fb;
struct i915_vma *vma;
async_cookie_t cookie;
int preferred_bpp;
};
@ -340,6 +351,15 @@ struct intel_plane_state {
struct drm_plane_state base;
struct drm_rect clip;
struct {
u32 offset;
int x, y;
} main;
struct {
u32 offset;
int x, y;
} aux;
/*
* scaler_id
* = -1 : not using a scaler
@ -1153,12 +1173,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
const char *name, u32 reg, int ref_freq);
extern const struct drm_plane_funcs intel_plane_funcs;
void intel_init_display_hooks(struct drm_i915_private *dev_priv);
unsigned int intel_fb_xy_to_linear(int x, int y,
const struct intel_plane_state *state,
int plane);
void intel_add_fb_offsets(int *x, int *y,
const struct intel_plane_state *state, int plane);
unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info);
bool intel_has_pending_fb_unpin(struct drm_device *dev);
void intel_mark_busy(struct drm_i915_private *dev_priv);
void intel_mark_idle(struct drm_i915_private *dev_priv);
void intel_crtc_restore_mode(struct drm_crtc *crtc);
int intel_display_suspend(struct drm_device *dev);
void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv);
void intel_encoder_destroy(struct drm_encoder *encoder);
int intel_connector_init(struct intel_connector *);
struct intel_connector *intel_connector_alloc(void);
@ -1214,8 +1240,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
void intel_release_load_detect_pipe(struct drm_connector *connector,
struct intel_load_detect_pipe *old,
struct drm_modeset_acquire_ctx *ctx);
int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
unsigned int rotation);
struct i915_vma *
intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
struct drm_framebuffer *
__intel_framebuffer_create(struct drm_device *dev,
@ -1277,9 +1303,7 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state);
#define assert_pipe_enabled(d, p) assert_pipe(d, p, true)
#define assert_pipe_disabled(d, p) assert_pipe(d, p, false)
u32 intel_compute_tile_offset(int *x, int *y,
const struct drm_framebuffer *fb, int plane,
unsigned int pitch,
unsigned int rotation);
const struct intel_plane_state *state, int plane);
void intel_prepare_reset(struct drm_i915_private *dev_priv);
void intel_finish_reset(struct drm_i915_private *dev_priv);
void hsw_enable_pc8(struct drm_i915_private *dev_priv);
@ -1322,13 +1346,14 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
struct drm_i915_gem_object *obj,
unsigned int plane);
u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation);
u32 skl_plane_ctl_format(uint32_t pixel_format);
u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
u32 skl_plane_ctl_rotation(unsigned int rotation);
u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
unsigned int rotation);
int skl_check_plane_surface(struct intel_plane_state *plane_state);
/* intel_csr.c */
void intel_csr_ucode_init(struct drm_i915_private *);

View file

@ -29,7 +29,7 @@
static const struct engine_info {
const char *name;
unsigned exec_id;
unsigned guc_id;
enum intel_engine_hw_id hw_id;
u32 mmio_base;
unsigned irq_shift;
int (*init_legacy)(struct intel_engine_cs *engine);
@ -38,7 +38,7 @@ static const struct engine_info {
[RCS] = {
.name = "render ring",
.exec_id = I915_EXEC_RENDER,
.guc_id = GUC_RENDER_ENGINE,
.hw_id = RCS_HW,
.mmio_base = RENDER_RING_BASE,
.irq_shift = GEN8_RCS_IRQ_SHIFT,
.init_execlists = logical_render_ring_init,
@ -47,7 +47,7 @@ static const struct engine_info {
[BCS] = {
.name = "blitter ring",
.exec_id = I915_EXEC_BLT,
.guc_id = GUC_BLITTER_ENGINE,
.hw_id = BCS_HW,
.mmio_base = BLT_RING_BASE,
.irq_shift = GEN8_BCS_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
@ -56,7 +56,7 @@ static const struct engine_info {
[VCS] = {
.name = "bsd ring",
.exec_id = I915_EXEC_BSD,
.guc_id = GUC_VIDEO_ENGINE,
.hw_id = VCS_HW,
.mmio_base = GEN6_BSD_RING_BASE,
.irq_shift = GEN8_VCS1_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
@ -65,7 +65,7 @@ static const struct engine_info {
[VCS2] = {
.name = "bsd2 ring",
.exec_id = I915_EXEC_BSD,
.guc_id = GUC_VIDEO_ENGINE2,
.hw_id = VCS2_HW,
.mmio_base = GEN8_BSD2_RING_BASE,
.irq_shift = GEN8_VCS2_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
@ -74,7 +74,7 @@ static const struct engine_info {
[VECS] = {
.name = "video enhancement ring",
.exec_id = I915_EXEC_VEBOX,
.guc_id = GUC_VIDEOENHANCE_ENGINE,
.hw_id = VECS_HW,
.mmio_base = VEBOX_RING_BASE,
.irq_shift = GEN8_VECS_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
@ -93,7 +93,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
engine->i915 = dev_priv;
engine->name = info->name;
engine->exec_id = info->exec_id;
engine->hw_id = engine->guc_id = info->guc_id;
engine->hw_id = engine->guc_id = info->hw_id;
engine->mmio_base = info->mmio_base;
engine->irq_shift = info->irq_shift;
@ -109,6 +109,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
int intel_engines_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
unsigned int mask = 0;
int (*init)(struct intel_engine_cs *engine);
unsigned int i;
@ -142,11 +143,10 @@ int intel_engines_init(struct drm_device *dev)
* are added to the driver by a warning and disabling the forgotten
* engines.
*/
if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) {
struct intel_device_info *info =
(struct intel_device_info *)&dev_priv->info;
info->ring_mask = mask;
}
if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
device_info->ring_mask = mask;
device_info->num_rings = hweight32(mask);
return 0;
@ -161,9 +161,56 @@ int intel_engines_init(struct drm_device *dev)
return ret;
}
void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
{
struct drm_i915_private *dev_priv = engine->i915;
/* Our semaphore implementation is strictly monotonic (i.e. we proceed
* so long as the semaphore value in the register/page is greater
* than the sync value), so whenever we reset the seqno, as long as
* we also reset the tracking semaphore value to 0, it will
* always be before the next request's seqno. If we don't reset
* the semaphore value, then when the seqno moves backwards all
* future waits will complete instantly (causing rendering corruption).
*/
if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
if (HAS_VEBOX(dev_priv))
I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
}
if (dev_priv->semaphore) {
struct page *page = i915_vma_first_page(dev_priv->semaphore);
void *semaphores;
/* Semaphores are in noncoherent memory, flush to be safe */
semaphores = kmap(page);
memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
I915_NUM_ENGINES * gen8_semaphore_seqno_size);
kunmap(page);
}
memset(engine->semaphore.sync_seqno, 0,
sizeof(engine->semaphore.sync_seqno));
intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
if (engine->irq_seqno_barrier)
engine->irq_seqno_barrier(engine);
engine->last_submitted_seqno = seqno;
engine->hangcheck.seqno = seqno;
/* After manually advancing the seqno, fake the interrupt in case
* there are any waiters for that seqno.
*/
intel_engine_wakeup(engine);
}
void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
{
memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}
static void intel_engine_init_requests(struct intel_engine_cs *engine)
@ -192,6 +239,49 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_requests(engine);
intel_engine_init_hangcheck(engine);
i915_gem_batch_pool_init(engine, &engine->batch_pool);
intel_engine_init_cmd_parser(engine);
}
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int ret;
WARN_ON(engine->scratch);
obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
if (!obj)
obj = i915_gem_object_create(&engine->i915->drm, size);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate scratch page\n");
return PTR_ERR(obj);
}
vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unref;
}
ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
if (ret)
goto err_unref;
engine->scratch = vma;
DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
engine->name, i915_ggtt_offset(vma));
return 0;
err_unref:
i915_gem_object_put(obj);
return ret;
}
static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
{
i915_vma_unpin_and_release(&engine->scratch);
}
/**
@ -213,7 +303,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret)
return ret;
return intel_engine_init_cmd_parser(engine);
return 0;
}
/**
@ -225,7 +315,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
intel_engine_cleanup_cmd_parser(engine);
intel_engine_cleanup_scratch(engine);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
i915_gem_batch_pool_fini(&engine->batch_pool);
}

View file

@ -190,9 +190,13 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
dpfc_ctl |= DPFC_CTL_LIMIT_2X;
else
dpfc_ctl |= DPFC_CTL_LIMIT_1X;
dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg;
I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg;
I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
} else {
I915_WRITE(DPFC_FENCE_YOFF, 0);
}
/* enable it... */
I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
@ -244,21 +248,29 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
dpfc_ctl |= DPFC_CTL_LIMIT_1X;
break;
}
dpfc_ctl |= DPFC_CTL_FENCE_EN;
if (IS_GEN5(dev_priv))
dpfc_ctl |= params->fb.fence_reg;
if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
dpfc_ctl |= DPFC_CTL_FENCE_EN;
if (IS_GEN5(dev_priv))
dpfc_ctl |= params->fb.fence_reg;
if (IS_GEN6(dev_priv)) {
I915_WRITE(SNB_DPFC_CTL_SA,
SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
I915_WRITE(DPFC_CPU_FENCE_OFFSET,
params->crtc.fence_y_offset);
}
} else {
if (IS_GEN6(dev_priv)) {
I915_WRITE(SNB_DPFC_CTL_SA, 0);
I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0);
}
}
I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID);
/* enable it... */
I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
if (IS_GEN6(dev_priv)) {
I915_WRITE(SNB_DPFC_CTL_SA,
SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
}
intel_fbc_recompress(dev_priv);
}
@ -305,7 +317,15 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
break;
}
dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
I915_WRITE(SNB_DPFC_CTL_SA,
SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
} else {
I915_WRITE(SNB_DPFC_CTL_SA, 0);
I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0);
}
if (dev_priv->fbc.false_color)
dpfc_ctl |= FBC_CTL_FALSE_COLOR;
@ -324,10 +344,6 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
I915_WRITE(SNB_DPFC_CTL_SA,
SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
intel_fbc_recompress(dev_priv);
}
@ -709,6 +725,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
return effective_w <= max_w && effective_h <= max_h;
}
/* XXX replace me when we have VMA tracking for intel_plane_state */
static int get_fence_id(struct drm_framebuffer *fb)
{
struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
}
static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
struct intel_crtc_state *crtc_state,
struct intel_plane_state *plane_state)
@ -737,10 +761,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
/* FIXME: We lack the proper locking here, so only run this on the
* platforms that need it. */
if (IS_GEN(dev_priv, 5, 6))
cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj);
cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
cache->fb.pixel_format = fb->pixel_format;
cache->fb.stride = fb->pitches[0];
cache->fb.fence_reg = obj->fence_reg;
cache->fb.fence_reg = get_fence_id(fb);
cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
}
@ -768,11 +792,17 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
/* The use of a CPU fence is mandatory in order to detect writes
* by the CPU to the scanout and trigger updates to the FBC.
*
* Note that it is possible for a tiled surface to be unmappable (and
* so have no fence associated with it) due to aperture constraints
* at the time of pinning.
*/
if (cache->fb.tiling_mode != I915_TILING_X ||
cache->fb.fence_reg == I915_FENCE_REG_NONE) {
fbc->no_fbc_reason = "framebuffer not tiled or fenced";
return false;
if (INTEL_GEN(dev_priv) < 5) {
fbc->no_fbc_reason = "framebuffer not tiled or fenced";
return false;
}
}
if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) &&
cache->plane.rotation != DRM_ROTATE_0) {

View file

@ -187,7 +187,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
struct fb_info *info;
struct drm_framebuffer *fb;
struct i915_vma *vma;
struct drm_i915_gem_object *obj;
bool prealloc = false;
void __iomem *vaddr;
int ret;
@ -215,17 +214,17 @@ static int intelfb_create(struct drm_fb_helper *helper,
sizes->fb_height = intel_fb->base.height;
}
obj = intel_fb->obj;
mutex_lock(&dev->struct_mutex);
/* Pin the GGTT vma for our access via info->screen_base.
* This also validates that any existing fb inherited from the
* BIOS is suitable for own access.
*/
ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0);
if (ret)
vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out_unlock;
}
info = drm_fb_helper_alloc_fbi(helper);
if (IS_ERR(info)) {
@ -245,13 +244,11 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
info->fbops = &intelfb_ops;
vma = i915_gem_obj_to_ggtt(obj);
/* setup aperture base/size for vesafb takeover */
info->apertures->ranges[0].base = dev->mode_config.fb_base;
info->apertures->ranges[0].size = ggtt->mappable_end;
info->fix.smem_start = dev->mode_config.fb_base + vma->node.start;
info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma);
info->fix.smem_len = vma->node.size;
vaddr = i915_vma_pin_iomap(vma);
@ -273,14 +270,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
* If the object is stolen however, it will be full of whatever
* garbage was left in there.
*/
if (ifbdev->fb->obj->stolen && !prealloc)
if (intel_fb->obj->stolen && !prealloc)
memset_io(info->screen_base, 0, info->screen_size);
/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n",
fb->width, fb->height,
i915_gem_obj_ggtt_offset(obj), obj);
DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
fb->width, fb->height, i915_ggtt_offset(vma));
ifbdev->vma = vma;
mutex_unlock(&dev->struct_mutex);
vga_switcheroo_client_fb_set(dev->pdev, info);

View file

@ -63,26 +63,25 @@ struct drm_i915_gem_request;
* retcode: errno from last guc_submit()
*/
struct i915_guc_client {
struct drm_i915_gem_object *client_obj;
struct i915_vma *vma;
void *client_base; /* first page (only) of above */
struct i915_gem_context *owner;
struct intel_guc *guc;
uint32_t engines; /* bitmap of (host) engine ids */
uint32_t priority;
uint32_t ctx_index;
uint32_t proc_desc_offset;
uint32_t doorbell_offset;
uint32_t cookie;
uint16_t doorbell_id;
uint16_t padding; /* Maintain alignment */
uint16_t padding[3]; /* Maintain alignment */
uint32_t wq_offset;
uint32_t wq_size;
uint32_t wq_tail;
uint32_t unused; /* Was 'wq_head' */
uint32_t no_wq_space;
uint32_t q_fail; /* No longer used */
uint32_t b_fail;
int retcode;
@ -125,11 +124,10 @@ struct intel_guc_fw {
struct intel_guc {
struct intel_guc_fw guc_fw;
uint32_t log_flags;
struct drm_i915_gem_object *log_obj;
struct i915_vma *log_vma;
struct drm_i915_gem_object *ads_obj;
struct drm_i915_gem_object *ctx_pool_obj;
struct i915_vma *ads_vma;
struct i915_vma *ctx_pool_vma;
struct ida ctx_ids;
struct i915_guc_client *execbuf_client;

View file

@ -59,13 +59,25 @@
*
*/
#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin"
#define SKL_FW_MAJOR 6
#define SKL_FW_MINOR 1
#define BXT_FW_MAJOR 8
#define BXT_FW_MINOR 7
#define KBL_FW_MAJOR 9
#define KBL_FW_MINOR 14
#define GUC_FW_PATH(platform, major, minor) \
"i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR)
MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin"
#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR)
MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
#define I915_KBL_GUC_UCODE "i915/kbl_guc_ver9_14.bin"
#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
/* User-friendly representation of an enum */
@ -181,16 +193,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
}
if (guc->ads_obj) {
u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj)
>> PAGE_SHIFT;
if (guc->ads_vma) {
u32 ads = i915_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
}
/* If GuC submission is enabled, set up additional parameters here */
if (i915.enable_guc_submission) {
u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
u32 pgs = i915_ggtt_offset(dev_priv->guc.ctx_pool_vma);
u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
pgs >>= PAGE_SHIFT;
@ -238,12 +249,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
* Note that GuC needs the CSS header plus uKernel code to be copied by the
* DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
*/
static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
struct i915_vma *vma)
{
struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
unsigned long offset;
struct sg_table *sg = fw_obj->pages;
struct sg_table *sg = vma->pages;
u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
int i, ret = 0;
@ -260,7 +271,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
/* Set the source address for the new blob */
offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset;
offset = i915_ggtt_offset(vma) + guc_fw->header_offset;
I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
@ -315,6 +326,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
{
struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
struct drm_device *dev = &dev_priv->drm;
struct i915_vma *vma;
int ret;
ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false);
@ -323,10 +335,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
return ret;
}
ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
if (ret) {
DRM_DEBUG_DRIVER("pin failed %d\n", ret);
return ret;
vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
if (IS_ERR(vma)) {
DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma));
return PTR_ERR(vma);
}
/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
@ -369,7 +381,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
set_guc_init_params(dev_priv);
ret = guc_ucode_xfer_dma(dev_priv);
ret = guc_ucode_xfer_dma(dev_priv, vma);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@ -377,7 +389,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
* We keep the object pages for reuse during resume. But we can unpin it
* now that DMA has completed, so it doesn't continue to take up space.
*/
i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj);
i915_vma_unpin(vma);
return ret;
}
@ -697,16 +709,16 @@ void intel_guc_init(struct drm_device *dev)
fw_path = NULL;
} else if (IS_SKYLAKE(dev)) {
fw_path = I915_SKL_GUC_UCODE;
guc_fw->guc_fw_major_wanted = 6;
guc_fw->guc_fw_minor_wanted = 1;
guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR;
guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR;
} else if (IS_BROXTON(dev)) {
fw_path = I915_BXT_GUC_UCODE;
guc_fw->guc_fw_major_wanted = 8;
guc_fw->guc_fw_minor_wanted = 7;
guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR;
guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR;
} else if (IS_KABYLAKE(dev)) {
fw_path = I915_KBL_GUC_UCODE;
guc_fw->guc_fw_major_wanted = 9;
guc_fw->guc_fw_minor_wanted = 14;
guc_fw->guc_fw_major_wanted = KBL_FW_MAJOR;
guc_fw->guc_fw_minor_wanted = KBL_FW_MINOR;
} else {
fw_path = ""; /* unknown device */
}

View file

@ -477,7 +477,8 @@ void intel_hpd_init(struct drm_i915_private *dev_priv)
spin_unlock_irq(&dev_priv->irq_lock);
}
void i915_hpd_poll_init_work(struct work_struct *work) {
static void i915_hpd_poll_init_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, struct drm_i915_private,
hotplug.poll_init_work);

View file

@ -315,7 +315,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
desc = ctx->desc_template; /* bits 3-4 */
desc |= engine->ctx_desc_template; /* bits 0-11 */
desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE;
/* bits 12-31 */
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
@ -763,7 +763,6 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
static int intel_lr_context_pin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = ctx->i915;
struct intel_context *ce = &ctx->engine[engine->id];
void *vaddr;
u32 *lrc_reg_state;
@ -774,16 +773,15 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
if (ce->pin_count++)
return 0;
ret = i915_gem_object_ggtt_pin(ce->state, NULL,
0, GEN8_LR_CONTEXT_ALIGN,
PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
if (ret)
goto err;
vaddr = i915_gem_object_pin_map(ce->state);
vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
goto unpin_ctx_obj;
goto unpin_vma;
}
lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
@ -792,24 +790,26 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
if (ret)
goto unpin_map;
ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state);
intel_lr_context_descriptor_update(ctx, engine);
lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start;
lrc_reg_state[CTX_RING_BUFFER_START+1] =
i915_ggtt_offset(ce->ring->vma);
ce->lrc_reg_state = lrc_reg_state;
ce->state->dirty = true;
ce->state->obj->dirty = true;
/* Invalidate GuC TLB. */
if (i915.enable_guc_submission)
if (i915.enable_guc_submission) {
struct drm_i915_private *dev_priv = ctx->i915;
I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
i915_gem_context_get(ctx);
return 0;
unpin_map:
i915_gem_object_unpin_map(ce->state);
unpin_ctx_obj:
i915_gem_object_ggtt_unpin(ce->state);
i915_gem_object_unpin_map(ce->state->obj);
unpin_vma:
__i915_vma_unpin(ce->state);
err:
ce->pin_count = 0;
return ret;
@ -828,12 +828,8 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
intel_ring_unpin(ce->ring);
i915_gem_object_unpin_map(ce->state);
i915_gem_object_ggtt_unpin(ce->state);
ce->lrc_vma = NULL;
ce->lrc_desc = 0;
ce->lrc_reg_state = NULL;
i915_gem_object_unpin_map(ce->state->obj);
i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
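intel_lr_context_pin()/unpin() above follow the usual kernel unwind discipline: each acquired resource gets a matching label so a later failure releases everything already taken, in reverse order. A stripped-down sketch of that shape (the function name and the elided setup are placeholders):

static int lr_context_pin_sketch(struct i915_vma *state)
{
	void *vaddr;
	int ret;

	ret = i915_vma_pin(state, 0, GEN8_LR_CONTEXT_ALIGN,
			   PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
	if (ret)
		return ret;

	vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto unpin_vma;
	}

	/* ...consume vaddr + LRC_STATE_PN * PAGE_SIZE, pin the ring, etc... */

	return 0;

unpin_vma:
	__i915_vma_unpin(state);
	return ret;
}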
@ -919,7 +915,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT));
wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
wa_ctx_emit(batch, index, 0);
wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
@ -937,7 +933,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT));
wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
wa_ctx_emit(batch, index, 0);
return index;
@ -998,7 +994,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
/* Actual scratch location is at 128 bytes offset */
scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@ -1077,8 +1073,8 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
/* WaClearSlmSpaceAtContextSwitch:kbl */
/* Actual scratch location is at 128 bytes offset */
if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
uint32_t scratch_addr
= engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
u32 scratch_addr =
i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@ -1170,45 +1166,44 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
{
int ret;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int err;
engine->wa_ctx.obj = i915_gem_object_create(&engine->i915->drm,
PAGE_ALIGN(size));
if (IS_ERR(engine->wa_ctx.obj)) {
DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
ret = PTR_ERR(engine->wa_ctx.obj);
engine->wa_ctx.obj = NULL;
return ret;
obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size));
if (IS_ERR(obj))
return PTR_ERR(obj);
vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
}
ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
0, PAGE_SIZE, 0);
if (ret) {
DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
ret);
i915_gem_object_put(engine->wa_ctx.obj);
return ret;
}
err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err;
engine->wa_ctx.vma = vma;
return 0;
err:
i915_gem_object_put(obj);
return err;
}
static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
{
if (engine->wa_ctx.obj) {
i915_gem_object_ggtt_unpin(engine->wa_ctx.obj);
i915_gem_object_put(engine->wa_ctx.obj);
engine->wa_ctx.obj = NULL;
}
i915_vma_unpin_and_release(&engine->wa_ctx.vma);
}
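lrc_destroy_wa_ctx_obj() collapses the old unpin/put/NULL sequence into a single i915_vma_unpin_and_release() call. The helper itself is defined elsewhere in the series; its visible effect is roughly the sketch below (not the exact implementation):

static void wa_ctx_release_sketch(struct i915_vma **p_vma)
{
	/* Claim the pointer once, so a second call becomes a no-op */
	struct i915_vma *vma = fetch_and_zero(p_vma);

	if (!vma)
		return;

	i915_vma_unpin(vma);
	i915_vma_put(vma);
}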
static int intel_init_workaround_bb(struct intel_engine_cs *engine)
{
int ret;
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
uint32_t *batch;
uint32_t offset;
struct page *page;
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
int ret;
WARN_ON(engine->id != RCS);
@ -1220,7 +1215,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
}
/* some WA perform writes to scratch page, ensure it is valid */
if (engine->scratch.obj == NULL) {
if (!engine->scratch) {
DRM_ERROR("scratch page not allocated for %s\n", engine->name);
return -EINVAL;
}
@ -1231,7 +1226,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0);
page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
batch = kmap_atomic(page);
offset = 0;
@ -1278,7 +1273,7 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
(u32)engine->status_page.gfx_addr);
engine->status_page.ggtt_offset);
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
}
@ -1488,7 +1483,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
{
struct intel_ring *ring = request->ring;
struct intel_engine_cs *engine = request->engine;
u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
u32 scratch_addr =
i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
bool vf_flush_wa = false, dc_flush_wa = false;
u32 flags = 0;
int ret;
@ -1700,9 +1696,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
if (engine->status_page.obj) {
i915_gem_object_unpin_map(engine->status_page.obj);
engine->status_page.obj = NULL;
if (engine->status_page.vma) {
i915_gem_object_unpin_map(engine->status_page.vma->obj);
engine->status_page.vma = NULL;
}
intel_lr_context_unpin(dev_priv->kernel_context, engine);
@ -1747,19 +1743,19 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
}
static int
lrc_setup_hws(struct intel_engine_cs *engine,
struct drm_i915_gem_object *dctx_obj)
lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
{
const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
void *hws;
/* The HWSP is part of the default context object in LRC mode. */
engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
LRC_PPHWSP_PN * PAGE_SIZE;
hws = i915_gem_object_pin_map(dctx_obj);
hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
if (IS_ERR(hws))
return PTR_ERR(hws);
engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
engine->status_page.obj = dctx_obj;
engine->status_page.page_addr = hws + hws_offset;
engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
engine->status_page.vma = vma;
return 0;
}
@ -1849,11 +1845,10 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
else
engine->init_hw = gen8_init_render_ring;
engine->init_context = gen8_init_rcs_context;
engine->cleanup = intel_fini_pipe_control;
engine->emit_flush = gen8_emit_flush_render;
engine->emit_request = gen8_emit_request_render;
ret = intel_init_pipe_control(engine, 4096);
ret = intel_engine_create_scratch(engine, 4096);
if (ret)
return ret;
@ -1968,7 +1963,7 @@ populate_lr_context(struct i915_gem_context *ctx,
return ret;
}
vaddr = i915_gem_object_pin_map(ctx_obj);
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
@ -2025,9 +2020,9 @@ populate_lr_context(struct i915_gem_context *ctx,
RING_INDIRECT_CTX(engine->mmio_base), 0);
ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0);
if (engine->wa_ctx.obj) {
if (engine->wa_ctx.vma) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
reg_state[CTX_RCS_INDIRECT_CTX+1] =
(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
@ -2131,6 +2126,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
{
struct drm_i915_gem_object *ctx_obj;
struct intel_context *ce = &ctx->engine[engine->id];
struct i915_vma *vma;
uint32_t context_size;
struct intel_ring *ring;
int ret;
@ -2148,6 +2144,12 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
return PTR_ERR(ctx_obj);
}
vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto error_deref_obj;
}
ring = intel_engine_create_ring(engine, ctx->ring_size);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
@ -2161,7 +2163,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
}
ce->ring = ring;
ce->state = ctx_obj;
ce->state = vma;
ce->initialised = engine->init_context == NULL;
return 0;
@ -2170,8 +2172,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
intel_ring_free(ring);
error_deref_obj:
i915_gem_object_put(ctx_obj);
ce->ring = NULL;
ce->state = NULL;
return ret;
}
@ -2182,24 +2182,23 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
for_each_engine(engine, dev_priv) {
struct intel_context *ce = &ctx->engine[engine->id];
struct drm_i915_gem_object *ctx_obj = ce->state;
void *vaddr;
uint32_t *reg_state;
if (!ctx_obj)
if (!ce->state)
continue;
vaddr = i915_gem_object_pin_map(ctx_obj);
vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
if (WARN_ON(IS_ERR(vaddr)))
continue;
reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
ctx_obj->dirty = true;
reg_state[CTX_RING_HEAD+1] = 0;
reg_state[CTX_RING_TAIL+1] = 0;
i915_gem_object_unpin_map(ctx_obj);
ce->state->obj->dirty = true;
i915_gem_object_unpin_map(ce->state->obj);
ce->ring->head = 0;
ce->ring->tail = 0;

View file

@ -48,6 +48,20 @@ struct intel_lvds_connector {
struct notifier_block lid_notifier;
};
struct intel_lvds_pps {
/* 100us units */
int t1_t2;
int t3;
int t4;
int t5;
int tx;
int divider;
int port;
bool powerdown_on_reset;
};
struct intel_lvds_encoder {
struct intel_encoder base;
@ -55,6 +69,9 @@ struct intel_lvds_encoder {
i915_reg_t reg;
u32 a3_power;
struct intel_lvds_pps init_pps;
u32 init_lvds_val;
struct intel_lvds_connector *attached_connector;
};
@ -136,6 +153,83 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
}
static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
struct intel_lvds_pps *pps)
{
u32 val;
pps->powerdown_on_reset = I915_READ(PP_CONTROL(0)) & PANEL_POWER_RESET;
val = I915_READ(PP_ON_DELAYS(0));
pps->port = (val & PANEL_PORT_SELECT_MASK) >>
PANEL_PORT_SELECT_SHIFT;
pps->t1_t2 = (val & PANEL_POWER_UP_DELAY_MASK) >>
PANEL_POWER_UP_DELAY_SHIFT;
pps->t5 = (val & PANEL_LIGHT_ON_DELAY_MASK) >>
PANEL_LIGHT_ON_DELAY_SHIFT;
val = I915_READ(PP_OFF_DELAYS(0));
pps->t3 = (val & PANEL_POWER_DOWN_DELAY_MASK) >>
PANEL_POWER_DOWN_DELAY_SHIFT;
pps->tx = (val & PANEL_LIGHT_OFF_DELAY_MASK) >>
PANEL_LIGHT_OFF_DELAY_SHIFT;
val = I915_READ(PP_DIVISOR(0));
pps->divider = (val & PP_REFERENCE_DIVIDER_MASK) >>
PP_REFERENCE_DIVIDER_SHIFT;
val = (val & PANEL_POWER_CYCLE_DELAY_MASK) >>
PANEL_POWER_CYCLE_DELAY_SHIFT;
/*
* Remove the BSpec specified +1 (100ms) offset that accounts for a
* too short power-cycle delay due to the asynchronous programming of
* the register.
*/
if (val)
val--;
/* Convert from 100ms to 100us units */
pps->t4 = val * 1000;
if (INTEL_INFO(dev_priv)->gen <= 4 &&
pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
DRM_DEBUG_KMS("Panel power timings uninitialized, "
"setting defaults\n");
/* Set T2 to 40ms and T5 to 200ms in 100 usec units */
pps->t1_t2 = 40 * 10;
pps->t5 = 200 * 10;
/* Set T3 to 35ms and Tx to 200ms in 100 usec units */
pps->t3 = 35 * 10;
pps->tx = 200 * 10;
}
DRM_DEBUG_DRIVER("LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d "
"divider %d port %d powerdown_on_reset %d\n",
pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx,
pps->divider, pps->port, pps->powerdown_on_reset);
}
static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv,
struct intel_lvds_pps *pps)
{
u32 val;
val = I915_READ(PP_CONTROL(0));
WARN_ON((val & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS);
if (pps->powerdown_on_reset)
val |= PANEL_POWER_RESET;
I915_WRITE(PP_CONTROL(0), val);
I915_WRITE(PP_ON_DELAYS(0), (pps->port << PANEL_PORT_SELECT_SHIFT) |
(pps->t1_t2 << PANEL_POWER_UP_DELAY_SHIFT) |
(pps->t5 << PANEL_LIGHT_ON_DELAY_SHIFT));
I915_WRITE(PP_OFF_DELAYS(0), (pps->t3 << PANEL_POWER_DOWN_DELAY_SHIFT) |
(pps->tx << PANEL_LIGHT_OFF_DELAY_SHIFT));
val = pps->divider << PP_REFERENCE_DIVIDER_SHIFT;
val |= (DIV_ROUND_UP(pps->t4, 1000) + 1) <<
PANEL_POWER_CYCLE_DELAY_SHIFT;
I915_WRITE(PP_DIVISOR(0), val);
}
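The power-cycle delay handling above is easy to get backwards: the PP_DIVISOR field is in 100 ms units and carries a BSpec-mandated +1, while the struct stores 100 us units. A worked round trip matching the read path in intel_lvds_pps_get_hw_state() and the write path in intel_lvds_pps_init_hw():

/* Worked example of the 100 ms <-> 100 us conversion used above */
static void pps_t4_round_trip_example(void)
{
	u32 field = 5;					/* raw power-cycle delay field */
	int t4 = (field ? field - 1 : 0) * 1000;	/* 4000 x 100 us, i.e. 400 ms */
	u32 encoded = DIV_ROUND_UP(t4, 1000) + 1;	/* back to 5 on the write side */

	WARN_ON(encoded != field);
}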
static void intel_pre_enable_lvds(struct intel_encoder *encoder)
{
struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
@ -154,7 +248,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder)
assert_pll_disabled(dev_priv, pipe);
}
temp = I915_READ(lvds_encoder->reg);
intel_lvds_pps_init_hw(dev_priv, &lvds_encoder->init_pps);
temp = lvds_encoder->init_lvds_val;
temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
if (HAS_PCH_CPT(dev)) {
@ -217,21 +313,12 @@ static void intel_enable_lvds(struct intel_encoder *encoder)
struct intel_connector *intel_connector =
&lvds_encoder->attached_connector->base;
struct drm_i915_private *dev_priv = to_i915(dev);
i915_reg_t ctl_reg, stat_reg;
if (HAS_PCH_SPLIT(dev)) {
ctl_reg = PCH_PP_CONTROL;
stat_reg = PCH_PP_STATUS;
} else {
ctl_reg = PP_CONTROL;
stat_reg = PP_STATUS;
}
I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) | LVDS_PORT_EN);
I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON);
I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON);
POSTING_READ(lvds_encoder->reg);
if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, PP_ON, 1000))
if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000))
DRM_ERROR("timed out waiting for panel to power on\n");
intel_panel_enable_backlight(intel_connector);
@ -242,18 +329,9 @@ static void intel_disable_lvds(struct intel_encoder *encoder)
struct drm_device *dev = encoder->base.dev;
struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
struct drm_i915_private *dev_priv = to_i915(dev);
i915_reg_t ctl_reg, stat_reg;
if (HAS_PCH_SPLIT(dev)) {
ctl_reg = PCH_PP_CONTROL;
stat_reg = PCH_PP_STATUS;
} else {
ctl_reg = PP_CONTROL;
stat_reg = PP_STATUS;
}
I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON);
if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, 0, 1000))
I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) & ~PANEL_POWER_ON);
if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, 0, 1000))
DRM_ERROR("timed out waiting for panel to power off\n");
I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN);
@ -900,17 +978,6 @@ void intel_lvds_init(struct drm_device *dev)
int pipe;
u8 pin;
/*
* Unlock registers and just leave them unlocked. Do this before
* checking quirk lists to avoid bogus WARNINGs.
*/
if (HAS_PCH_SPLIT(dev)) {
I915_WRITE(PCH_PP_CONTROL,
I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
} else if (INTEL_INFO(dev_priv)->gen < 5) {
I915_WRITE(PP_CONTROL,
I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
}
if (!intel_lvds_supported(dev))
return;
@ -943,18 +1010,6 @@ void intel_lvds_init(struct drm_device *dev)
DRM_DEBUG_KMS("LVDS is not present in VBT, but enabled anyway\n");
}
/* Set the Panel Power On/Off timings if uninitialized. */
if (INTEL_INFO(dev_priv)->gen < 5 &&
I915_READ(PP_ON_DELAYS) == 0 && I915_READ(PP_OFF_DELAYS) == 0) {
/* Set T2 to 40ms and T5 to 200ms */
I915_WRITE(PP_ON_DELAYS, 0x019007d0);
/* Set T3 to 35ms and Tx to 200ms */
I915_WRITE(PP_OFF_DELAYS, 0x015e07d0);
DRM_DEBUG_KMS("Panel power timings uninitialized, setting defaults\n");
}
lvds_encoder = kzalloc(sizeof(*lvds_encoder), GFP_KERNEL);
if (!lvds_encoder)
return;
@ -1020,6 +1075,10 @@ void intel_lvds_init(struct drm_device *dev)
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_ASPECT);
intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT;
intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps);
lvds_encoder->init_lvds_val = lvds;
/*
* LVDS discovery:
* 1) check for EDID on DDC

View file

@ -171,8 +171,8 @@ struct overlay_registers {
struct intel_overlay {
struct drm_i915_private *i915;
struct intel_crtc *crtc;
struct drm_i915_gem_object *vid_bo;
struct drm_i915_gem_object *old_vid_bo;
struct i915_vma *vma;
struct i915_vma *old_vma;
bool active;
bool pfit_active;
u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
@ -196,7 +196,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay)
if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
else
regs = io_mapping_map_wc(dev_priv->ggtt.mappable,
regs = io_mapping_map_wc(&dev_priv->ggtt.mappable,
overlay->flip_addr,
PAGE_SIZE);
@ -317,15 +317,17 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
{
struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip);
struct drm_i915_gem_object *obj = overlay->old_vid_bo;
struct i915_vma *vma;
i915_gem_track_fb(obj, NULL,
vma = fetch_and_zero(&overlay->old_vma);
if (WARN_ON(!vma))
return;
i915_gem_track_fb(vma->obj, NULL,
INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
i915_gem_object_ggtt_unpin(obj);
i915_gem_object_put(obj);
overlay->old_vid_bo = NULL;
i915_gem_object_unpin_from_display_plane(vma);
i915_vma_put(vma);
}
static void intel_overlay_off_tail(struct i915_gem_active *active,
@ -333,15 +335,15 @@ static void intel_overlay_off_tail(struct i915_gem_active *active,
{
struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip);
struct drm_i915_gem_object *obj = overlay->vid_bo;
struct i915_vma *vma;
/* never have the overlay hw on without showing a frame */
if (WARN_ON(!obj))
vma = fetch_and_zero(&overlay->vma);
if (WARN_ON(!vma))
return;
i915_gem_object_ggtt_unpin(obj);
i915_gem_object_put(obj);
overlay->vid_bo = NULL;
i915_gem_object_unpin_from_display_plane(vma);
i915_vma_put(vma);
overlay->crtc->overlay = NULL;
overlay->crtc = NULL;
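Both tail handlers now claim the vma pointer with fetch_and_zero() before releasing it, so the release runs at most once even if the tail fires again. In i915 that helper is a small macro along the lines of the sketch below (illustrative, not a verbatim copy of the definition in i915_drv.h):

/* Read *ptr and clear it in a single expression */
#define fetch_and_zero_sketch(ptr) ({			\
	typeof(*(ptr)) __T = *(ptr);			\
	*(ptr) = (typeof(*(ptr)))0;			\
	__T;						\
})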
@ -421,7 +423,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
/* Only wait if there is actually an old frame to release to
* guarantee forward progress.
*/
if (!overlay->old_vid_bo)
if (!overlay->old_vma)
return 0;
if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
@ -744,6 +746,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
struct drm_i915_private *dev_priv = overlay->i915;
u32 swidth, swidthsw, sheight, ostride;
enum pipe pipe = overlay->crtc->pipe;
struct i915_vma *vma;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
@ -752,12 +755,12 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
if (ret != 0)
return ret;
ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
vma = i915_gem_object_pin_to_display_plane(new_bo, 0,
&i915_ggtt_view_normal);
if (ret != 0)
return ret;
if (IS_ERR(vma))
return PTR_ERR(vma);
ret = i915_gem_object_put_fence(new_bo);
ret = i915_vma_put_fence(vma);
if (ret)
goto out_unpin;
@ -798,7 +801,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
swidth = params->src_w;
swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
sheight = params->src_h;
iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
iowrite32(i915_ggtt_offset(vma) + params->offset_Y, &regs->OBUF_0Y);
ostride = params->stride_Y;
if (params->format & I915_OVERLAY_YUV_PLANAR) {
@ -812,8 +815,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
params->src_w/uv_hscale);
swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
sheight |= (params->src_h/uv_vscale) << 16;
iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
iowrite32(i915_ggtt_offset(vma) + params->offset_U,
&regs->OBUF_0U);
iowrite32(i915_ggtt_offset(vma) + params->offset_V,
&regs->OBUF_0V);
ostride |= params->stride_UV << 16;
}
@ -834,18 +839,18 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
if (ret)
goto out_unpin;
i915_gem_track_fb(overlay->vid_bo, new_bo,
i915_gem_track_fb(overlay->vma->obj, new_bo,
INTEL_FRONTBUFFER_OVERLAY(pipe));
overlay->old_vid_bo = overlay->vid_bo;
overlay->vid_bo = new_bo;
overlay->old_vma = overlay->vma;
overlay->vma = vma;
intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
return 0;
out_unpin:
i915_gem_object_ggtt_unpin(new_bo);
i915_gem_object_unpin_from_display_plane(vma);
return ret;
}
@ -1368,6 +1373,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
struct intel_overlay *overlay;
struct drm_i915_gem_object *reg_bo;
struct overlay_registers __iomem *regs;
struct i915_vma *vma = NULL;
int ret;
if (!HAS_OVERLAY(dev_priv))
@ -1401,13 +1407,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
}
overlay->flip_addr = reg_bo->phys_handle->busaddr;
} else {
ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
vma = i915_gem_object_ggtt_pin(reg_bo, NULL,
0, PAGE_SIZE, PIN_MAPPABLE);
if (ret) {
if (IS_ERR(vma)) {
DRM_ERROR("failed to pin overlay register bo\n");
ret = PTR_ERR(vma);
goto out_free_bo;
}
overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
overlay->flip_addr = i915_ggtt_offset(vma);
ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
if (ret) {
@ -1439,8 +1446,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
return;
out_unpin_bo:
if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
i915_gem_object_ggtt_unpin(reg_bo);
if (vma)
i915_vma_unpin(vma);
out_free_bo:
i915_gem_object_put(reg_bo);
out_free:
@ -1482,7 +1489,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
regs = (struct overlay_registers __iomem *)
overlay->reg_bo->phys_handle->vaddr;
else
regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable,
regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable,
overlay->flip_addr);
return regs;

View file

@ -3115,8 +3115,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id];
}
WARN_ON(cstate->plane_mask && total_data_rate == 0);
return total_data_rate;
}
@ -3920,9 +3918,24 @@ skl_compute_ddb(struct drm_atomic_state *state)
* pretend that all pipes switched active status so that we'll
* ensure a full DDB recompute.
*/
if (dev_priv->wm.distrust_bios_wm)
if (dev_priv->wm.distrust_bios_wm) {
ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
state->acquire_ctx);
if (ret)
return ret;
intel_state->active_pipe_changes = ~0;
/*
* We usually only initialize intel_state->active_crtcs if
* we're doing a modeset; make sure this field is always
* initialized during the sanitization process that happens
* on the first commit too.
*/
if (!intel_state->modeset)
intel_state->active_crtcs = dev_priv->active_crtcs;
}
/*
* If the modeset changes which CRTC's are active, we need to
* recompute the DDB allocation for *all* active pipes, even
@ -5675,8 +5688,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
u32 pcbr;
int pctx_size = 24*1024;
mutex_lock(&dev_priv->drm.struct_mutex);
pcbr = I915_READ(VLV_PCBR);
if (pcbr) {
/* BIOS set it up already, grab the pre-alloc'd space */
@ -5712,7 +5723,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
out:
DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
dev_priv->vlv_pctx = pctx;
mutex_unlock(&dev_priv->drm.struct_mutex);
}
static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
@ -6488,6 +6498,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
intel_runtime_pm_get(dev_priv);
}
mutex_lock(&dev_priv->drm.struct_mutex);
mutex_lock(&dev_priv->rps.hw_lock);
/* Initialize RPS limits (for userspace) */
@ -6529,6 +6540,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
mutex_unlock(&dev_priv->rps.hw_lock);
mutex_unlock(&dev_priv->drm.struct_mutex);
intel_autoenable_gt_powersave(dev_priv);
}

View file

@ -176,7 +176,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
{
struct intel_ring *ring = req->ring;
u32 scratch_addr =
req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
int ret;
ret = intel_ring_begin(req, 6);
@ -212,7 +212,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
struct intel_ring *ring = req->ring;
u32 scratch_addr =
req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
u32 flags = 0;
int ret;
@ -286,7 +286,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
struct intel_ring *ring = req->ring;
u32 scratch_addr =
req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
u32 flags = 0;
int ret;
@ -370,7 +370,8 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req,
static int
gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
u32 scratch_addr =
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
u32 flags = 0;
int ret;
@ -466,7 +467,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
mmio = RING_HWS_PGA(engine->mmio_base);
}
I915_WRITE(mmio, (u32)engine->status_page.gfx_addr);
I915_WRITE(mmio, engine->status_page.ggtt_offset);
POSTING_READ(mmio);
/*
@ -497,7 +498,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
if (!IS_GEN2(dev_priv)) {
if (INTEL_GEN(dev_priv) > 2) {
I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
if (intel_wait_for_register(dev_priv,
RING_MI_MODE(engine->mmio_base),
@ -519,7 +520,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
I915_WRITE_HEAD(engine, 0);
I915_WRITE_TAIL(engine, 0);
if (!IS_GEN2(dev_priv)) {
if (INTEL_GEN(dev_priv) > 2) {
(void)I915_READ_CTL(engine);
I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
}
@ -531,7 +532,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
struct intel_ring *ring = engine->buffer;
struct drm_i915_gem_object *obj = ring->obj;
int ret = 0;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@ -571,7 +571,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
* registers with the above sequence (the readback of the HEAD registers
* also enforces ordering), otherwise the hw might lose the new ring
* register values. */
I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
I915_WRITE_START(engine, i915_ggtt_offset(ring->vma));
/* WaClearRingBufHeadRegAtInit:ctg,elk */
if (I915_READ_HEAD(engine))
@ -586,16 +586,16 @@ static int init_ring_common(struct intel_engine_cs *engine)
/* If the head is still not zero, the ring is dead */
if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
(I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
DRM_ERROR("%s initialization failed "
"ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
"ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
engine->name,
I915_READ_CTL(engine),
I915_READ_CTL(engine) & RING_VALID,
I915_READ_HEAD(engine), I915_READ_TAIL(engine),
I915_READ_START(engine),
(unsigned long)i915_gem_obj_ggtt_offset(obj));
i915_ggtt_offset(ring->vma));
ret = -EIO;
goto out;
}
@ -613,48 +613,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
return ret;
}
void intel_fini_pipe_control(struct intel_engine_cs *engine)
{
if (engine->scratch.obj == NULL)
return;
i915_gem_object_ggtt_unpin(engine->scratch.obj);
i915_gem_object_put(engine->scratch.obj);
engine->scratch.obj = NULL;
}
int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
{
struct drm_i915_gem_object *obj;
int ret;
WARN_ON(engine->scratch.obj);
obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
if (!obj)
obj = i915_gem_object_create(&engine->i915->drm, size);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate scratch page\n");
ret = PTR_ERR(obj);
goto err;
}
ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
if (ret)
goto err_unref;
engine->scratch.obj = obj;
engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
engine->name, engine->scratch.gtt_offset);
return 0;
err_unref:
i915_gem_object_put(engine->scratch.obj);
err:
return ret;
}
static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
struct intel_ring *ring = req->ring;
@ -1300,13 +1258,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
if (dev_priv->semaphore_obj) {
i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
i915_gem_object_put(dev_priv->semaphore_obj);
dev_priv->semaphore_obj = NULL;
}
intel_fini_pipe_control(engine);
i915_vma_unpin_and_release(&dev_priv->semaphore);
}
static int gen8_rcs_signal(struct drm_i915_gem_request *req)
@ -1317,7 +1269,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
enum intel_engine_id id;
int ret, num_rings;
num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
num_rings = INTEL_INFO(dev_priv)->num_rings;
ret = intel_ring_begin(req, (num_rings-1) * 8);
if (ret)
return ret;
@ -1354,7 +1306,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
enum intel_engine_id id;
int ret, num_rings;
num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
num_rings = INTEL_INFO(dev_priv)->num_rings;
ret = intel_ring_begin(req, (num_rings-1) * 6);
if (ret)
return ret;
@ -1385,18 +1337,21 @@ static int gen6_signal(struct drm_i915_gem_request *req)
{
struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *useless;
enum intel_engine_id id;
struct intel_engine_cs *engine;
int ret, num_rings;
num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
num_rings = INTEL_INFO(dev_priv)->num_rings;
ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
if (ret)
return ret;
for_each_engine_id(useless, dev_priv, id) {
i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id];
for_each_engine(engine, dev_priv) {
i915_reg_t mbox_reg;
if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
continue;
mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
if (i915_mmio_reg_valid(mbox_reg)) {
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(ring, mbox_reg);
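gen6_signal() now walks every engine but filters on the hardware id, since only the first four engines have gen6-style semaphore mailboxes; the mask comes from the GEN6_SEMAPHORES_MASK/GENMASK definitions added to the ring-buffer header later in this diff. A tiny illustration of the filter (the helper is hypothetical; the value follows from VECS_HW == 3):

/* GENMASK(VECS_HW, 0) == 0xf: RCS_HW..VECS_HW participate, VCS2_HW (bit 4) does not */
static bool engine_has_gen6_semaphore(const struct intel_engine_cs *engine)
{
	return BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK;
}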
@ -1543,7 +1498,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req,
u32 dw1 = MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id];
u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id];
int ret;
WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
@ -1764,7 +1719,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
unsigned int dispatch_flags)
{
struct intel_ring *ring = req->ring;
u32 cs_offset = req->engine->scratch.gtt_offset;
u32 cs_offset = i915_ggtt_offset(req->engine->scratch);
int ret;
ret = intel_ring_begin(req, 6);
@ -1853,79 +1808,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
static void cleanup_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = engine->status_page.obj;
if (obj == NULL)
vma = fetch_and_zero(&engine->status_page.vma);
if (!vma)
return;
kunmap(sg_page(obj->pages->sgl));
i915_gem_object_ggtt_unpin(obj);
i915_gem_object_put(obj);
engine->status_page.obj = NULL;
i915_vma_unpin(vma);
i915_gem_object_unpin_map(vma->obj);
i915_vma_put(vma);
}
static int init_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *obj = engine->status_page.obj;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
unsigned int flags;
int ret;
if (obj == NULL) {
unsigned flags;
int ret;
obj = i915_gem_object_create(&engine->i915->drm, 4096);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate status page\n");
return PTR_ERR(obj);
}
ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
if (ret)
goto err_unref;
flags = 0;
if (!HAS_LLC(engine->i915))
/* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
* Though this restriction is not documented for
* gen4, gen5, or byt, they also behave similarly
* and hang if the HWS is placed at the top of the
* GTT. To generalise, it appears that all !llc
* platforms have issues with us placing the HWS
* above the mappable region (even though we never
* actualy map it).
*/
flags |= PIN_MAPPABLE;
ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
if (ret) {
err_unref:
i915_gem_object_put(obj);
return ret;
}
engine->status_page.obj = obj;
obj = i915_gem_object_create(&engine->i915->drm, 4096);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate status page\n");
return PTR_ERR(obj);
}
engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
memset(engine->status_page.page_addr, 0, PAGE_SIZE);
ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
if (ret)
goto err;
vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
}
flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915))
/* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
* Though this restriction is not documented for
* gen4, gen5, or byt, they also behave similarly
* and hang if the HWS is placed at the top of the
* GTT. To generalise, it appears that all !llc
* platforms have issues with us placing the HWS
* above the mappable region (even though we never
* actually map it).
*/
flags |= PIN_MAPPABLE;
ret = i915_vma_pin(vma, 0, 4096, flags);
if (ret)
goto err;
engine->status_page.vma = vma;
engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
engine->status_page.page_addr =
i915_gem_object_pin_map(obj, I915_MAP_WB);
DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
engine->name, engine->status_page.gfx_addr);
engine->name, i915_ggtt_offset(vma));
return 0;
err:
i915_gem_object_put(obj);
return ret;
}
static int init_phys_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
if (!dev_priv->status_page_dmah) {
dev_priv->status_page_dmah =
drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
if (!dev_priv->status_page_dmah)
return -ENOMEM;
}
dev_priv->status_page_dmah =
drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
if (!dev_priv->status_page_dmah)
return -ENOMEM;
engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
memset(engine->status_page.page_addr, 0, PAGE_SIZE);
@ -1935,55 +1890,46 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
int intel_ring_pin(struct intel_ring *ring)
{
struct drm_i915_private *dev_priv = ring->engine->i915;
struct drm_i915_gem_object *obj = ring->obj;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
unsigned flags = PIN_OFFSET_BIAS | 4096;
unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
enum i915_map_type map;
struct i915_vma *vma = ring->vma;
void *addr;
int ret;
if (HAS_LLC(dev_priv) && !obj->stolen) {
ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
if (ret)
GEM_BUG_ON(ring->vaddr);
map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
else
ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
if (unlikely(ret))
return ret;
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
addr = i915_gem_object_pin_map(obj);
if (IS_ERR(addr)) {
ret = PTR_ERR(addr);
goto err_unpin;
}
} else {
ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
flags | PIN_MAPPABLE);
if (ret)
return ret;
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
goto err_unpin;
/* Access through the GTT requires the device to be awake. */
assert_rpm_wakelock_held(dev_priv);
addr = (void __force *)
i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
if (IS_ERR(addr)) {
ret = PTR_ERR(addr);
goto err_unpin;
}
}
ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
if (unlikely(ret))
return ret;
if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma);
else
addr = i915_gem_object_pin_map(vma->obj, map);
if (IS_ERR(addr))
goto err;
ring->vaddr = addr;
ring->vma = i915_gem_obj_to_ggtt(obj);
return 0;
err_unpin:
i915_gem_object_ggtt_unpin(obj);
return ret;
err:
i915_vma_unpin(vma);
return PTR_ERR(addr);
}
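intel_ring_pin() now chooses the mapping type from where the buffer lives: a write-back CPU vmap on LLC parts, write-combining otherwise, and the GTT aperture iomap whenever the vma is mappable and fenceable (stolen-backed rings force PIN_MAPPABLE for exactly that reason). The decision reduces to something like the sketch below (helper name is illustrative):

static void *ring_map_sketch(struct i915_vma *vma, bool has_llc)
{
	enum i915_map_type map = has_llc ? I915_MAP_WB : I915_MAP_WC;

	/* Mappable, fenceable vmas are accessed through the GTT aperture... */
	if (i915_vma_is_map_and_fenceable(vma))
		return (void __force *)i915_vma_pin_iomap(vma);

	/* ...everything else gets a kernel vmap of the object's pages */
	return i915_gem_object_pin_map(vma->obj, map);
}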
void intel_ring_unpin(struct intel_ring *ring)
@ -1991,60 +1937,54 @@ void intel_ring_unpin(struct intel_ring *ring)
GEM_BUG_ON(!ring->vma);
GEM_BUG_ON(!ring->vaddr);
if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
i915_gem_object_unpin_map(ring->obj);
else
if (i915_vma_is_map_and_fenceable(ring->vma))
i915_vma_unpin_iomap(ring->vma);
else
i915_gem_object_unpin_map(ring->vma->obj);
ring->vaddr = NULL;
i915_gem_object_ggtt_unpin(ring->obj);
ring->vma = NULL;
i915_vma_unpin(ring->vma);
}
static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
{
i915_gem_object_put(ring->obj);
ring->obj = NULL;
}
static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
struct intel_ring *ring)
static struct i915_vma *
intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = NULL;
if (!HAS_LLC(dev))
obj = i915_gem_object_create_stolen(dev, ring->size);
if (obj == NULL)
obj = i915_gem_object_create(dev, ring->size);
obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
if (!obj)
obj = i915_gem_object_create(&dev_priv->drm, size);
if (IS_ERR(obj))
return PTR_ERR(obj);
return ERR_CAST(obj);
/* mark ring buffers as read-only from GPU side by default */
obj->gt_ro = 1;
ring->obj = obj;
vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
if (IS_ERR(vma))
goto err;
return 0;
return vma;
err:
i915_gem_object_put(obj);
return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
struct intel_ring *ring;
int ret;
struct i915_vma *vma;
GEM_BUG_ON(!is_power_of_2(size));
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (ring == NULL) {
DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
engine->name);
if (!ring)
return ERR_PTR(-ENOMEM);
}
ring->engine = engine;
list_add(&ring->link, &engine->buffers);
INIT_LIST_HEAD(&ring->request_list);
@ -2060,22 +2000,21 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
ring->last_retired_head = -1;
intel_ring_update_space(ring);
ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring);
if (ret) {
DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
engine->name, ret);
list_del(&ring->link);
vma = intel_ring_create_vma(engine->i915, size);
if (IS_ERR(vma)) {
kfree(ring);
return ERR_PTR(ret);
return ERR_CAST(vma);
}
ring->vma = vma;
list_add(&ring->link, &engine->buffers);
return ring;
}
void
intel_ring_free(struct intel_ring *ring)
{
intel_destroy_ringbuffer_obj(ring);
i915_vma_put(ring->vma);
list_del(&ring->link);
kfree(ring);
}
@ -2092,8 +2031,12 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
return 0;
if (ce->state) {
ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
ctx->ggtt_alignment, 0);
ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false);
if (ret)
goto error;
ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment,
PIN_GLOBAL | PIN_HIGH);
if (ret)
goto error;
}
@ -2127,7 +2070,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx,
return;
if (ce->state)
i915_gem_object_ggtt_unpin(ce->state);
i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
@ -2165,7 +2108,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
ret = PTR_ERR(ring);
goto error;
}
engine->buffer = ring;
if (I915_NEED_GFX_HWS(dev_priv)) {
ret = init_status_page(engine);
@ -2180,11 +2122,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
ret = intel_ring_pin(ring);
if (ret) {
DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
engine->name, ret);
intel_destroy_ringbuffer_obj(ring);
intel_ring_free(ring);
goto error;
}
engine->buffer = ring;
return 0;
@ -2203,7 +2144,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
dev_priv = engine->i915;
if (engine->buffer) {
WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0);
WARN_ON(INTEL_GEN(dev_priv) > 2 &&
(I915_READ_MODE(engine) & MODE_IDLE) == 0);
intel_ring_unpin(engine->buffer);
intel_ring_free(engine->buffer);
@ -2371,50 +2313,6 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
return 0;
}
void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
{
struct drm_i915_private *dev_priv = engine->i915;
/* Our semaphore implementation is strictly monotonic (i.e. we proceed
* so long as the semaphore value in the register/page is greater
* than the sync value), so whenever we reset the seqno, as long as
* we also reset the tracking semaphore value to 0, it will
* always be before the next request's seqno. If we don't reset
* the semaphore value, then when the seqno moves backwards all
* future waits will complete instantly (causing rendering corruption).
*/
if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
if (HAS_VEBOX(dev_priv))
I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
}
if (dev_priv->semaphore_obj) {
struct drm_i915_gem_object *obj = dev_priv->semaphore_obj;
struct page *page = i915_gem_object_get_dirty_page(obj, 0);
void *semaphores = kmap(page);
memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
kunmap(page);
}
memset(engine->semaphore.sync_seqno, 0,
sizeof(engine->semaphore.sync_seqno));
intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
if (engine->irq_seqno_barrier)
engine->irq_seqno_barrier(engine);
engine->last_submitted_seqno = seqno;
engine->hangcheck.seqno = seqno;
/* After manually advancing the seqno, fake the interrupt in case
* there are any waiters for that seqno.
*/
rcu_read_lock();
intel_engine_wakeup(engine);
rcu_read_unlock();
}
static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
{
struct drm_i915_private *dev_priv = request->i915;
@ -2624,35 +2522,36 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
if (!i915.semaphores)
return;
if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) {
if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
struct i915_vma *vma;
obj = i915_gem_object_create(&dev_priv->drm, 4096);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
i915.semaphores = 0;
} else {
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
if (ret != 0) {
i915_gem_object_put(obj);
DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
i915.semaphores = 0;
} else {
dev_priv->semaphore_obj = obj;
}
}
if (IS_ERR(obj))
goto err;
vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
if (IS_ERR(vma))
goto err_obj;
ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (ret)
goto err_obj;
ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (ret)
goto err_obj;
dev_priv->semaphore = vma;
}
if (!i915.semaphores)
return;
if (INTEL_GEN(dev_priv) >= 8) {
u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj);
u32 offset = i915_ggtt_offset(dev_priv->semaphore);
engine->semaphore.sync_to = gen8_ring_sync_to;
engine->semaphore.signal = gen8_xcs_signal;
for (i = 0; i < I915_NUM_ENGINES; i++) {
u64 ring_offset;
u32 ring_offset;
if (i != engine->id)
ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
@ -2672,47 +2571,55 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
* initialized as INVALID. Gen8 will initialize the
* sema between VCS2 and RCS later.
*/
for (i = 0; i < I915_NUM_ENGINES; i++) {
for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
static const struct {
u32 wait_mbox;
i915_reg_t mbox_reg;
} sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
[RCS] = {
[VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
[BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
[RCS_HW] = {
[VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
[BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
},
[VCS] = {
[RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
[BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
[VCS_HW] = {
[RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
[BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
},
[BCS] = {
[RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
[VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
[BCS_HW] = {
[RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
[VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
},
[VECS] = {
[RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
[VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
[BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
[VECS_HW] = {
[RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
[VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
[BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
},
};
u32 wait_mbox;
i915_reg_t mbox_reg;
if (i == engine->id || i == VCS2) {
if (i == engine->hw_id) {
wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
mbox_reg = GEN6_NOSYNC;
} else {
wait_mbox = sem_data[engine->id][i].wait_mbox;
mbox_reg = sem_data[engine->id][i].mbox_reg;
wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
}
engine->semaphore.mbox.wait[i] = wait_mbox;
engine->semaphore.mbox.signal[i] = mbox_reg;
}
}
return;
err_obj:
i915_gem_object_put(obj);
err:
DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
i915.semaphores = 0;
}
static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
@ -2808,11 +2715,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
return ret;
if (INTEL_GEN(dev_priv) >= 6) {
ret = intel_init_pipe_control(engine, 4096);
ret = intel_engine_create_scratch(engine, 4096);
if (ret)
return ret;
} else if (HAS_BROKEN_CS_TLB(dev_priv)) {
ret = intel_init_pipe_control(engine, I830_WA_SIZE);
ret = intel_engine_create_scratch(engine, I830_WA_SIZE);
if (ret)
return ret;
}

View file

@ -26,10 +26,10 @@
*/
#define I915_RING_FREE_SPACE 64
struct intel_hw_status_page {
u32 *page_addr;
unsigned int gfx_addr;
struct drm_i915_gem_object *obj;
struct intel_hw_status_page {
struct i915_vma *vma;
u32 *page_addr;
u32 ggtt_offset;
};
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@ -57,10 +57,10 @@ struct intel_hw_status_page {
#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(__ring, to) \
(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
(dev_priv->semaphore->node.start + \
GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
#define GEN8_WAIT_OFFSET(__ring, from) \
(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
(dev_priv->semaphore->node.start + \
GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
enum intel_engine_hangcheck_action {
@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action {
struct intel_engine_hangcheck {
u64 acthd;
unsigned long user_interrupts;
u32 seqno;
int score;
enum intel_engine_hangcheck_action action;
@ -84,9 +83,8 @@ struct intel_engine_hangcheck {
};
struct intel_ring {
struct drm_i915_gem_object *obj;
void *vaddr;
struct i915_vma *vma;
void *vaddr;
struct intel_engine_cs *engine;
struct list_head link;
@ -124,12 +122,12 @@ struct drm_i915_reg_table;
* an option for future use.
* size: size of the batch in DWORDS
*/
struct i915_ctx_workarounds {
struct i915_ctx_workarounds {
struct i915_wa_ctx_bb {
u32 offset;
u32 size;
} indirect_ctx, per_ctx;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
};
struct drm_i915_gem_request;
@ -147,8 +145,14 @@ struct intel_engine_cs {
#define I915_NUM_ENGINES 5
#define _VCS(n) (VCS + (n))
unsigned int exec_id;
unsigned int hw_id;
unsigned int guc_id; /* XXX same as hw_id? */
enum intel_engine_hw_id {
RCS_HW = 0,
VCS_HW,
BCS_HW,
VECS_HW,
VCS2_HW
} hw_id;
enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
u64 fence_context;
u32 mmio_base;
unsigned int irq_shift;
@ -172,8 +176,7 @@ struct intel_engine_cs {
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
struct task_struct *irq_seqno_bh; /* bh for user interrupts */
unsigned long irq_wakeups;
struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
bool irq_posted;
spinlock_t lock; /* protects the lists of requests */
@ -183,6 +186,9 @@ struct intel_engine_cs {
struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */
unsigned long timeout;
bool irq_enabled : 1;
bool rpm_wakelock : 1;
@ -197,6 +203,7 @@ struct intel_engine_cs {
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_vma *scratch;
u32 irq_keep_mask; /* always keep these interrupts */
u32 irq_enable_mask; /* bitmask to enable ring interrupt */
@ -270,11 +277,14 @@ struct intel_engine_cs {
u32 sync_seqno[I915_NUM_ENGINES-1];
union {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
struct {
/* our mbox written by others */
u32 wait[I915_NUM_ENGINES];
u32 wait[GEN6_NUM_SEMAPHORES];
/* mboxes this ring signals to */
i915_reg_t signal[I915_NUM_ENGINES];
i915_reg_t signal[GEN6_NUM_SEMAPHORES];
} mbox;
u64 signal_ggtt[I915_NUM_ENGINES];
};
@ -310,7 +320,7 @@ struct intel_engine_cs {
/* An RCU guarded pointer to the last request. No reference is
* held to the request, users must carefully acquire a reference to
* the request using i915_gem_active_get_request_rcu(), or hold the
* the request using i915_gem_active_get_rcu(), or hold the
* struct_mutex.
*/
struct i915_gem_active last_request;
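The comment above spells out the access protocol for last_request: either hold struct_mutex, or take a temporary reference from inside an RCU read-side critical section. A sketch of the lock-free path, assuming the usual request get/put helpers:

static struct drm_i915_gem_request *
peek_last_request_sketch(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *rq;

	rcu_read_lock();
	/* Returns a referenced request (or NULL) without struct_mutex */
	rq = i915_gem_active_get_rcu(&engine->last_request);
	rcu_read_unlock();

	return rq;	/* caller eventually drops it with i915_gem_request_put() */
}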
@ -319,11 +329,6 @@ struct intel_engine_cs {
struct intel_engine_hangcheck hangcheck;
struct {
struct drm_i915_gem_object *obj;
u32 gtt_offset;
} scratch;
bool needs_cmd_parser;
/*
@ -475,11 +480,9 @@ void intel_ring_update_space(struct intel_ring *ring);
void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
void intel_fini_pipe_control(struct intel_engine_cs *engine);
void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
static inline int intel_engine_idle(struct intel_engine_cs *engine,
@ -515,7 +518,7 @@ int init_workarounds_ring(struct intel_engine_cs *engine);
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR;
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
@ -538,29 +541,35 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
}
static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
{
bool wakeup = false;
struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
/* Note that for this not to dangerously chase a dangling pointer,
* the caller is responsible for ensuring that the task remains valid for
* wake_up_process() i.e. that the RCU grace period cannot expire.
* we must hold the rcu_read_lock here.
*
* Also note that tsk is likely to be in !TASK_RUNNING state so an
* early test for tsk->state != TASK_RUNNING before wake_up_process()
* is unlikely to be beneficial.
*/
if (tsk)
wakeup = wake_up_process(tsk);
if (intel_engine_has_waiter(engine)) {
struct task_struct *tsk;
rcu_read_lock();
tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
if (tsk)
wakeup = wake_up_process(tsk);
rcu_read_unlock();
}
return wakeup;
}
void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_kick_waiters(struct drm_i915_private *i915);
unsigned int intel_kick_signalers(struct drm_i915_private *i915);

View file

@ -592,6 +592,8 @@ void bxt_disable_dc9(struct drm_i915_private *dev_priv)
DRM_DEBUG_KMS("Disabling DC9\n");
gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
intel_pps_unlock_regs_wa(dev_priv);
}
static void assert_csr_loaded(struct drm_i915_private *dev_priv)
@ -854,7 +856,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
struct i915_power_well *power_well)
{
enum skl_disp_power_wells power_well_id = power_well->data;
struct i915_power_well *cmn_a_well;
struct i915_power_well *cmn_a_well = NULL;
if (power_well_id == BXT_DPIO_CMN_BC) {
/*
@ -867,7 +869,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well));
if (power_well_id == BXT_DPIO_CMN_BC)
if (cmn_a_well)
intel_power_well_put(dev_priv, cmn_a_well);
}
@ -1121,6 +1123,8 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
}
i915_redisable_vga_power_on(&dev_priv->drm);
intel_pps_unlock_regs_wa(dev_priv);
}
static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)

View file

@ -203,21 +203,19 @@ skl_update_plane(struct drm_plane *drm_plane,
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_plane *intel_plane = to_intel_plane(drm_plane);
struct drm_framebuffer *fb = plane_state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
const int pipe = intel_plane->pipe;
const int plane = intel_plane->plane + 1;
u32 plane_ctl, stride_div, stride;
u32 plane_ctl;
const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
u32 surf_addr;
u32 tile_height, plane_offset, plane_size;
u32 surf_addr = plane_state->main.offset;
unsigned int rotation = plane_state->base.rotation;
int x_offset, y_offset;
u32 stride = skl_plane_stride(fb, 0, rotation);
int crtc_x = plane_state->base.dst.x1;
int crtc_y = plane_state->base.dst.y1;
uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
uint32_t x = plane_state->base.src.x1 >> 16;
uint32_t y = plane_state->base.src.y1 >> 16;
uint32_t x = plane_state->main.x;
uint32_t y = plane_state->main.y;
uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
@ -230,15 +228,6 @@ skl_update_plane(struct drm_plane *drm_plane,
plane_ctl |= skl_plane_ctl_rotation(rotation);
stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
fb->pixel_format);
/* Sizes are 0 based */
src_w--;
src_h--;
crtc_w--;
crtc_h--;
if (key->flags) {
I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value);
I915_WRITE(PLANE_KEYMAX(pipe, plane), key->max_value);
@ -250,28 +239,15 @@ skl_update_plane(struct drm_plane *drm_plane,
else if (key->flags & I915_SET_COLORKEY_SOURCE)
plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
surf_addr = intel_plane_obj_offset(intel_plane, obj, 0);
/* Sizes are 0 based */
src_w--;
src_h--;
crtc_w--;
crtc_h--;
if (intel_rotation_90_or_270(rotation)) {
int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
/* stride: Surface height in tiles */
tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
stride = DIV_ROUND_UP(fb->height, tile_height);
plane_size = (src_w << 16) | src_h;
x_offset = stride * tile_height - y - (src_h + 1);
y_offset = x;
} else {
stride = fb->pitches[0] / stride_div;
plane_size = (src_h << 16) | src_w;
x_offset = x;
y_offset = y;
}
plane_offset = y_offset << 16 | x_offset;
I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset);
I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x);
I915_WRITE(PLANE_STRIDE(pipe, plane), stride);
I915_WRITE(PLANE_SIZE(pipe, plane), plane_size);
I915_WRITE(PLANE_SIZE(pipe, plane), (src_h << 16) | src_w);
/* program plane scaler */
if (plane_state->scaler_id >= 0) {
@ -296,7 +272,8 @@ skl_update_plane(struct drm_plane *drm_plane,
}
I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl);
I915_WRITE(PLANE_SURF(pipe, plane), surf_addr);
I915_WRITE(PLANE_SURF(pipe, plane),
intel_fb_gtt_offset(fb, rotation) + surf_addr);
POSTING_READ(PLANE_SURF(pipe, plane));
}
@ -363,13 +340,11 @@ vlv_update_plane(struct drm_plane *dplane,
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_plane *intel_plane = to_intel_plane(dplane);
struct drm_framebuffer *fb = plane_state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
int pipe = intel_plane->pipe;
int plane = intel_plane->plane;
u32 sprctl;
u32 sprsurf_offset, linear_offset;
unsigned int rotation = dplane->state->rotation;
int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
int crtc_x = plane_state->base.dst.x1;
int crtc_y = plane_state->base.dst.y1;
@ -431,7 +406,7 @@ vlv_update_plane(struct drm_plane *dplane,
*/
sprctl |= SP_GAMMA_ENABLE;
if (i915_gem_object_is_tiled(obj))
if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
sprctl |= SP_TILED;
/* Sizes are 0 based */
@ -440,19 +415,18 @@ vlv_update_plane(struct drm_plane *dplane,
crtc_w--;
crtc_h--;
linear_offset = y * fb->pitches[0] + x * cpp;
sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0,
fb->pitches[0], rotation);
linear_offset -= sprsurf_offset;
intel_add_fb_offsets(&x, &y, plane_state, 0);
sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
if (rotation == DRM_ROTATE_180) {
sprctl |= SP_ROTATE_180;
x += src_w;
y += src_h;
linear_offset += src_h * fb->pitches[0] + src_w * cpp;
}
linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
if (key->flags) {
I915_WRITE(SPKEYMINVAL(pipe, plane), key->min_value);
I915_WRITE(SPKEYMAXVAL(pipe, plane), key->max_value);
@ -468,7 +442,7 @@ vlv_update_plane(struct drm_plane *dplane,
I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
if (i915_gem_object_is_tiled(obj))
if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x);
else
I915_WRITE(SPLINOFF(pipe, plane), linear_offset);
@ -477,8 +451,8 @@ vlv_update_plane(struct drm_plane *dplane,
I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
I915_WRITE(SPCNTR(pipe, plane), sprctl);
I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
sprsurf_offset);
I915_WRITE(SPSURF(pipe, plane),
intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
POSTING_READ(SPSURF(pipe, plane));
}
@ -506,12 +480,10 @@ ivb_update_plane(struct drm_plane *plane,
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_plane *intel_plane = to_intel_plane(plane);
struct drm_framebuffer *fb = plane_state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
enum pipe pipe = intel_plane->pipe;
u32 sprctl, sprscale = 0;
u32 sprsurf_offset, linear_offset;
unsigned int rotation = plane_state->base.rotation;
int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
int crtc_x = plane_state->base.dst.x1;
int crtc_y = plane_state->base.dst.y1;
@ -553,7 +525,7 @@ ivb_update_plane(struct drm_plane *plane,
*/
sprctl |= SPRITE_GAMMA_ENABLE;
if (i915_gem_object_is_tiled(obj))
if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
sprctl |= SPRITE_TILED;
if (IS_HASWELL(dev) || IS_BROADWELL(dev))
@ -573,10 +545,8 @@ ivb_update_plane(struct drm_plane *plane,
if (crtc_w != src_w || crtc_h != src_h)
sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h;
linear_offset = y * fb->pitches[0] + x * cpp;
sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0,
fb->pitches[0], rotation);
linear_offset -= sprsurf_offset;
intel_add_fb_offsets(&x, &y, plane_state, 0);
sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
if (rotation == DRM_ROTATE_180) {
sprctl |= SPRITE_ROTATE_180;
@ -585,10 +555,11 @@ ivb_update_plane(struct drm_plane *plane,
if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
x += src_w;
y += src_h;
linear_offset += src_h * fb->pitches[0] + src_w * cpp;
}
}
linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
if (key->flags) {
I915_WRITE(SPRKEYVAL(pipe), key->min_value);
I915_WRITE(SPRKEYMAX(pipe), key->max_value);
@ -607,7 +578,7 @@ ivb_update_plane(struct drm_plane *plane,
* register */
if (IS_HASWELL(dev) || IS_BROADWELL(dev))
I915_WRITE(SPROFFSET(pipe), (y << 16) | x);
else if (i915_gem_object_is_tiled(obj))
else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x);
else
I915_WRITE(SPRLINOFF(pipe), linear_offset);
@ -617,7 +588,7 @@ ivb_update_plane(struct drm_plane *plane,
I915_WRITE(SPRSCALE(pipe), sprscale);
I915_WRITE(SPRCTL(pipe), sprctl);
I915_WRITE(SPRSURF(pipe),
i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
POSTING_READ(SPRSURF(pipe));
}
@ -647,12 +618,10 @@ ilk_update_plane(struct drm_plane *plane,
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_plane *intel_plane = to_intel_plane(plane);
struct drm_framebuffer *fb = plane_state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
int pipe = intel_plane->pipe;
u32 dvscntr, dvsscale;
u32 dvssurf_offset, linear_offset;
unsigned int rotation = plane_state->base.rotation;
int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
int crtc_x = plane_state->base.dst.x1;
int crtc_y = plane_state->base.dst.y1;
@ -694,7 +663,7 @@ ilk_update_plane(struct drm_plane *plane,
*/
dvscntr |= DVS_GAMMA_ENABLE;
if (i915_gem_object_is_tiled(obj))
if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
dvscntr |= DVS_TILED;
if (IS_GEN6(dev))
@ -710,19 +679,18 @@ ilk_update_plane(struct drm_plane *plane,
if (crtc_w != src_w || crtc_h != src_h)
dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;
linear_offset = y * fb->pitches[0] + x * cpp;
dvssurf_offset = intel_compute_tile_offset(&x, &y, fb, 0,
fb->pitches[0], rotation);
linear_offset -= dvssurf_offset;
intel_add_fb_offsets(&x, &y, plane_state, 0);
dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
if (rotation == DRM_ROTATE_180) {
dvscntr |= DVS_ROTATE_180;
x += src_w;
y += src_h;
linear_offset += src_h * fb->pitches[0] + src_w * cpp;
}
linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
if (key->flags) {
I915_WRITE(DVSKEYVAL(pipe), key->min_value);
I915_WRITE(DVSKEYMAX(pipe), key->max_value);
@ -737,7 +705,7 @@ ilk_update_plane(struct drm_plane *plane,
I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
if (i915_gem_object_is_tiled(obj))
if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x);
else
I915_WRITE(DVSLINOFF(pipe), linear_offset);
@ -746,7 +714,7 @@ ilk_update_plane(struct drm_plane *plane,
I915_WRITE(DVSSCALE(pipe), dvsscale);
I915_WRITE(DVSCNTR(pipe), dvscntr);
I915_WRITE(DVSSURF(pipe),
i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
intel_fb_gtt_offset(fb, rotation) + dvssurf_offset);
POSTING_READ(DVSSURF(pipe));
}
@ -785,6 +753,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
int hscale, vscale;
int max_scale, min_scale;
bool can_scale;
int ret;
src->x1 = state->base.src_x;
src->y1 = state->base.src_y;
@ -949,6 +918,12 @@ intel_check_sprite_plane(struct drm_plane *plane,
dst->y1 = crtc_y;
dst->y2 = crtc_y + crtc_h;
if (INTEL_GEN(dev) >= 9) {
ret = skl_check_plane_surface(state);
if (ret)
return ret;
}
return 0;
}

View file

@ -93,6 +93,6 @@ extern bool i915_gpu_turbo_disable(void);
#define I845_TSEG_SIZE_1M (3 << 1)
#define INTEL_BSM 0x5c
#define INTEL_BSM_MASK (0xFFFF << 20)
#define INTEL_BSM_MASK (-(1u << 20))
#endif /* _I915_DRM_H_ */
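
For reference, the new INTEL_BSM_MASK spelling is the unsigned way of writing "every bit from bit 20 upwards", i.e. the base-address field of the 32-bit BSM register, and it avoids shifting the 16-bit constant 0xFFFF past the top of a signed int. A tiny compile-time check of the resulting value, assuming the usual 32-bit unsigned int (illustrative only, not part of the header):

#include <stdint.h>

/* -(1u << 20) == ~((1u << 20) - 1): bits 31:20 set, bits 19:0 clear. */
_Static_assert((uint32_t)-(1u << 20) == 0xFFF00000u,
               "INTEL_BSM_MASK covers bits 31:20");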

View file

@ -31,16 +31,16 @@
* See Documentation/io-mapping.txt
*/
#ifdef CONFIG_HAVE_ATOMIC_IOMAP
#include <asm/iomap.h>
struct io_mapping {
resource_size_t base;
unsigned long size;
pgprot_t prot;
void __iomem *iomem;
};
#ifdef CONFIG_HAVE_ATOMIC_IOMAP
#include <asm/iomap.h>
/*
* For small address space machines, mapping large objects
* into the kernel virtual space isn't practical. Where
@ -49,34 +49,25 @@ struct io_mapping {
*/
static inline struct io_mapping *
io_mapping_create_wc(resource_size_t base, unsigned long size)
io_mapping_init_wc(struct io_mapping *iomap,
resource_size_t base,
unsigned long size)
{
struct io_mapping *iomap;
pgprot_t prot;
iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
if (!iomap)
goto out_err;
if (iomap_create_wc(base, size, &prot))
goto out_free;
return NULL;
iomap->base = base;
iomap->size = size;
iomap->prot = prot;
return iomap;
out_free:
kfree(iomap);
out_err:
return NULL;
}
static inline void
io_mapping_free(struct io_mapping *mapping)
io_mapping_fini(struct io_mapping *mapping)
{
iomap_free(mapping->base, mapping->size);
kfree(mapping);
}
/* Atomic map/unmap */
@ -121,21 +112,46 @@ io_mapping_unmap(void __iomem *vaddr)
#else
#include <linux/uaccess.h>
/* this struct isn't actually defined anywhere */
struct io_mapping;
#include <asm/pgtable.h>
/* Create the io_mapping object */
static inline struct io_mapping *
io_mapping_create_wc(resource_size_t base, unsigned long size)
io_mapping_init_wc(struct io_mapping *iomap,
resource_size_t base,
unsigned long size)
{
return (struct io_mapping __force *) ioremap_wc(base, size);
iomap->base = base;
iomap->size = size;
iomap->iomem = ioremap_wc(base, size);
#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */
iomap->prot = pgprot_noncached_wc(PAGE_KERNEL);
#elif defined(pgprot_writecombine)
iomap->prot = pgprot_writecombine(PAGE_KERNEL);
#else
iomap->prot = pgprot_noncached(PAGE_KERNEL);
#endif
return iomap;
}
static inline void
io_mapping_free(struct io_mapping *mapping)
io_mapping_fini(struct io_mapping *mapping)
{
iounmap(mapping->iomem);
}
/* Non-atomic map/unmap */
static inline void __iomem *
io_mapping_map_wc(struct io_mapping *mapping,
unsigned long offset,
unsigned long size)
{
return mapping->iomem + offset;
}
static inline void
io_mapping_unmap(void __iomem *vaddr)
{
iounmap((void __force __iomem *) mapping);
}
/* Atomic map/unmap */
@ -145,30 +161,42 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
{
preempt_disable();
pagefault_disable();
return ((char __force __iomem *) mapping) + offset;
return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
}
static inline void
io_mapping_unmap_atomic(void __iomem *vaddr)
{
io_mapping_unmap(vaddr);
pagefault_enable();
preempt_enable();
}
/* Non-atomic map/unmap */
static inline void __iomem *
io_mapping_map_wc(struct io_mapping *mapping,
unsigned long offset,
unsigned long size)
#endif /* HAVE_ATOMIC_IOMAP */
static inline struct io_mapping *
io_mapping_create_wc(resource_size_t base,
unsigned long size)
{
return ((char __force __iomem *) mapping) + offset;
struct io_mapping *iomap;
iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
if (!iomap)
return NULL;
if (!io_mapping_init_wc(iomap, base, size)) {
kfree(iomap);
return NULL;
}
return iomap;
}
static inline void
io_mapping_unmap(void __iomem *vaddr)
io_mapping_free(struct io_mapping *iomap)
{
io_mapping_fini(iomap);
kfree(iomap);
}
#endif /* HAVE_ATOMIC_IOMAP */
#endif /* _LINUX_IO_MAPPING_H */
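
Splitting io_mapping_create_wc()/io_mapping_free() into io_mapping_init_wc()/io_mapping_fini() plus thin kmalloc wrappers is what allows a driver to embed struct io_mapping instead of allocating it, as the "Embed the io-mapping struct inside drm_i915_private" patch in this series does. A minimal sketch of the embedded pattern, using only the functions declared above; my_device and its helpers are invented names, not driver code:

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/io-mapping.h>
#include <linux/mm.h>

struct my_device {
        struct io_mapping iomap;        /* embedded, no separate kmalloc */
};

static int my_device_map_aperture(struct my_device *dev,
                                  resource_size_t base, unsigned long size)
{
        /* Returns NULL on failure in the HAVE_ATOMIC_IOMAP configuration. */
        if (!io_mapping_init_wc(&dev->iomap, base, size))
                return -EIO;
        return 0;
}

static void my_device_poke_page(struct my_device *dev, unsigned long offset)
{
        void __iomem *vaddr;

        /* Non-atomic write-combining mapping of one page of the aperture. */
        vaddr = io_mapping_map_wc(&dev->iomap, offset, PAGE_SIZE);
        writel(0, vaddr);
        io_mapping_unmap(vaddr);
}

static void my_device_unmap_aperture(struct my_device *dev)
{
        io_mapping_fini(&dev->iomap);
}

Callers that cannot sleep would use io_mapping_map_atomic_wc()/io_mapping_unmap_atomic() for the page access instead; the init/fini lifetime is the same.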

View file

@ -855,7 +855,16 @@ struct drm_i915_gem_busy {
* having flushed any pending activity), and a non-zero return that
* the object is still in-flight on the GPU. (The GPU has not yet
* signaled completion for all pending requests that reference the
* object.)
* object.) An object is guaranteed to become idle eventually (so
* long as no new GPU commands are executed upon it). Due to the
* asynchronous nature of the hardware, an object reported
* as busy may become idle before the ioctl is completed.
*
* Furthermore, if the object is busy, the report of which engines are
* busy is only provided as a guide; race conditions prevent that report
* from always being accurate. The idle case, however, is reliable: if
* the ioctl reports the object as idle, then all engines really have
* finished with it.
*
* The returned dword is split into two fields to indicate both
* the engines on which the object is being read, and the
@ -878,6 +887,11 @@ struct drm_i915_gem_busy {
* execution engines, e.g. multiple media engines, which are
* mapped to the same identifier in the EXECBUFFER2 ioctl and
* so are not separately reported for busyness.
*
* Caveat emptor:
* Only the boolean result of this query is reliable, i.e. whether the
* object is idle or busy. The report of which engines are busy should
* only be used as a heuristic.
*/
__u32 busy;
};
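
The guidance above boils down to: trust the zero/non-zero answer, treat the per-engine bits as advisory. A small userspace sketch of that rule, assuming libdrm's drmIoctl() and the installed i915 uapi header; the include paths and the gem_bo_is_busy() helper are illustrative, not an existing API:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns true only if the kernel still reports the object as in flight.
 * Per the comment above, the engine bitmask in busy.busy is a heuristic,
 * so it is ignored here; only the boolean result is trusted. */
static bool gem_bo_is_busy(int drm_fd, uint32_t handle)
{
        struct drm_i915_gem_busy busy;

        memset(&busy, 0, sizeof(busy));
        busy.handle = handle;

        if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
                return true;    /* be conservative if the ioctl itself fails */

        return busy.busy != 0;
}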