drm/amdgpu: stop splitting PTE commands into smaller ones

It doesn't make much sense to create bigger commands first which we then need
to split into smaller one again. Just make sure the commands we create aren't
to big in the first place.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König 2016-08-12 12:59:59 +02:00 committed by Alex Deucher
parent dc157c6daa
commit 96105e5375
5 changed files with 70 additions and 146 deletions

View File

@ -833,6 +833,9 @@ struct amdgpu_ring {
/* maximum number of VMIDs */
#define AMDGPU_NUM_VM 16
/* Maximum number of PTEs the hardware can write with one command */
#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
/* number of entries in page table */
#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)

View File

@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
((last_pt + incr * count) != pt)) {
((last_pt + incr * count) != pt) ||
(count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
if (count) {
amdgpu_vm_update_pages(&params, last_pde,
@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
next_pe_start = amdgpu_bo_gpu_offset(pt);
next_pe_start += (addr & mask) * 8;
if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
/* The next ptb is consecutive to current ptb.
* Don't call amdgpu_vm_update_pages now.
* Will update two ptbs together in future.

View File

@ -694,24 +694,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
while (count) {
unsigned bytes = count * 8;
if (bytes > 0x1FFFF8)
bytes = 0x1FFFF8;
unsigned bytes = count * 8;
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = bytes;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
pe += bytes;
src += bytes;
count -= bytes / 8;
}
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = bytes;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@ -755,40 +747,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*
* Update the page tables using sDMA (CIK).
*/
static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
uint64_t pe,
static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
uint64_t value;
unsigned ndw;
while (count) {
ndw = count;
if (ndw > 0x7FFFF)
ndw = 0x7FFFF;
if (flags & AMDGPU_PTE_VALID)
value = addr;
else
value = 0;
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
ib->ptr[ib->length_dw++] = pe; /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = value; /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(value);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = ndw; /* number of entries */
pe += ndw * 8;
addr += ndw * incr;
count -= ndw;
}
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**

View File

@ -749,24 +749,16 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
while (count) {
unsigned bytes = count * 8;
if (bytes > 0x1FFFF8)
bytes = 0x1FFFF8;
unsigned bytes = count * 8;
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
ib->ptr[ib->length_dw++] = bytes;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
pe += bytes;
src += bytes;
count -= bytes / 8;
}
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
ib->ptr[ib->length_dw++] = bytes;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@ -810,40 +802,21 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*
* Update the page tables using sDMA (CIK).
*/
static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
uint64_t pe,
static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
uint64_t value;
unsigned ndw;
while (count) {
ndw = count;
if (ndw > 0x7FFFF)
ndw = 0x7FFFF;
if (flags & AMDGPU_PTE_VALID)
value = addr;
else
value = 0;
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
ib->ptr[ib->length_dw++] = pe; /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = value; /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(value);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = ndw; /* number of entries */
pe += ndw * 8;
addr += ndw * incr;
count -= ndw;
}
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**

View File

@ -976,24 +976,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
while (count) {
unsigned bytes = count * 8;
if (bytes > 0x1FFFF8)
bytes = 0x1FFFF8;
unsigned bytes = count * 8;
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
ib->ptr[ib->length_dw++] = bytes;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
pe += bytes;
src += bytes;
count -= bytes / 8;
}
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
ib->ptr[ib->length_dw++] = bytes;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@ -1037,40 +1029,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*
* Update the page tables using sDMA (CIK).
*/
static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
uint64_t pe,
static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
uint64_t value;
unsigned ndw;
while (count) {
ndw = count;
if (ndw > 0x7FFFF)
ndw = 0x7FFFF;
if (flags & AMDGPU_PTE_VALID)
value = addr;
else
value = 0;
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
ib->ptr[ib->length_dw++] = pe; /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = value; /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(value);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = ndw; /* number of entries */
pe += ndw * 8;
addr += ndw * incr;
count -= ndw;
}
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**