diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 387b4979f45a..1a7e05da470e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -833,6 +833,9 @@ struct amdgpu_ring { /* maximum number of VMIDs */ #define AMDGPU_NUM_VM 16 +/* Maximum number of PTEs the hardware can write with one command */ +#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF + /* number of entries in page table */ #define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 12925016370b..673c258e49db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { + ((last_pt + incr * count) != pt) || + (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { amdgpu_vm_update_pages(¶ms, last_pde, @@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, next_pe_start = amdgpu_bo_gpu_offset(pt); next_pe_start += (addr & mask) * 8; - if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) { + if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && + ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { /* The next ptb is consecutive to current ptb. * Don't call amdgpu_vm_update_pages now. * Will update two ptbs together in future. diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index e5e44f42e20e..e71cd12104b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -694,24 +694,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t src, unsigned count) { - while (count) { - unsigned bytes = count * 8; - if (bytes > 0x1FFFF8) - bytes = 0x1FFFF8; + unsigned bytes = count * 8; - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, - SDMA_WRITE_SUB_OPCODE_LINEAR, 0); - ib->ptr[ib->length_dw++] = bytes; - ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ - ib->ptr[ib->length_dw++] = lower_32_bits(src); - ib->ptr[ib->length_dw++] = upper_32_bits(src); - ib->ptr[ib->length_dw++] = lower_32_bits(pe); - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - - pe += bytes; - src += bytes; - count -= bytes / 8; - } + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, + SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = bytes; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); } /** @@ -755,40 +747,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, * * Update the page tables using sDMA (CIK). */ -static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, - uint64_t pe, +static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - uint64_t value; - unsigned ndw; - - while (count) { - ndw = count; - if (ndw > 0x7FFFF) - ndw = 0x7FFFF; - - if (flags & AMDGPU_PTE_VALID) - value = addr; - else - value = 0; - - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = ndw; /* number of entries */ - - pe += ndw * 8; - addr += ndw * incr; - count -= ndw; - } + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count; /* number of entries */ } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index af0f0d283472..e82229686783 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -749,24 +749,16 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t src, unsigned count) { - while (count) { - unsigned bytes = count * 8; - if (bytes > 0x1FFFF8) - bytes = 0x1FFFF8; + unsigned bytes = count * 8; - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); - ib->ptr[ib->length_dw++] = bytes; - ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ - ib->ptr[ib->length_dw++] = lower_32_bits(src); - ib->ptr[ib->length_dw++] = upper_32_bits(src); - ib->ptr[ib->length_dw++] = lower_32_bits(pe); - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - - pe += bytes; - src += bytes; - count -= bytes / 8; - } + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); } /** @@ -810,40 +802,21 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, * * Update the page tables using sDMA (CIK). */ -static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, - uint64_t pe, +static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - uint64_t value; - unsigned ndw; - - while (count) { - ndw = count; - if (ndw > 0x7FFFF) - ndw = 0x7FFFF; - - if (flags & AMDGPU_PTE_VALID) - value = addr; - else - value = 0; - - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = ndw; /* number of entries */ - - pe += ndw * 8; - addr += ndw * incr; - count -= ndw; - } + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count; /* number of entries */ } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 88faaee37258..bee4978bec73 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -976,24 +976,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t src, unsigned count) { - while (count) { - unsigned bytes = count * 8; - if (bytes > 0x1FFFF8) - bytes = 0x1FFFF8; + unsigned bytes = count * 8; - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); - ib->ptr[ib->length_dw++] = bytes; - ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ - ib->ptr[ib->length_dw++] = lower_32_bits(src); - ib->ptr[ib->length_dw++] = upper_32_bits(src); - ib->ptr[ib->length_dw++] = lower_32_bits(pe); - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - - pe += bytes; - src += bytes; - count -= bytes / 8; - } + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); } /** @@ -1037,40 +1029,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, * * Update the page tables using sDMA (CIK). */ -static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, - uint64_t pe, +static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - uint64_t value; - unsigned ndw; - - while (count) { - ndw = count; - if (ndw > 0x7FFFF) - ndw = 0x7FFFF; - - if (flags & AMDGPU_PTE_VALID) - value = addr; - else - value = 0; - - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = ndw; /* number of entries */ - - pe += ndw * 8; - addr += ndw * incr; - count -= ndw; - } + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count; /* number of entries */ } /**