mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-28 13:22:57 +00:00
nouveau: fix mapping 2MB sysmem pages
The nvif_object_ioctl() method NVIF_VMM_V0_PFNMAP wasn't correctly setting the hardware specific GPU page table entries for 2MB sized pages. Fix this by adding functions to set and clear PD0 GPU page table entries. Link: https://lore.kernel.org/r/20200701225352.9649-4-rcampbell@nvidia.com Signed-off-by: Ralph Campbell <rcampbell@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
parent
0cafc62e4d
commit
4725c6b82a
2 changed files with 84 additions and 3 deletions
|
@ -1204,7 +1204,6 @@ nvkm_vmm_pfn_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size)
|
|||
/*TODO:
|
||||
* - Avoid PT readback (for dma_unmap etc), this might end up being dealt
|
||||
* with inside HMM, which would be a lot nicer for us to deal with.
|
||||
* - Multiple page sizes (particularly for huge page support).
|
||||
* - Support for systems without a 4KiB page size.
|
||||
*/
|
||||
int
|
||||
|
@ -1220,8 +1219,8 @@ nvkm_vmm_pfn_map(struct nvkm_vmm *vmm, u8 shift, u64 addr, u64 size, u64 *pfn)
|
|||
/* Only support mapping where the page size of the incoming page
|
||||
* array matches a page size available for direct mapping.
|
||||
*/
|
||||
while (page->shift && page->shift != shift &&
|
||||
page->desc->func->pfn == NULL)
|
||||
while (page->shift && (page->shift != shift ||
|
||||
page->desc->func->pfn == NULL))
|
||||
page++;
|
||||
|
||||
if (!page->shift || !IS_ALIGNED(addr, 1ULL << shift) ||
|
||||
|
|
|
@ -258,12 +258,94 @@ gp100_vmm_pd0_unmap(struct nvkm_vmm *vmm,
|
|||
VMM_FO128(pt, vmm, pdei * 0x10, 0ULL, 0ULL, pdes);
|
||||
}
|
||||
|
||||
/* Release the DMA mappings behind 2MB PFN entries in a range of PD0
 * (dual-)PDEs before they are torn down.
 *
 * Each PD0 entry is 16 bytes; only the low 64 bits are read here.  An
 * entry whose aperture field (bits 2:1) is non-zero refers to system
 * memory that was DMA-mapped by gp100_vmm_pd0_pfn(), so the mapping
 * must be undone with dma_unmap_page().  VRAM-backed entries have a
 * zero aperture field and need no unmapping.
 *
 * vmm:  the virtual memory manager owning the page tables.
 * pt:   the page-table memory holding the PD0 entries.
 * ptei: index of the first entry to process.
 * ptes: number of entries to process.
 */
static void
gp100_vmm_pd0_pfn_unmap(struct nvkm_vmm *vmm,
			struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes)
{
	struct device *dev = vmm->mmu->subdev.device->dev;
	dma_addr_t addr;

	nvkm_kmap(pt->memory);
	while (ptes--) {
		/* Entries are 128-bit; read back the low 64 bits. */
		u32 datalo = nvkm_ro32(pt->memory, pt->base + ptei * 16 + 0);
		u32 datahi = nvkm_ro32(pt->memory, pt->base + ptei * 16 + 4);
		u64 data = (u64)datahi << 32 | datalo;

		/* Non-zero aperture field => system memory, DMA-mapped. */
		if ((data & (3ULL << 1)) != 0) {
			/* Address field starts at bit 8 and holds PA >> 12;
			 * recover the DMA address recorded by pd0_pfn().
			 */
			addr = (data >> 8) << 12;
			/* 1UL << 21 == 2MB, the PD0 large-page size. */
			dma_unmap_page(dev, addr, 1UL << 21, DMA_BIDIRECTIONAL);
		}
		ptei++;
	}
	nvkm_done(pt->memory);
}
|
||||
|
||||
/* Invalidate 2MB PFN entries in a range of PD0 entries, keeping the
 * rest of each entry intact so pd0_pfn_unmap() can still find the DMA
 * address afterwards.
 *
 * Only entries that are currently VALID (bit 0) and have a non-zero
 * aperture field (bits 2:1, i.e. DMA-mapped system memory) are
 * rewritten with VALID cleared.
 *
 * Returns true if any entry was cleared — i.e. there are DMA mappings
 * that still need to be released by the caller via pfn_unmap.
 */
static bool
gp100_vmm_pd0_pfn_clear(struct nvkm_vmm *vmm,
			struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes)
{
	bool dma = false;

	nvkm_kmap(pt->memory);
	while (ptes--) {
		/* Entries are 128-bit; read back the low 64 bits. */
		u32 datalo = nvkm_ro32(pt->memory, pt->base + ptei * 16 + 0);
		u32 datahi = nvkm_ro32(pt->memory, pt->base + ptei * 16 + 4);
		u64 data = (u64)datahi << 32 | datalo;

		if ((data & BIT_ULL(0)) && (data & (3ULL << 1)) != 0) {
			/* Clear VALID only; preserve the DMA address bits. */
			VMM_WO064(pt, vmm, ptei * 16, data & ~BIT_ULL(0));
			dma = true;
		}
		ptei++;
	}
	nvkm_done(pt->memory);
	return dma;
}
|
||||
|
||||
/* Write 2MB PFN mappings into a range of PD0 entries.
 *
 * For each entry, one pfn value is consumed from map->pfn and encoded
 * into the low 64 bits of a 16-byte PD0 entry:
 *  - system-memory pages are DMA-mapped here (2MB, bidirectional) and
 *    written with the SYSTEM_COHERENT_MEMORY aperture and VOL set;
 *  - VRAM pages use the address carried in the pfn value directly.
 * On a DMA mapping failure the entry is left with data == 0 (invalid),
 * after a WARN_ON — the range write continues with the next entry.
 *
 * vmm:  the virtual memory manager owning the page tables.
 * pt:   the page-table memory holding the PD0 entries.
 * ptei: index of the first entry to write.
 * ptes: number of entries to write.
 * map:  mapping state; map->pfn is advanced one slot per entry.
 */
static void
gp100_vmm_pd0_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
		  u32 ptei, u32 ptes, struct nvkm_vmm_map *map)
{
	struct device *dev = vmm->mmu->subdev.device->dev;
	dma_addr_t addr;

	nvkm_kmap(pt->memory);
	while (ptes--) {
		u64 data = 0;

		if (!(*map->pfn & NVKM_VMM_PFN_W))
			data |= BIT_ULL(6); /* RO. */

		if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) {
			/* System memory: DMA-map the 2MB page now. */
			addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT;
			addr = dma_map_page(dev, pfn_to_page(addr), 0,
					    1UL << 21, DMA_BIDIRECTIONAL);
			if (!WARN_ON(dma_mapping_error(dev, addr))) {
				/* Address field is PA >> 12 at bit 8,
				 * hence the >> 4 on the byte address.
				 */
				data |= addr >> 4;
				data |= 2ULL << 1; /* SYSTEM_COHERENT_MEMORY. */
				data |= BIT_ULL(3); /* VOL. */
				data |= BIT_ULL(0); /* VALID. */
			}
		} else {
			/* VRAM: address already encoded in the pfn value. */
			data |= (*map->pfn & NVKM_VMM_PFN_ADDR) >> 4;
			data |= BIT_ULL(0); /* VALID. */
		}

		VMM_WO064(pt, vmm, ptei++ * 16, data);
		map->pfn++;
	}
	nvkm_done(pt->memory);
}
|
||||
|
||||
/* GP100 PD0 descriptor ops.  The pfn/pfn_clear/pfn_unmap hooks give
 * PD0 the same PFN-map support the leaf PTs have, enabling direct
 * 2MB-page mappings through NVIF_VMM_V0_PFNMAP.
 */
static const struct nvkm_vmm_desc_func
gp100_vmm_desc_pd0 = {
	.unmap = gp100_vmm_pd0_unmap,
	.sparse = gp100_vmm_pd0_sparse,
	.pde = gp100_vmm_pd0_pde,
	.mem = gp100_vmm_pd0_mem,
	.pfn = gp100_vmm_pd0_pfn,
	.pfn_clear = gp100_vmm_pd0_pfn_clear,
	.pfn_unmap = gp100_vmm_pd0_pfn_unmap,
};
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in a new issue