Merge branch 'drm-next-4.16' of git://people.freedesktop.org/~agd5f/linux into drm-next

A few fixes for 4.16:
- Clean up the remains of ttm io_mem_pfn
- A couple of dpm quirks for SI
- Add Chunming as another amdgpu maintainer
- A few more huge page fixes
- A few other misc fixes

* 'drm-next-4.16' of git://people.freedesktop.org/~agd5f/linux:
  drm/amd/pp: Implement get_max_high_clocks for CI/VI
  MAINTAINERS: add David (Chunming) Zhou as additional amdgpu maintainer
  drm/amdgpu: fix 64bit BAR detection
  drm/amdgpu: optimize moved handling only when vm_debug is inactive
  drm/amdgpu: simplify huge page handling
  drm/amdgpu: update VM PDs after the PTs
  drm/amdgpu: minor optimize VM moved handling v2
  drm/amdgpu: loosen the criteria for huge pages a bit
  drm/amd/powerplay: set pp_num_states as 0 on error situation
  drm/ttm: specify DMA_ATTR_NO_WARN for huge page pools
  drm/ttm: remove ttm_bo_default_io_mem_pfn
  staging: remove the default io_mem_pfn set
  drm/amd/powerplay: fix memory leakage when reload (v2)
  drm/amdgpu/gfx9: only init the apertures used by KGD (v2)
  drm/amdgpu: add atpx quirk handling (v2)
  drm/amdgpu: Add dpm quirk for Jet PRO (v2)
  drm/radeon: Add dpm quirk for Jet PRO (v2)
Dave Airlie 2018-01-12 12:16:58 +10:00
commit 8563188e37
16 changed files with 139 additions and 86 deletions


@@ -11382,6 +11382,7 @@ F: drivers/net/wireless/quantenna
RADEON and AMDGPU DRM DRIVERS
M: Alex Deucher <alexander.deucher@amd.com>
M: Christian König <christian.koenig@amd.com>
M: David (ChunMing) Zhou <David1.Zhou@amd.com>
L: amd-gfx@lists.freedesktop.org
T: git git://people.freedesktop.org/~agd5f/linux
S: Supported


@@ -14,6 +14,16 @@
#include "amd_acpi.h"
#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0)
struct amdgpu_px_quirk {
u32 chip_vendor;
u32 chip_device;
u32 subsys_vendor;
u32 subsys_device;
u32 px_quirk_flags;
};
struct amdgpu_atpx_functions {
bool px_params;
bool power_cntl;
@@ -35,6 +45,7 @@ struct amdgpu_atpx {
static struct amdgpu_atpx_priv {
bool atpx_detected;
bool bridge_pm_usable;
unsigned int quirks;
/* handle for device - and atpx */
acpi_handle dhandle;
acpi_handle other_handle;
@@ -205,13 +216,19 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
atpx->is_hybrid = false;
if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
printk("ATPX Hybrid Graphics\n");
/*
* Disable legacy PM methods only when pcie port PM is usable,
* otherwise the device might fail to power off or power on.
*/
atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
atpx->is_hybrid = true;
if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
printk("ATPX Hybrid Graphics, forcing to ATPX\n");
atpx->functions.power_cntl = true;
atpx->is_hybrid = false;
} else {
printk("ATPX Hybrid Graphics\n");
/*
* Disable legacy PM methods only when pcie port PM is usable,
* otherwise the device might fail to power off or power on.
*/
atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
atpx->is_hybrid = true;
}
}
atpx->dgpu_req_power_for_displays = false;
@@ -547,6 +564,30 @@ static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
.get_client_id = amdgpu_atpx_get_client_id,
};
static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
/* HG _PR3 doesn't seem to work on this A+A weston board */
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
static void amdgpu_atpx_get_quirks(struct pci_dev *pdev)
{
const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list;
/* Apply PX quirks */
while (p && p->chip_device != 0) {
if (pdev->vendor == p->chip_vendor &&
pdev->device == p->chip_device &&
pdev->subsystem_vendor == p->subsys_vendor &&
pdev->subsystem_device == p->subsys_device) {
amdgpu_atpx_priv.quirks |= p->px_quirk_flags;
break;
}
++p;
}
}
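For illustration (not part of the patch above): supporting another board later only takes one more entry ahead of the zero terminator, matched against the PCI vendor/device and subsystem IDs that amdgpu_atpx_get_quirks() reads from the device. The IDs in this sketch are placeholders, not a real board:

/* hypothetical example entry - placeholder IDs, not a real board */
{ 0x1002, 0x6900, 0x17aa, 0x3988, AMDGPU_PX_QUIRK_FORCE_ATPX },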
/**
* amdgpu_atpx_detect - detect whether we have PX
*
@@ -570,6 +611,7 @@ static bool amdgpu_atpx_detect(void)
parent_pdev = pci_upstream_bridge(pdev);
d3_supported |= parent_pdev && parent_pdev->bridge_d3;
amdgpu_atpx_get_quirks(pdev);
}
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
@@ -579,6 +621,7 @@ static bool amdgpu_atpx_detect(void)
parent_pdev = pci_upstream_bridge(pdev);
d3_supported |= parent_pdev && parent_pdev->bridge_d3;
amdgpu_atpx_get_quirks(pdev);
}
if (has_atpx && vga_count == 2) {


@@ -778,10 +778,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
struct amdgpu_bo *bo;
int i, r;
r = amdgpu_vm_update_directories(adev, vm);
if (r)
return r;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
@@ -839,6 +835,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (r)
return r;
r = amdgpu_vm_update_directories(adev, vm);
if (r)
return r;
r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
if (r)
return r;


@@ -626,7 +626,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
root = root->parent;
pci_bus_for_each_resource(root, res, i) {
if (res && res->flags & IORESOURCE_MEM_64 &&
if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
res->start > 0x100000000ull)
break;
}


@@ -518,10 +518,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (!amdgpu_vm_ready(vm))
return;
r = amdgpu_vm_update_directories(adev, vm);
if (r)
goto error;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
goto error;
@@ -530,6 +526,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
operation == AMDGPU_VA_OP_REPLACE)
r = amdgpu_vm_bo_update(adev, bo_va, false);
r = amdgpu_vm_update_directories(adev, vm);
if (r)
goto error;
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);


@@ -946,57 +946,38 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
unsigned nptes, uint64_t dst,
uint64_t flags)
{
bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
uint64_t pd_addr, pde;
/* In the case of a mixed PT the PDE must point to it */
if (p->adev->asic_type < CHIP_VEGA10 ||
nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
p->src ||
!(flags & AMDGPU_PTE_VALID)) {
dst = amdgpu_bo_gpu_offset(entry->base.bo);
flags = AMDGPU_PTE_VALID;
} else {
if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
/* Set the huge page flag to stop scanning at this PDE */
flags |= AMDGPU_PDE_PTE;
}
if (!entry->huge && !(flags & AMDGPU_PDE_PTE))
if (!(flags & AMDGPU_PDE_PTE)) {
if (entry->huge) {
/* Add the entry to the relocated list to update it. */
entry->huge = false;
spin_lock(&p->vm->status_lock);
list_move(&entry->base.vm_status, &p->vm->relocated);
spin_unlock(&p->vm->status_lock);
}
return;
entry->huge = !!(flags & AMDGPU_PDE_PTE);
}
entry->huge = true;
amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
&dst, &flags);
if (use_cpu_update) {
/* In case a huge page is replaced with a system
* memory mapping, p->pages_addr != NULL and
* amdgpu_vm_cpu_set_ptes would try to translate dst
* through amdgpu_vm_map_gart. But dst is already a
* GPU address (of the page table). Disable
* amdgpu_vm_map_gart temporarily.
*/
dma_addr_t *tmp;
tmp = p->pages_addr;
p->pages_addr = NULL;
pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
if (parent->base.bo->shadow) {
pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
pde = pd_addr + (entry - parent->entries) * 8;
amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
p->pages_addr = tmp;
} else {
if (parent->base.bo->shadow) {
pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
pde = pd_addr + (entry - parent->entries) * 8;
amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
}
pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
pde = pd_addr + (entry - parent->entries) * 8;
amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
p->func(p, pde, dst, 1, 0, flags);
}
pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
pde = pd_addr + (entry - parent->entries) * 8;
p->func(p, pde, dst, 1, 0, flags);
}
/**
@@ -1208,12 +1189,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/* padding, etc. */
ndw = 64;
/* one PDE write for each huge page */
if (vm->root.base.bo->shadow)
ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6 * 2;
else
ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
if (pages_addr) {
/* copy commands needed */
ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
@@ -1288,8 +1263,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
error_free:
amdgpu_job_free(job);
amdgpu_vm_invalidate_level(adev, vm, &vm->root,
adev->vm_manager.root_level);
return r;
}
@@ -1700,18 +1673,31 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
spin_lock(&vm->status_lock);
while (!list_empty(&vm->moved)) {
struct amdgpu_bo_va *bo_va;
struct reservation_object *resv;
bo_va = list_first_entry(&vm->moved,
struct amdgpu_bo_va, base.vm_status);
spin_unlock(&vm->status_lock);
resv = bo_va->base.bo->tbo.resv;
/* Per VM BOs never need to be cleared in the page tables */
clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
if (resv == vm->root.base.bo->tbo.resv)
clear = false;
/* Try to reserve the BO to avoid clearing its ptes */
else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
clear = false;
/* Somebody else is using the BO right now */
else
clear = true;
r = amdgpu_vm_bo_update(adev, bo_va, clear);
if (r)
return r;
if (!clear && resv != vm->root.base.bo->tbo.resv)
reservation_object_unlock(resv);
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);


@@ -1526,7 +1526,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < 16; i++) {
for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
soc15_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
if (i == 0) {


@@ -3464,6 +3464,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
(adev->pdev->device == 0x6667)) {
max_sclk = 75000;
}
if ((adev->pdev->revision == 0xC3) ||
(adev->pdev->device == 0x6665)) {
max_sclk = 60000;
max_mclk = 80000;
}
} else if (adev->asic_type == CHIP_OLAND) {
if ((adev->pdev->revision == 0xC7) ||
(adev->pdev->revision == 0x80) ||
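Aside, not part of the patch: assuming the 10 kHz clock units already used for the existing 75000 (750 MHz) cap in this function, the new quirk limits the affected boards to 60000 x 10 kHz = 600 MHz engine clock and 80000 x 10 kHz = 800 MHz memory clock. The same hunk is applied to the radeon copy of si_dpm further down.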


@@ -718,6 +718,8 @@ static int pp_dpm_get_pp_num_states(void *handle,
struct pp_instance *pp_handle = (struct pp_instance *)handle;
int ret = 0;
memset(data, 0, sizeof(*data));
ret = pp_check(pp_handle);
if (ret)


@@ -4651,6 +4651,25 @@ static int smu7_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
return 0;
}
static int smu7_get_max_high_clocks(struct pp_hwmgr *hwmgr,
struct amd_pp_simple_clock_info *clocks)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
struct smu7_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
struct smu7_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
if (clocks == NULL)
return -EINVAL;
clocks->memory_max_clock = mclk_table->count > 1 ?
mclk_table->dpm_levels[mclk_table->count-1].value :
mclk_table->dpm_levels[0].value;
clocks->engine_max_clock = sclk_table->count > 1 ?
sclk_table->dpm_levels[sclk_table->count-1].value :
sclk_table->dpm_levels[0].value;
return 0;
}
static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
.backend_init = &smu7_hwmgr_backend_init,
.backend_fini = &smu7_hwmgr_backend_fini,
@@ -4703,6 +4722,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
.disable_smc_firmware_ctf = smu7_thermal_disable_alert,
.start_thermal_controller = smu7_start_thermal_controller,
.notify_cac_buffer_info = smu7_notify_cac_buffer_info,
.get_max_high_clocks = smu7_get_max_high_clocks,
};
uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
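A rough caller sketch, not part of the patch: only the get_max_high_clocks hook and the engine_max_clock/memory_max_clock fields come from the hunk above; the wrapper name and error handling below are hypothetical.

/* hypothetical caller - query the new hook through the hwmgr function table */
static int query_max_clocks(struct pp_hwmgr *hwmgr)
{
        struct amd_pp_simple_clock_info clocks = {0};

        if (!hwmgr->hwmgr_func->get_max_high_clocks ||
            hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, &clocks))
                return -EINVAL;

        pr_info("max sclk %u, max mclk %u\n",
                clocks.engine_max_clock, clocks.memory_max_clock);
        return 0;
}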


@@ -648,6 +648,12 @@ int smu7_init(struct pp_hwmgr *hwmgr)
int smu7_smu_fini(struct pp_hwmgr *hwmgr)
{
struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend);
smu_free_memory(hwmgr->device, (void *) smu_data->header_buffer.handle);
if (!cgs_is_virtualization_enabled(hwmgr->device))
smu_free_memory(hwmgr->device, (void *) smu_data->smu_buffer.handle);
kfree(hwmgr->smu_backend);
hwmgr->smu_backend = NULL;
cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);


@@ -2984,6 +2984,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
(rdev->pdev->device == 0x6667)) {
max_sclk = 75000;
}
if ((rdev->pdev->revision == 0xC3) ||
(rdev->pdev->device == 0x6665)) {
max_sclk = 60000;
max_mclk = 80000;
}
} else if (rdev->family == CHIP_OLAND) {
if ((rdev->pdev->revision == 0xC7) ||
(rdev->pdev->revision == 0x80) ||


@@ -100,7 +100,8 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
if (bdev->driver->io_mem_pfn)
return bdev->driver->io_mem_pfn(bo, page_offset);
return ttm_bo_default_io_mem_pfn(bo, page_offset);
return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT)
+ page_offset;
}
static int ttm_bo_vm_fault(struct vm_fault *vmf)
@@ -420,14 +421,6 @@ static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
return bo;
}
unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT)
+ page_offset;
}
EXPORT_SYMBOL(ttm_bo_default_io_mem_pfn);
int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
struct ttm_bo_device *bdev)
{


@@ -333,14 +333,18 @@ static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
{
struct dma_page *d_page;
unsigned long attrs = 0;
void *vaddr;
d_page = kmalloc(sizeof(struct dma_page), GFP_KERNEL);
if (!d_page)
return NULL;
vaddr = dma_alloc_coherent(pool->dev, pool->size, &d_page->dma,
pool->gfp_flags);
if (pool->type & IS_HUGE)
attrs = DMA_ATTR_NO_WARN;
vaddr = dma_alloc_attrs(pool->dev, pool->size, &d_page->dma,
pool->gfp_flags, attrs);
if (vaddr) {
if (is_vmalloc_addr(vaddr))
d_page->p = vmalloc_to_page(vaddr);
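For context (a sketch, not part of the patch): DMA_ATTR_NO_WARN only suppresses the allocation-failure warning, which is what the huge-page pool wants, since a failed 2 MiB allocation is expected and the allocator can fall back to smaller pages. A minimal illustration of that try-big-then-fall-back pattern, with a hypothetical device pointer and helper name:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/sizes.h>

/* hypothetical helper: try a 2 MiB DMA allocation quietly, then fall back */
static void *alloc_huge_or_fallback(struct device *dev, dma_addr_t *handle)
{
        void *vaddr;

        /* huge allocation: failure is expected, so don't spam the log */
        vaddr = dma_alloc_attrs(dev, SZ_2M, handle, GFP_KERNEL,
                                DMA_ATTR_NO_WARN);
        if (!vaddr)
                /* fall back to a single page, warning normally on failure */
                vaddr = dma_alloc_attrs(dev, PAGE_SIZE, handle, GFP_KERNEL, 0);
        return vaddr;
}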


@@ -234,7 +234,6 @@ static struct ttm_bo_driver vbox_bo_driver = {
.verify_access = vbox_bo_verify_access,
.io_mem_reserve = &vbox_ttm_io_mem_reserve,
.io_mem_free = &vbox_ttm_io_mem_free,
.io_mem_pfn = ttm_bo_default_io_mem_pfn,
};
int vbox_mm_init(struct vbox_private *vbox)


@@ -704,17 +704,6 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map);
*/
int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo);
/**
* ttm_bo_default_iomem_pfn - get a pfn for a page offset
*
* @bo: the BO we need to look up the pfn for
* @page_offset: offset inside the BO to look up.
*
* Calculate the PFN for iomem based mappings during page fault
*/
unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset);
/**
* ttm_bo_mmap - mmap out of the ttm device address space.
*