mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Merge tag 'amd-drm-next-5.20-2022-07-05' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.20-2022-07-05:

amdgpu:
- Various spelling and grammar fixes
- Various eDP fixes
- Various DMCUB fixes
- VCN fixes
- GMC 11 fixes
- RAS fixes
- TMZ support for GC 10.3.7
- GPUVM TLB flush fixes
- SMU 13.0.x updates
- DCN 3.2 support
- DCN 3.2.1 support
- MES updates
- GFX11 modifiers support
- USB-C fixes
- MMHUB 3.0.1 support
- SDMA 6.0 doorbell fixes
- Initial devcoredump support
- Enable high priority gfx queue on asics which support it
- Enable GPU reset for SMU 13.0.4
- OLED display fixes
- MPO fixes
- DC frame size fixes
- ASPM support for PCIE 7.4/7.6
- GPU reset support for SMU 13.0.0
- GFX11 updates
- VCN JPEG fix
- BACO support for SMU 13.0.7
- VCN instance handling fix
- GFX8 GPUVM TLB flush fix
- GPU reset rework
- VCN 4.0.2 support
- GTT size fixes
- DP link training fixes
- LSDMA 6.0.1 support
- Various backlight fixes
- Color encoding fixes
- Backlight config cleanup
- VCN 4.x unified queue cleanup

amdkfd:
- MMU notifier fixes
- Updates for GC 10.3.6 and 10.3.7
- P2P DMA support using dma-buf
- Add available memory IOCTL
- SDMA 6.0.1 fix
- MES fixes
- HMM profiler support

radeon:
- License fix
- Backlight config cleanup

UAPI:
- Add available memory IOCTL to amdkfd
  Proposed userspace: https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
- HMM profiler support for amdkfd
  Proposed userspace: https://lists.freedesktop.org/archives/amd-gfx/2022-June/080805.html

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220705212633.6037-1-alexander.deucher@amd.com
commit 344feb7ccf
334 changed files with 354593 additions and 2370 deletions
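Of the UAPI changes above, the available-memory query is the piece userspace is expected to adopt first. Below is a minimal sketch of how a client might call it; the struct layout and ioctl number follow the proposed kfd_ioctl.h additions linked in the message and should be treated as illustrative, not authoritative, and the gpu_id value is a hypothetical placeholder (real IDs come from the KFD topology under sysfs).

/* Sketch: query KFD for allocatable VRAM, per the proposed uAPI above.
 * Assumes AMDKFD_IOC_AVAILABLE_MEMORY and its args struct land as
 * proposed; verify against the merged include/uapi/linux/kfd_ioctl.h.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	/* gpu_id is a placeholder; obtain the real one from
	 * /sys/class/kfd/kfd/topology/nodes/<n>/gpu_id.
	 */
	struct kfd_ioctl_get_available_memory_args args = { .gpu_id = 0x1002 };
	int fd = open("/dev/kfd", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == 0)
		printf("available: %llu bytes\n",
		       (unsigned long long)args.available);
	close(fd);
	return 0;
}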
drivers/gpu/drm/amd/amdgpu/Makefile
@@ -88,7 +88,8 @@ amdgpu-y += \
 	gmc_v8_0.o \
 	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
 	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
-	mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o
+	mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+	mmhub_v3_0_1.o
 
 # add UMC block
 amdgpu-y += \
drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -223,6 +223,9 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool __maybe_unused debug_evictions; /* = false */
 static const bool __maybe_unused no_system_mem_limit;
 #endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
+#endif
 
 extern int amdgpu_tmz;
 extern int amdgpu_reset_method;
@@ -274,7 +277,7 @@ extern int amdgpu_vcnfw_log;
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
 
-/* smasrt shift bias level limits */
+/* smart shift bias level limits */
 #define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
 #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
 
@@ -667,6 +670,7 @@ enum amd_hw_ip_block_type {
 	RSMU_HWIP,
 	XGMI_HWIP,
 	DCI_HWIP,
+	PCIE_HWIP,
 	MAX_HWIP
 };
 
@@ -1044,10 +1048,18 @@ struct amdgpu_device {
 
 	/* reset dump register */
 	uint32_t			*reset_dump_reg_list;
+	uint32_t			*reset_dump_reg_value;
 	int				num_regs;
+#ifdef CONFIG_DEV_COREDUMP
+	struct amdgpu_task_info		reset_task_info;
+	bool				reset_vram_lost;
+	struct timespec64		reset_time;
+#endif
 
 	bool				scpm_enabled;
 	uint32_t			scpm_status;
+
+	struct work_struct		reset_work;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1242,7 +1254,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
-			      struct amdgpu_job* job);
-int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job);
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -66,9 +66,7 @@ struct amdgpu_atif {
 	struct amdgpu_atif_notifications notifications;
 	struct amdgpu_atif_functions functions;
 	struct amdgpu_atif_notification_cfg notification_cfg;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 	struct backlight_device *bd;
-#endif
 	struct amdgpu_dm_backlight_caps backlight_caps;
 };
 
@@ -436,7 +434,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 	DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
 
 	if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 		if (atif->bd) {
 			DRM_DEBUG_DRIVER("Changing brightness to %d\n",
 					 req.backlight_level);
@@ -447,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 			 */
 			backlight_device_set_brightness(atif->bd, req.backlight_level);
 		}
-#endif
 	}
 
 	if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
@@ -849,7 +845,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
 
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 	if (atif->notifications.brightness_change) {
 		if (amdgpu_device_has_dc_support(adev)) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -876,7 +871,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 			}
 		}
 	}
-#endif
 	adev->acpi_nb.notifier_call = amdgpu_acpi_event;
 	register_acpi_notifier(&adev->acpi_nb);
 
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -33,6 +33,7 @@
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_ras.h"
 #include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
 
 /* Total memory size in system memory and all GPU VRAM. Used to
  * estimate worst case amount of memory to reserve for page tables
@@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 	}
 }
 
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+						  kfd.reset_work);
+
+	amdgpu_device_gpu_recover(adev, NULL);
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
 						adev_to_drm(adev), &gpu_resources);
+
+		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
 	}
 }
 
@@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
 {
 	if (amdgpu_device_should_recover_gpu(adev))
-		amdgpu_device_gpu_recover(adev, NULL);
+		amdgpu_reset_domain_schedule(adev->reset_domain,
+					     &adev->kfd.reset_work);
 }
 
 int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@@ -671,6 +684,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 		goto err_ib_sched;
 	}
 
+	/* Drop the initial kref_init count (see drm_sched_main as example) */
+	dma_fence_put(f);
 	ret = dma_fence_wait(f, false);
 
 err_ib_sched:
@@ -714,7 +729,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 {
 	bool all_hub = false;
 
-	if (adev->family == AMDGPU_FAMILY_AI)
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
 		all_hub = true;
 
 	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -48,6 +48,7 @@ enum kfd_mem_attachment_type {
 	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
 	KFD_MEM_ATT_USERPTR,	/* SG bo to DMA map pages from a userptr bo */
 	KFD_MEM_ATT_DMABUF,	/* DMAbuf to DMA map TTM BOs */
+	KFD_MEM_ATT_SG		/* Tag to DMA map SG BOs */
 };
 
 struct kfd_mem_attachment {
@@ -96,6 +97,7 @@ struct amdgpu_kfd_dev {
 	struct kfd_dev *dev;
 	uint64_t vram_used;
 	bool init_complete;
+	struct work_struct reset_work;
 };
 
 enum kgd_engine_type {
@@ -266,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
 					void *drm_priv);
 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
@@ -279,10 +282,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
 		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem, void **kptr, uint64_t *size);
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+		void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
@@ -332,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 }
 #endif
 /* KGD2KFD callbacks */
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
 int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -32,12 +32,19 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_smi_events.h"
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 
+/*
+ * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_ALLOCATION_ALIGN (1 << 21)
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
@@ -108,7 +115,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
  * compromise that should work in most cases without reserving too
  * much memory for page tables unnecessarily (factor 16K, >> 14).
  */
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
 
 static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 {
@@ -148,7 +155,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		vram_needed = size;
+
+		/*
+		 * Conservatively round up the allocation requirement to 2 MB
+		 * to avoid fragmentation caused by 4K allocations in the tail
+		 * 2M BO chunk.
+		 */
+		vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
@@ -173,7 +186,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 	     kfd_mem_limit.max_ttm_mem_limit) ||
 	    (adev->kfd.vram_used + vram_needed >
-	     adev->gmc.real_vram_size - reserved_for_pt)) {
+	     adev->gmc.real_vram_size -
+	     atomic64_read(&adev->vram_pin_size) -
+	     reserved_for_pt)) {
 		ret = -ENOMEM;
 		goto release;
 	}
@@ -205,7 +220,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		adev->kfd.vram_used -= size;
+		adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -241,6 +256,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 	kfree(bo->kfd_bo);
 }
 
+/**
+ * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
+ * about USERPTR or DOOREBELL or MMIO BO.
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ *	 in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+		 struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+	struct drm_gem_object *gem_obj;
+	int ret, align;
+
+	ret = amdgpu_bo_reserve(mem->bo, false);
+	if (ret)
+		return ret;
+
+	align = 1;
+	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
+			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
+			ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+
+	amdgpu_bo_unreserve(mem->bo);
+
+	if (ret) {
+		pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+		return -EINVAL;
+	}
+
+	*bo_out = gem_to_amdgpu_bo(gem_obj);
+	(*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+	return ret;
+}
+
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
@@ -446,6 +497,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	return pte_flags;
 }
 
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+	if (!sg)
+		return NULL;
+	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+		kfree(sg);
+		return NULL;
+	}
+	sg_dma_address(sg->sgl) = addr;
+	sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	sg->sgl->dma_length = size;
+#endif
+	return sg;
+}
+
 static int
 kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 		       struct kfd_mem_attachment *attachment)
@@ -510,6 +593,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ *   - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ *         in updating requesting device's page table
+ *   - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ *         accessible. This allows an update of requesting device's page table
+ *         with entries associated with DOOREBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *   - Mapping of DOORBELL or MMIO BO of same or peer device
+ *   - Validating an evicted DOOREBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+		     struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	enum dma_data_direction dir;
+	dma_addr_t dma_addr;
+	bool mmio;
+	int ret;
+
+	/* Expect SG Table of dmapmap BO to be NULL */
+	mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+	if (unlikely(ttm->sg)) {
+		pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+		return -EINVAL;
+	}
+
+	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+			DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+	pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+	pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+	dma_addr = dma_map_resource(adev->dev, dma_addr,
+			mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	ret = dma_mapping_error(adev->dev, dma_addr);
+	if (unlikely(ret))
+		return ret;
+	pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+	ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+	if (unlikely(!ttm->sg)) {
+		ret = -ENOMEM;
+		goto unmap_sg;
+	}
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (unlikely(ret))
+		goto free_sg;
+
+	return ret;
+
+free_sg:
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+unmap_sg:
+	dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+			   dir, DMA_ATTR_SKIP_CPU_SYNC);
+	return ret;
+}
+
 static int
 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 			  struct kfd_mem_attachment *attachment)
@@ -521,6 +685,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 		return kfd_mem_dmamap_userptr(mem, attachment);
 	case KFD_MEM_ATT_DMABUF:
 		return kfd_mem_dmamap_dmabuf(attachment);
+	case KFD_MEM_ATT_SG:
+		return kfd_mem_dmamap_sg_bo(mem, attachment);
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -561,6 +727,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
 	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ *   - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ *   - Free SG Table that is used to encapsulate DMA mapped memory of
+ *          peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *     UNMapping of DOORBELL or MMIO BO on a device having access to its memory
+ *     Eviction of DOOREBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+		       struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	enum dma_data_direction dir;
+
+	if (unlikely(!ttm->sg)) {
+		pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+		return;
+	}
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+				DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+			ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+	bo->tbo.sg = NULL;
+}
+
 static void
 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 			    struct kfd_mem_attachment *attachment)
@@ -574,38 +784,14 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	case KFD_MEM_ATT_DMABUF:
 		kfd_mem_dmaunmap_dmabuf(attachment);
 		break;
+	case KFD_MEM_ATT_SG:
+		kfd_mem_dmaunmap_sg_bo(mem, attachment);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
 }
 
-static int
-kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
-		       struct amdgpu_bo **bo)
-{
-	unsigned long bo_size = mem->bo->tbo.base.size;
-	struct drm_gem_object *gobj;
-	int ret;
-
-	ret = amdgpu_bo_reserve(mem->bo, false);
-	if (ret)
-		return ret;
-
-	ret = amdgpu_gem_object_create(adev, bo_size, 1,
-				       AMDGPU_GEM_DOMAIN_CPU,
-				       AMDGPU_GEM_CREATE_PREEMPTIBLE,
-				       ttm_bo_type_sg, mem->bo->tbo.base.resv,
-				       &gobj);
-	amdgpu_bo_unreserve(mem->bo);
-	if (ret)
-		return ret;
-
-	*bo = gem_to_amdgpu_bo(gobj);
-	(*bo)->parent = amdgpu_bo_ref(mem->bo);
-
-	return 0;
-}
-
 static int
 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		      struct amdgpu_bo **bo)
@@ -656,6 +842,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	bool same_hive = false;
 	int i, ret;
 
 	if (!va) {
@@ -663,6 +850,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		return -EINVAL;
 	}
 
+	/* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+	 *
+	 * The access path of MMIO and DOORBELL BOs of is always over PCIe.
+	 * In contrast the access path of VRAM BOs depens upon the type of
+	 * link that connects the peer device. Access over PCIe is allowed
+	 * if peer device has large BAR. In contrast, access over xGMI is
+	 * allowed for both small and large BAR configurations of peer device
+	 */
+	if ((adev != bo_adev) &&
+	    ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+		if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+			same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+		if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+			return -EINVAL;
+	}
+
 	for (i = 0; i <= is_aql; i++) {
 		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
 		if (unlikely(!attachment[i])) {
@@ -673,9 +878,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			 va + bo_size, vm);
 
-		if (adev == bo_adev ||
-		   (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
-		   (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) {
+		if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
+		    same_hive) {
 			/* Mappings on the local GPU, or VRAM mappings in the
 			 * local hive, or userptr mapping IOMMU direct map mode
 			 * share the original BO
@@ -691,26 +896,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
 			/* Create an SG BO to DMA-map userptrs on other GPUs */
 			attachment[i]->type = KFD_MEM_ATT_USERPTR;
-			ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
+			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
-		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
-			   mem->bo->tbo.type != ttm_bo_type_sg) {
-			/* GTT BOs use DMA-mapping ability of dynamic-attach
-			 * DMA bufs. TODO: The same should work for VRAM on
-			 * large-BAR GPUs.
-			 */
+		/* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+		} else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+			WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+				    mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+				  "Handing invalid SG BO in ATTACH request");
+			attachment[i]->type = KFD_MEM_ATT_SG;
+			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+			if (ret)
+				goto unwind;
+		/* Enable acces to GTT and VRAM BOs of peer devices */
+		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+			   mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
 			attachment[i]->type = KFD_MEM_ATT_DMABUF;
 			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
+			pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
 		} else {
-			/* FIXME: Need to DMA-map other BO types:
-			 * large-BAR VRAM, doorbells, MMIO remap
-			 */
-			attachment[i]->type = KFD_MEM_ATT_SHARED;
-			bo[i] = mem->bo;
-			drm_gem_object_get(&bo[i]->tbo.base);
+			WARN_ONCE(true, "Handling invalid ATTACH request");
+			ret = -EINVAL;
+			goto unwind;
 		}
 
 		/* Add BO to VM internal data structures */
@@ -1111,24 +1320,6 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
 	return ret;
 }
 
-static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
-{
-	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-
-	if (!sg)
-		return NULL;
-	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
-		kfree(sg);
-		return NULL;
-	}
-	sg->sgl->dma_address = addr;
-	sg->sgl->length = size;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-	sg->sgl->dma_length = size;
-#endif
-	return sg;
-}
-
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
 	struct amdgpu_vm *peer_vm;
@@ -1457,6 +1648,22 @@ int amdgpu_amdkfd_criu_resume(void *p)
 	return ret;
 }
 
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+{
+	uint64_t reserved_for_pt =
+		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	size_t available;
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	available = adev->gmc.real_vram_size
+		- adev->kfd.vram_used
+		- atomic64_read(&adev->vram_pin_size)
+		- reserved_for_pt;
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+	return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+}
+
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
@@ -1497,7 +1704,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		bo_type = ttm_bo_type_sg;
 		if (size > UINT_MAX)
 			return -EINVAL;
-		sg = create_doorbell_sg(*offset, size);
+		sg = create_sg_table(*offset, size);
 		if (!sg)
 			return -ENOMEM;
 	} else {
@@ -1907,8 +2114,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem, void **kptr, uint64_t *size)
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	int ret;
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("Failed to reserve bo. ret %d\n", ret);
+		goto err_reserve_bo_failed;
+	}
+
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+	if (ret) {
+		pr_err("Failed to pin bo. ret %d\n", ret);
+		goto err_pin_bo_failed;
+	}
+
+	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+	if (ret) {
+		pr_err("Failed to bind bo to GART. ret %d\n", ret);
+		goto err_map_bo_gart_failed;
+	}
+
+	amdgpu_amdkfd_remove_eviction_fence(
+		bo, bo->kfd_bo->process_info->eviction_fence);
+
+	amdgpu_bo_unreserve(bo);
+
+	bo = amdgpu_bo_ref(bo);
+
+	return 0;
+
+err_map_bo_gart_failed:
+	amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+	amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+	return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be mapped for CPU access
+ * @kptr[out]: pointer in kernel CPU address space
+ * @size[out]: size of the buffer
+ *
+ * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
+ * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
+ * validate_list, so the GPU mapping can be restored after a page table was
+ * evicted.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+					     void **kptr, uint64_t *size)
 {
 	int ret;
 	struct amdgpu_bo *bo = mem->bo;
@@ -1959,8 +2227,15 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
 	return ret;
 }
 
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem)
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be unmapped for CPU access
+ *
+ * Removes the kernel CPU mapping and unpins the BO. It does not restore the
+ * eviction fence, so this function should only be used for cleanup before the
+ * BO is destroyed.
+ */
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
 {
 	struct amdgpu_bo *bo = mem->bo;
 
@@ -2072,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
 	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
 	if (evicted_bos == 1) {
 		/* First eviction, stop the queues */
-		r = kgd2kfd_quiesce_mm(mm);
+		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
 		if (r)
 			pr_err("Failed to quiesce KFD\n");
 		schedule_delayed_work(&process_info->restore_userptr_work,
@@ -2346,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 
 unlock_out:
 	mutex_unlock(&process_info->lock);
-	mmput(mm);
-	put_task_struct(usertask);
 
 	/* If validation failed, reschedule another attempt */
-	if (evicted_bos)
+	if (evicted_bos) {
 		schedule_delayed_work(&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+		kfd_smi_event_queue_restore_rescheduled(mm);
+	}
+	mmput(mm);
+	put_task_struct(usertask);
 }
 
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -110,7 +110,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
 {
 	switch (prio) {
 	case AMDGPU_CTX_PRIORITY_HIGH:
@@ -143,8 +143,9 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 			ctx->init_priority : ctx->override_priority;
 
 	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
 	case AMDGPU_HW_IP_COMPUTE:
-		hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
+		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
 		break;
 	case AMDGPU_HW_IP_VCE:
 	case AMDGPU_HW_IP_VCN_ENC:
@@ -779,7 +780,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
 				      amdgpu_ctx_to_drm_sched_prio(priority));
 
 	/* set hw priority */
-	if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
 		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
 		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
 		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1709,17 +1709,24 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
 		i++;
 	} while (len < size);
 
+	new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL);
+	if (!new) {
+		ret = -ENOMEM;
+		goto error_free;
+	}
 	ret = down_write_killable(&adev->reset_domain->sem);
 	if (ret)
 		goto error_free;
 
 	swap(adev->reset_dump_reg_list, tmp);
+	swap(adev->reset_dump_reg_value, new);
 	adev->num_regs = i;
 	up_write(&adev->reset_domain->sem);
 	ret = size;
 
 error_free:
 	kfree(tmp);
+	kfree(new);
 	return ret;
 }
 
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -32,6 +32,9 @@
 #include <linux/slab.h>
 #include <linux/iommu.h>
 #include <linux/pci.h>
+#include <linux/devcoredump.h>
+#include <generated/utsrelease.h>
+#include <linux/pci-p2pdma.h>
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_probe_helper.h>
@@ -1942,35 +1945,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	}
 
 	switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_SI
-	case CHIP_VERDE:
-	case CHIP_TAHITI:
-	case CHIP_PITCAIRN:
-	case CHIP_OLAND:
-	case CHIP_HAINAN:
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
-	case CHIP_BONAIRE:
-	case CHIP_HAWAII:
-	case CHIP_KAVERI:
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-#endif
-	case CHIP_TOPAZ:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-	case CHIP_VEGAM:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-	case CHIP_VEGA20:
-	case CHIP_ALDEBARAN:
-	case CHIP_SIENNA_CICHLID:
-	case CHIP_NAVY_FLOUNDER:
-	case CHIP_DIMGREY_CAVEFISH:
-	case CHIP_BEIGE_GOBY:
 	default:
 		return 0;
 	case CHIP_VEGA10:
@@ -3316,38 +3290,12 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_MULLINS:
 		/*
 		 * We have systems in the wild with these ASICs that require
-		 * LVDS and VGA support which is not supported with DC.
+		 * VGA support which is not supported with DC.
 		 *
 		 * Fallback to the non-DC driver here by default so as not to
 		 * cause regressions.
		 */
 		return amdgpu_dc > 0;
-	case CHIP_HAWAII:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-	case CHIP_VEGAM:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_VEGA10:
-	case CHIP_VEGA12:
-	case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	case CHIP_RAVEN:
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-	case CHIP_RENOIR:
-	case CHIP_CYAN_SKILLFISH:
-	case CHIP_SIENNA_CICHLID:
-	case CHIP_NAVY_FLOUNDER:
-	case CHIP_DIMGREY_CAVEFISH:
-	case CHIP_BEIGE_GOBY:
-	case CHIP_VANGOGH:
-	case CHIP_YELLOW_CARP:
-#endif
 	default:
 		return amdgpu_dc != 0;
 #else
@@ -3369,7 +3317,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
  */
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 {
-	if (amdgpu_sriov_vf(adev) || 
+	if (amdgpu_sriov_vf(adev) ||
 	    adev->enable_virtual_display ||
 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
 		return false;
@@ -3667,14 +3615,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (amdgpu_mcbp)
 		DRM_INFO("MCBP is enabled\n");
 
-	if (adev->asic_type >= CHIP_NAVI10) {
-		if (amdgpu_mes || amdgpu_mes_kiq)
-			adev->enable_mes = true;
-
-		if (amdgpu_mes_kiq)
-			adev->enable_mes_kiq = true;
-	}
-
 	/*
 	 * Reset domain needs to be present early, before XGMI hive discovered
 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
@@ -4666,6 +4606,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		amdgpu_virt_fini_data_exchange(adev);
 	}
 
+	amdgpu_fence_driver_isr_toggle(adev, true);
+
 	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -4681,6 +4623,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		amdgpu_fence_driver_force_completion(ring);
 	}
 
+	amdgpu_fence_driver_isr_toggle(adev, false);
+
 	if (job && job->vm)
 		drm_sched_increase_karma(&job->base);
 
@@ -4721,20 +4665,73 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 
 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
 {
-	uint32_t reg_value;
 	int i;
 
 	lockdep_assert_held(&adev->reset_domain->sem);
 	dump_stack();
 
 	for (i = 0; i < adev->num_regs; i++) {
-		reg_value = RREG32(adev->reset_dump_reg_list[i]);
-		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
+		adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
+		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
+					     adev->reset_dump_reg_value[i]);
 	}
 
 	return 0;
 }
 
+#ifdef CONFIG_DEV_COREDUMP
+static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
+		size_t count, void *data, size_t datalen)
+{
+	struct drm_printer p;
+	struct amdgpu_device *adev = data;
+	struct drm_print_iterator iter;
+	int i;
+
+	iter.data = buffer;
+	iter.offset = 0;
+	iter.start = offset;
+	iter.remain = count;
+
+	p = drm_coredump_printer(&iter);
+
+	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
+	if (adev->reset_task_info.pid)
+		drm_printf(&p, "process_name: %s PID: %d\n",
+			   adev->reset_task_info.process_name,
+			   adev->reset_task_info.pid);
+
+	if (adev->reset_vram_lost)
+		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+	if (adev->num_regs) {
+		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
+
+		for (i = 0; i < adev->num_regs; i++)
+			drm_printf(&p, "0x%08x: 0x%08x\n",
+				   adev->reset_dump_reg_list[i],
+				   adev->reset_dump_reg_value[i]);
+	}
+
+	return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+}
+
+static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
+{
+	struct drm_device *dev = adev_to_drm(adev);
+
+	ktime_get_ts64(&adev->reset_time);
+	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
+		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+}
+#endif
+
 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 			 struct amdgpu_reset_context *reset_context)
 {
@@ -4819,6 +4816,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 				goto out;
 
 			vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
+#ifdef CONFIG_DEV_COREDUMP
+			tmp_adev->reset_vram_lost = vram_lost;
+			memset(&tmp_adev->reset_task_info, 0,
+						sizeof(tmp_adev->reset_task_info));
+			if (reset_context->job && reset_context->job->vm)
+				tmp_adev->reset_task_info =
+					reset_context->job->vm->task_info;
+			amdgpu_reset_capture_coredumpm(tmp_adev);
+#endif
 			if (vram_lost) {
 				DRM_INFO("VRAM is lost due to GPU reset!\n");
 				amdgpu_inc_vram_lost(tmp_adev);
@@ -5004,16 +5010,32 @@ static void amdgpu_device_recheck_guilty_jobs(
 
 		/* clear job's guilty and depend the folowing step to decide the real one */
 		drm_sched_reset_karma(s_job);
+		/* for the real bad job, it will be resubmitted twice, adding a dma_fence_get
+		 * to make sure fence is balanced */
+		dma_fence_get(s_job->s_fence->parent);
 		drm_sched_resubmit_jobs_ext(&ring->sched, 1);
 
+		if (!s_job->s_fence->parent) {
+			DRM_WARN("Failed to get a HW fence for job!");
+			continue;
+		}
+
 		ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
 		if (ret == 0) { /* timeout */
 			DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
 						ring->sched.name, s_job->id);
 
+
+			amdgpu_fence_driver_isr_toggle(adev, true);
+
+			/* Clear this failed job from fence array */
+			amdgpu_fence_driver_clear_job_fences(ring);
+
+			amdgpu_fence_driver_isr_toggle(adev, false);
+
+			/* Since the job won't signal and we go for
+			 * another resubmit drop this parent pointer
+			 */
+			dma_fence_put(s_job->s_fence->parent);
+			s_job->s_fence->parent = NULL;
+
 			/* set guilty */
 			drm_sched_increase_karma(s_job);
 retry:
@@ -5042,7 +5064,6 @@ static void amdgpu_device_recheck_guilty_jobs(
 
 		/* got the hw fence, signal finished fence */
 		atomic_dec(ring->sched.score);
-		dma_fence_put(s_job->s_fence->parent);
 		dma_fence_get(&s_job->s_fence->finished);
 		dma_fence_signal(&s_job->s_fence->finished);
 		dma_fence_put(&s_job->s_fence->finished);
@@ -5055,8 +5076,29 @@ static void amdgpu_device_recheck_guilty_jobs(
 	}
 }
 
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+#if defined(CONFIG_DEBUG_FS)
+	if (!amdgpu_sriov_vf(adev))
+		cancel_work(&adev->reset_work);
+#endif
+
+	if (adev->kfd.dev)
+		cancel_work(&adev->kfd.reset_work);
+
+	if (amdgpu_sriov_vf(adev))
+		cancel_work(&adev->virt.flr_work);
+
+	if (con && adev->ras_enabled)
+		cancel_work(&con->recovery_work);
+
+}
+
+
 /**
- * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
  * @adev: amdgpu_device pointer
  * @job: which job trigger hang
@@ -5066,7 +5108,7 @@ static void amdgpu_device_recheck_guilty_jobs(
  * Returns 0 for success or an error on failure.
 */
 
-int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job)
 {
 	struct list_head device_list, *device_list_handle =  NULL;
@@ -5164,7 +5206,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		 */
 		amdgpu_unregister_gpu_instance(tmp_adev);
 
-		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
+		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
 
 		/* disable ras on ALL IPs */
 		if (!need_emergency_restart &&
@@ -5194,8 +5236,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 *
 	 * job->base holds a reference to parent fence
 	 */
-	if (job && job->base.s_fence->parent &&
-	    dma_fence_is_signaled(job->base.s_fence->parent)) {
+	if (job && (job->hw_fence.ops != NULL) &&
+	    dma_fence_is_signaled(&job->hw_fence)) {
 		job_signaled = true;
 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
 		goto skip_hw_reset;
@@ -5210,6 +5252,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 				  r, adev_to_drm(tmp_adev)->unique);
 			tmp_adev->asic_reset_res = r;
 		}
+
+		/*
+		 * Drop all pending non scheduler resets. Scheduler resets
+		 * were already dropped during drm_sched_stop
+		 */
+		amdgpu_device_stop_pending_resets(tmp_adev);
 	}
 
 	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
@@ -5308,40 +5356,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	if (r)
 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
+
+	atomic_set(&adev->reset_domain->reset_res, r);
 	return r;
 }
 
-struct amdgpu_recover_work_struct {
-	struct work_struct base;
-	struct amdgpu_device *adev;
-	struct amdgpu_job *job;
-	int ret;
-};
-
-static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
-{
-	struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
-
-	recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
-}
-/*
- * Serialize gpu recover into reset domain single threaded wq
- */
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
-				    struct amdgpu_job *job)
-{
-	struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
-
-	INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
-
-	if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
-		return -EAGAIN;
-
-	flush_work(&work.base);
-
-	return work.ret;
-}
-
 /**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
@@ -5490,6 +5509,36 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	}
 }
 
+/**
+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+				      struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+	uint64_t address_mask = peer_adev->dev->dma_mask ?
+		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+	resource_size_t aper_limit =
+		adev->gmc.aper_base + adev->gmc.aper_size - 1;
+	bool p2p_access = !(pci_p2pdma_distance_many(adev->pdev,
+					&peer_adev->dev, 1, true) < 0);
+
+	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
+		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
+		!(adev->gmc.aper_base & address_mask ||
+		  aper_limit & address_mask));
+#else
+	return false;
+#endif
+}
+
 int amdgpu_device_baco_enter(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = drm_to_adev(dev);
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -194,6 +194,7 @@ static int hw_id_map[MAX_HWIP] = {
 	[UMC_HWIP]	= UMC_HWID,
 	[XGMI_HWIP]	= XGMI_HWID,
 	[DCI_HWIP]	= DCI_HWID,
+	[PCIE_HWIP]	= PCIE_HWID,
 };
 
 static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary)
@@ -1435,6 +1436,11 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
 		return -EINVAL;
 	}
 
+	/* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+	 * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES
+	 * but that may change in the future with new GPUs so keep this
+	 * check for defensive purposes.
+	 */
 	if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
 		dev_err(adev->dev, "invalid vcn instances\n");
 		return -EINVAL;
@@ -1450,6 +1456,9 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
 
 	switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
 	case 1:
+		/* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+		 * so this won't overflow.
+		 */
 		for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
 			adev->vcn.vcn_codec_disable_mask[v] =
 				le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
@@ -1709,6 +1718,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
 		case IP_VERSION(3, 1, 3):
 		case IP_VERSION(3, 1, 5):
 		case IP_VERSION(3, 1, 6):
+		case IP_VERSION(3, 2, 0):
+		case IP_VERSION(3, 2, 1):
 			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 			break;
 		default:
@@ -1886,6 +1897,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
 		break;
 	case IP_VERSION(4, 0, 0):
+	case IP_VERSION(4, 0, 2):
 	case IP_VERSION(4, 0, 4):
 		amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
@@ -2321,6 +2333,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 
 	switch (adev->ip_versions[LSDMA_HWIP][0]) {
 	case IP_VERSION(6, 0, 0):
+	case IP_VERSION(6, 0, 1):
 	case IP_VERSION(6, 0, 2):
 		adev->lsdma.funcs = &lsdma_v6_0_funcs;
 		break;
@ -30,6 +30,9 @@
|
|||
#include "atom.h"
|
||||
#include "amdgpu_connectors.h"
|
||||
#include "amdgpu_display.h"
|
||||
#include "soc15_common.h"
|
||||
#include "gc/gc_11_0_0_offset.h"
|
||||
#include "gc/gc_11_0_0_sh_mask.h"
|
||||
#include <asm/div64.h>
|
||||
|
||||
#include <linux/pci.h>
|
||||
|
@ -663,6 +666,11 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
|
|||
{
|
||||
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
|
||||
uint64_t modifier = 0;
|
||||
int num_pipes = 0;
|
||||
int num_pkrs = 0;
|
||||
|
||||
num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
|
||||
num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
|
||||
|
||||
if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
|
||||
modifier = DRM_FORMAT_MOD_LINEAR;
|
||||
|
@ -675,7 +683,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
|
|||
int bank_xor_bits = 0;
|
||||
int packers = 0;
|
||||
int rb = 0;
|
||||
int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
|
||||
int pipes = ilog2(num_pipes);
|
||||
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
|
||||
|
||||
switch (swizzle >> 2) {
|
||||
|
@ -691,12 +699,17 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
|
|||
case 6: /* 64 KiB _X */
|
||||
block_size_bits = 16;
|
||||
break;
|
||||
case 7: /* 256 KiB */
|
||||
block_size_bits = 18;
|
||||
break;
|
||||
default:
|
||||
/* RESERVED or VAR */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
|
||||
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX11;
|
||||
else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
|
||||
else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX10;
|
||||
|
@ -707,19 +720,32 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
|
|||
case 0: /* Z microtiling */
|
||||
return -EINVAL;
|
||||
case 1: /* S microtiling */
|
||||
if (!has_xor)
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX9;
|
||||
if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
|
||||
if (!has_xor)
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX9;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (!has_xor && afb->base.format->cpp[0] != 4)
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX9;
|
||||
if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
|
||||
if (!has_xor && afb->base.format->cpp[0] != 4)
|
||||
version = AMD_FMT_MOD_TILE_VER_GFX9;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
break;
|
||||
}
|
||||
|
||||
if (has_xor) {
|
||||
if (num_pipes == num_pkrs && num_pkrs == 0) {
|
||||
DRM_ERROR("invalid number of pipes and packers\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (version) {
|
||||
case AMD_FMT_MOD_TILE_VER_GFX11:
|
||||
pipe_xor_bits = min(block_size_bits - 8, pipes);
|
||||
packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
|
||||
break;
|
||||
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
|
||||
pipe_xor_bits = min(block_size_bits - 8, pipes);
|
||||
packers = min(block_size_bits - 8 - pipe_xor_bits,
|
||||
|
@@ -753,9 +779,10 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
        u64 render_dcc_offset;

        /* Enable constant encode on RAVEN2 and later. */
        bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN ||
        bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN ||
                                   (adev->asic_type == CHIP_RAVEN &&
                                    adev->external_rev_id >= 0x81);
                                    adev->external_rev_id >= 0x81)) &&
                                   adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0);

        int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
                              dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :

@@ -870,10 +897,11 @@ static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
        return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
    }
    case AMD_FMT_MOD_TILE_VER_GFX10:
    case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: {
    case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
    case AMD_FMT_MOD_TILE_VER_GFX11: {
        int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);

        if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
        if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
            AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
            ++pipes_log2;

@@ -966,6 +994,9 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
        case DC_SW_64KB_S_X:
            block_size_log2 = 16;
            break;
        case DC_SW_VAR_S_X:
            block_size_log2 = 18;
            break;
        default:
            drm_dbg_kms(rfb->base.dev,
                        "Swizzle mode with unknown block size: %d\n", swizzle);
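For orientation: the version, swizzle and XOR fields computed in the hunks above are ultimately packed into a single u64 DRM format modifier. A rough sketch of that packing for a GFX11 64 KiB _X surface, using the AMD_FMT_MOD_SET helpers from include/uapi/drm/drm_fourcc.h (the exact TILE value depends on the swizzle mode, so treat this as illustrative rather than as the patch's output):

    u64 modifier =
        AMD_FMT_MOD |
        AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
        AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
        AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
        AMD_FMT_MOD_SET(PACKERS, packers);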
@@ -35,8 +35,6 @@
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))

int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
                                          uint64_t bo_flags);
@@ -802,6 +802,16 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (
module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif

/**
 * DOC: pcie_p2p (bool)
 * Enable PCIe P2P (requires large-BAR). Default value: true (on)
 */
#ifdef CONFIG_HSA_AMD_P2P
bool pcie_p2p = true;
module_param(pcie_p2p, bool, 0444);
MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
#endif

/**
 * DOC: dcfeaturemask (uint)
 * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
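Since this is a plain module_param with 0444 permissions, the new knob shows up read-only under /sys/module/amdgpu/parameters/pcie_p2p and can be set at load time, e.g. amdgpu.pcie_p2p=0 on the kernel command line, on kernels built with CONFIG_HSA_AMD_P2P.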
@@ -39,6 +39,7 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"

/*
 * Fences

@@ -163,11 +164,16 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
    if (job && job->job_run_counter) {
        /* reinit seq for resubmitted jobs */
        fence->seqno = seq;
        /* To be in line with external fence creation and other drivers */
        dma_fence_get(fence);
    } else {
        if (job)
        if (job) {
            dma_fence_init(fence, &amdgpu_job_fence_ops,
                           &ring->fence_drv.lock,
                           adev->fence_context + ring->idx, seq);
            /* Against remove in amdgpu_job_{free, free_cb} */
            dma_fence_get(fence);
        }
        else
            dma_fence_init(fence, &amdgpu_fence_ops,
                           &ring->fence_drv.lock,

@@ -531,6 +537,24 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
    }
}

/* Will either stop and flush handlers for amdgpu interrupt or re-enable it */
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
{
    int i;

    for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
        struct amdgpu_ring *ring = adev->rings[i];

        if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
            continue;

        if (stop)
            disable_irq(adev->irq.irq);
        else
            enable_irq(adev->irq.irq);
    }
}
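One subtlety in the new toggle: the loop walks the rings, but disable_irq()/enable_irq() act on the single device-level interrupt line (adev->irq.irq), so the disable depth is bumped once per qualifying ring. That stays balanced because disable_irq() nests and the stop/restart paths walk the same ring set; disable_irq() also waits for in-flight handlers to finish, which provides the flush the comment above promises.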
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
    unsigned int i, j;

@@ -594,8 +618,10 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
    for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
        ptr = &ring->fence_drv.fences[i];
        old = rcu_dereference_protected(*ptr, 1);
        if (old && old->ops == &amdgpu_job_fence_ops)
        if (old && old->ops == &amdgpu_job_fence_ops) {
            RCU_INIT_POINTER(*ptr, NULL);
            dma_fence_put(old);
        }
    }
}

@@ -798,7 +824,10 @@ static int gpu_recover_get(void *data, u64 *val)
        return 0;
    }

    *val = amdgpu_device_gpu_recover(adev, NULL);
    if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
        flush_work(&adev->reset_work);

    *val = atomic_read(&adev->reset_domain->reset_res);

    pm_runtime_mark_last_busy(dev->dev);
    pm_runtime_put_autosuspend(dev->dev);

@@ -810,6 +839,14 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
                         "%lld\n");

static void amdgpu_debugfs_reset_work(struct work_struct *work)
{
    struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
                                              reset_work);

    amdgpu_device_gpu_recover(adev, NULL);
}

#endif

void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)

@@ -821,9 +858,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
    debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
                        &amdgpu_debugfs_fence_info_fops);

    if (!amdgpu_sriov_vf(adev))
    if (!amdgpu_sriov_vf(adev)) {

        INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
        debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
                            &amdgpu_debugfs_gpu_recover_fops);
    }
#endif
}
@@ -142,7 +142,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
    }
}

static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
{
    return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
}

static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
    if (amdgpu_compute_multipipe != -1) {
        DRM_INFO("amdgpu: forcing compute pipe policy %d\n",

@@ -158,6 +163,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
    return adev->gfx.mec.num_mec > 1;
}

bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
                                                struct amdgpu_ring *ring)
{
    int queue = ring->queue;
    int pipe = ring->pipe;

    /* Policy: use pipe1 queue0 as high priority graphics queue if we
     * have more than one gfx pipe.
     */
    if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
        adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
        int me = ring->me;
        int bit;

        bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
        if (ring == &adev->gfx.gfx_ring[bit])
            return true;
    }

    return false;
}

bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
                                               struct amdgpu_ring *ring)
{

@@ -174,7 +201,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
    int i, queue, pipe;
    bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
    bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
    int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                                 adev->gfx.mec.num_queue_per_pipe,
                                 adev->gfx.num_compute_rings);

@@ -200,18 +227,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
    int i, queue, me;
    int i, queue, pipe;
    bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
    int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
                            adev->gfx.me.num_queue_per_pipe;

    for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
        queue = i % adev->gfx.me.num_queue_per_pipe;
        me = (i / adev->gfx.me.num_queue_per_pipe)
              / adev->gfx.me.num_pipe_per_me;

        if (me >= adev->gfx.me.num_me)
            break;
    if (multipipe_policy) {
        /* policy: amdgpu owns the first queue per pipe at this stage
         * will extend to mulitple queues per pipe later */
        if (me == 0 && queue < 1)
        for (i = 0; i < max_queues_per_me; i++) {
            pipe = i % adev->gfx.me.num_pipe_per_me;
            queue = (i / adev->gfx.me.num_pipe_per_me) %
                    adev->gfx.me.num_queue_per_pipe;

            set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
                    adev->gfx.me.queue_bitmap);
        }
    } else {
        for (i = 0; i < max_queues_per_me; ++i)
            set_bit(i, adev->gfx.me.queue_bitmap);
    }
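To make the new round-robin mapping concrete, a small worked example (the pipe/queue counts here are assumed for illustration, not taken from the patch):

    /*
     * With num_pipe_per_me = 2 and num_queue_per_pipe = 2,
     * max_queues_per_me = 4 and the multipipe loop visits:
     *
     *   i = 0 -> pipe 0, queue 0 -> bit 0
     *   i = 1 -> pipe 1, queue 0 -> bit 2
     *   i = 2 -> pipe 0, queue 1 -> bit 1
     *   i = 3 -> pipe 1, queue 1 -> bit 3
     *
     * where bit = pipe * num_queue_per_pipe + queue, so queue 0 of
     * every pipe is claimed before any pipe's queue 1.
     */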
@@ -666,6 +699,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
    if (amdgpu_device_skip_hw_access(adev))
        return 0;

    if (adev->mes.ring.sched.ready)
        return amdgpu_mes_rreg(adev, reg);

    BUG_ON(!ring->funcs->emit_rreg);

    spin_lock_irqsave(&kiq->ring_lock, flags);

@@ -733,6 +769,11 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
    if (amdgpu_device_skip_hw_access(adev))
        return;

    if (adev->mes.ring.sched.ready) {
        amdgpu_mes_wreg(adev, reg, v);
        return;
    }

    spin_lock_irqsave(&kiq->ring_lock, flags);
    amdgpu_ring_alloc(ring, 32);
    amdgpu_ring_emit_wreg(ring, reg, v);
@@ -396,6 +396,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
                                     int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
                                               struct amdgpu_ring *ring);
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
                                                struct amdgpu_ring *ring);
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
                               int pipe, int queue);
void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,

@@ -242,7 +242,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 * @entry: IV entry
 *
 * Decodes the interrupt vector at the current rptr
 * position and also advance the position for for Vega10
 * position and also advance the position for Vega10
 * and later GPUs.
 */
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
@@ -24,12 +24,18 @@
#ifndef __AMDGPU_IMU_H__
#define __AMDGPU_IMU_H__

enum imu_work_mode {
    DEBUG_MODE,
    MISSION_MODE
};

struct amdgpu_imu_funcs {
    int (*init_microcode)(struct amdgpu_device *adev);
    int (*load_microcode)(struct amdgpu_device *adev);
    void (*setup_imu)(struct amdgpu_device *adev);
    int (*start_imu)(struct amdgpu_device *adev);
    void (*program_rlc_ram)(struct amdgpu_device *adev);
    int (*wait_for_reset_status)(struct amdgpu_device *adev);
};

struct imu_rlc_ram_golden {

@@ -46,6 +52,7 @@ struct imu_rlc_ram_golden {

struct amdgpu_imu {
    const struct amdgpu_imu_funcs *funcs;
    enum imu_work_mode mode;
};

#endif

@@ -320,6 +320,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
    if (!amdgpu_device_has_dc_support(adev)) {
        if (!adev->enable_virtual_display)
            /* Disable vblank IRQs aggressively for power-saving */
            /* XXX: can this be enabled for DC? */
            adev_to_drm(adev)->vblank_disable_immediate = true;

        r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
@@ -64,7 +64,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
          ti.process_name, ti.tgid, ti.task_name, ti.pid);

    if (amdgpu_device_should_recover_gpu(ring->adev)) {
        r = amdgpu_device_gpu_recover_imp(ring->adev, job);
        r = amdgpu_device_gpu_recover(ring->adev, job);
        if (r)
            DRM_ERROR("GPU Recovery Failed: %d\n", r);
    } else {

@@ -262,10 +262,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
        DRM_ERROR("Error scheduling IBs (%d)\n", r);
    }

    if (!job->job_run_counter)
        dma_fence_get(fence);
    else if (finished->error < 0)
        dma_fence_put(&job->hw_fence);
    job->job_run_counter++;
    amdgpu_job_free_resources(job);

@@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
            atomic64_read(&adev->visible_pin_size),
            vram_gtt.vram_size);
        vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
        vram_gtt.gtt_size *= PAGE_SIZE;
        vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
        return copy_to_user(out, &vram_gtt,
                            min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;

@@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
            mem.cpu_accessible_vram.usable_heap_size * 3 / 4;

        mem.gtt.total_heap_size = gtt_man->size;
        mem.gtt.total_heap_size *= PAGE_SIZE;
        mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
            atomic64_read(&adev->gart_pin_size);
        mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
@@ -189,15 +189,29 @@ int amdgpu_mes_init(struct amdgpu_device *adev)

    r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
    if (r) {
        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
        dev_err(adev->dev,
                "(%d) query_status_fence_offs wb alloc failed\n", r);
        return r;
        goto error_ids;
    }
    adev->mes.query_status_fence_gpu_addr =
        adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
    adev->mes.query_status_fence_ptr =
        (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];

    r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
    if (r) {
        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
        amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
        dev_err(adev->dev,
                "(%d) read_val_offs alloc failed\n", r);
        goto error_ids;
    }
    adev->mes.read_val_gpu_addr =
        adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
    adev->mes.read_val_ptr =
        (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];

    r = amdgpu_mes_doorbell_init(adev);
    if (r)
        goto error;

@@ -206,6 +220,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)

error:
    amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
    amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
    amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
error_ids:
    idr_destroy(&adev->mes.pasid_idr);
    idr_destroy(&adev->mes.gang_id_idr);

@@ -218,6 +234,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
    amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
    amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
    amdgpu_device_wb_free(adev, adev->mes.read_val_offs);

    idr_destroy(&adev->mes.pasid_idr);
    idr_destroy(&adev->mes.gang_id_idr);

@@ -675,8 +693,10 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
    queue_input.doorbell_offset = qprops->doorbell_off;
    queue_input.mqd_addr = queue->mqd_gpu_addr;
    queue_input.wptr_addr = qprops->wptr_gpu_addr;
    queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
    queue_input.queue_type = qprops->queue_type;
    queue_input.paging = qprops->paging;
    queue_input.is_kfd_process = 0;

    r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
    if (r) {

@@ -792,6 +812,118 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
    return r;
}

uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
    struct mes_misc_op_input op_input;
    int r, val = 0;

    amdgpu_mes_lock(&adev->mes);

    op_input.op = MES_MISC_OP_READ_REG;
    op_input.read_reg.reg_offset = reg;
    op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;

    if (!adev->mes.funcs->misc_op) {
        DRM_ERROR("mes rreg is not supported!\n");
        goto error;
    }

    r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
    if (r)
        DRM_ERROR("failed to read reg (0x%x)\n", reg);
    else
        val = *(adev->mes.read_val_ptr);

error:
    amdgpu_mes_unlock(&adev->mes);
    return val;
}

int amdgpu_mes_wreg(struct amdgpu_device *adev,
                    uint32_t reg, uint32_t val)
{
    struct mes_misc_op_input op_input;
    int r;

    amdgpu_mes_lock(&adev->mes);

    op_input.op = MES_MISC_OP_WRITE_REG;
    op_input.write_reg.reg_offset = reg;
    op_input.write_reg.reg_value = val;

    if (!adev->mes.funcs->misc_op) {
        DRM_ERROR("mes wreg is not supported!\n");
        r = -EINVAL;
        goto error;
    }

    r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
    if (r)
        DRM_ERROR("failed to write reg (0x%x)\n", reg);

error:
    amdgpu_mes_unlock(&adev->mes);
    return r;
}

int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
                                  uint32_t reg0, uint32_t reg1,
                                  uint32_t ref, uint32_t mask)
{
    struct mes_misc_op_input op_input;
    int r;

    amdgpu_mes_lock(&adev->mes);

    op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
    op_input.wrm_reg.reg0 = reg0;
    op_input.wrm_reg.reg1 = reg1;
    op_input.wrm_reg.ref = ref;
    op_input.wrm_reg.mask = mask;

    if (!adev->mes.funcs->misc_op) {
        DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
        r = -EINVAL;
        goto error;
    }

    r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
    if (r)
        DRM_ERROR("failed to reg_write_reg_wait\n");

error:
    amdgpu_mes_unlock(&adev->mes);
    return r;
}

int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
                        uint32_t val, uint32_t mask)
{
    struct mes_misc_op_input op_input;
    int r;

    amdgpu_mes_lock(&adev->mes);

    op_input.op = MES_MISC_OP_WRM_REG_WAIT;
    op_input.wrm_reg.reg0 = reg;
    op_input.wrm_reg.ref = val;
    op_input.wrm_reg.mask = mask;

    if (!adev->mes.funcs->misc_op) {
        DRM_ERROR("mes reg wait is not supported!\n");
        r = -EINVAL;
        goto error;
    }

    r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
    if (r)
        DRM_ERROR("failed to reg_write_reg_wait\n");

error:
    amdgpu_mes_unlock(&adev->mes);
    return r;
}
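Together with the amdgpu_kiq_rreg()/amdgpu_kiq_wreg() fallbacks added earlier in this series, a caller-side sketch of how these helpers are meant to be used once the MES ring is scheduled (reg, val and the all-ones wait mask are placeholders, not values from the patch):

    if (adev->mes.ring.sched.ready) {
        amdgpu_mes_wreg(adev, reg, val);
        /* poll until the register reads back the expected value */
        amdgpu_mes_reg_wait(adev, reg, val, 0xffffffff);
    }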
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
                               struct amdgpu_ring *ring,

@@ -801,6 +933,8 @@ amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
    props->hqd_base_gpu_addr = ring->gpu_addr;
    props->rptr_gpu_addr = ring->rptr_gpu_addr;
    props->wptr_gpu_addr = ring->wptr_gpu_addr;
    props->wptr_mc_addr =
        ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
    props->queue_size = ring->ring_size;
    props->eop_gpu_addr = ring->eop_gpu_addr;
    props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;

@@ -961,7 +1095,8 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
    r = amdgpu_bo_create_kernel(adev,
                                sizeof(struct amdgpu_mes_ctx_meta_data),
                                PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                &ctx_data->meta_data_obj, NULL,
                                &ctx_data->meta_data_obj,
                                &ctx_data->meta_data_mc_addr,
                                &ctx_data->meta_data_ptr);
    if (!ctx_data->meta_data_obj)
        return -ENOMEM;

@@ -975,7 +1110,9 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
{
    if (ctx_data->meta_data_obj)
        amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
                              &ctx_data->meta_data_mc_addr,
                              &ctx_data->meta_data_ptr);
}

int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
@@ -33,6 +33,13 @@
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2

#define AMDGPU_MES_API_VERSION_SHIFT 12
#define AMDGPU_MES_FEAT_VERSION_SHIFT 24

#define AMDGPU_MES_VERSION_MASK 0x00000fff
#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000

enum amdgpu_mes_priority_level {
    AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
    AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,

@@ -65,6 +72,9 @@ struct amdgpu_mes {

    spinlock_t queue_id_lock;

    uint32_t sched_version;
    uint32_t kiq_version;

    uint32_t total_max_queue;
    uint32_t doorbell_id_offset;
    uint32_t max_doorbell_slices;

@@ -109,6 +119,10 @@ struct amdgpu_mes {
    uint32_t query_status_fence_offs;
    uint64_t query_status_fence_gpu_addr;
    uint64_t *query_status_fence_ptr;
    uint32_t read_val_offs;
    uint64_t read_val_gpu_addr;
    uint32_t *read_val_ptr;

    uint32_t saved_flags;

    /* initialize kiq pipe */

@@ -166,6 +180,7 @@ struct amdgpu_mes_queue_properties {
    uint64_t hqd_base_gpu_addr;
    uint64_t rptr_gpu_addr;
    uint64_t wptr_gpu_addr;
    uint64_t wptr_mc_addr;
    uint32_t queue_size;
    uint64_t eop_gpu_addr;
    uint32_t hqd_pipe_priority;

@@ -198,12 +213,14 @@ struct mes_add_queue_input {
    uint32_t doorbell_offset;
    uint64_t mqd_addr;
    uint64_t wptr_addr;
    uint64_t wptr_mc_addr;
    uint32_t queue_type;
    uint32_t paging;
    uint32_t gws_base;
    uint32_t gws_size;
    uint64_t tba_addr;
    uint64_t tma_addr;
    uint32_t is_kfd_process;
};

struct mes_remove_queue_input {

@@ -233,6 +250,36 @@ struct mes_resume_gang_input {
    uint64_t gang_context_addr;
};

enum mes_misc_opcode {
    MES_MISC_OP_WRITE_REG,
    MES_MISC_OP_READ_REG,
    MES_MISC_OP_WRM_REG_WAIT,
    MES_MISC_OP_WRM_REG_WR_WAIT,
};

struct mes_misc_op_input {
    enum mes_misc_opcode op;

    union {
        struct {
            uint32_t reg_offset;
            uint64_t buffer_addr;
        } read_reg;

        struct {
            uint32_t reg_offset;
            uint32_t reg_value;
        } write_reg;

        struct {
            uint32_t ref;
            uint32_t mask;
            uint32_t reg0;
            uint32_t reg1;
        } wrm_reg;
    };
};

struct amdgpu_mes_funcs {
    int (*add_hw_queue)(struct amdgpu_mes *mes,
                        struct mes_add_queue_input *input);

@@ -248,6 +295,9 @@ struct amdgpu_mes_funcs {

    int (*resume_gang)(struct amdgpu_mes *mes,
                       struct mes_resume_gang_input *input);

    int (*misc_op)(struct amdgpu_mes *mes,
                   struct mes_misc_op_input *input);
};

#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))

@@ -280,6 +330,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
                                  enum amdgpu_unmap_queues_action action,
                                  u64 gpu_addr, u64 seq);

uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
                    uint32_t reg, uint32_t val);
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
                        uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
                                  uint32_t reg0, uint32_t reg1,
                                  uint32_t ref, uint32_t mask);

int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
                        int queue_type, int idx,
                        struct amdgpu_mes_ctx_data *ctx_data,

@@ -107,6 +107,7 @@ struct amdgpu_mes_ctx_meta_data {
struct amdgpu_mes_ctx_data {
    struct amdgpu_bo *meta_data_obj;
    uint64_t meta_data_gpu_addr;
    uint64_t meta_data_mc_addr;
    struct amdgpu_bo_va *meta_data_va;
    void *meta_data_ptr;
    uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
@@ -350,15 +350,11 @@ struct amdgpu_mode_info {

#define AMDGPU_MAX_BL_LEVEL 0xFF

#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)

struct amdgpu_backlight_privdata {
    struct amdgpu_encoder *encoder;
    uint8_t negative;
};

#endif

struct amdgpu_atom_ss {
    uint16_t percentage;
    uint16_t percentage_divider;

@@ -35,6 +35,8 @@
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "atom.h"
#include "amdgpu_reset.h"

#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>

@@ -2946,7 +2948,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
    struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

    if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
        schedule_work(&ras->recovery_work);
        amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
    return 0;
}

@@ -328,10 +328,16 @@ struct ecc_info_per_ch {
    uint16_t ce_count_hi_chip;
    uint64_t mca_umc_status;
    uint64_t mca_umc_addr;
    uint64_t mca_ceumc_addr;
};

struct umc_ecc_info {
    struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];

    /* Whether the SMU ecctable supports recording the
     * correctable error address
     */
    int record_ce_addr_supported;
};

struct amdgpu_ras {
@@ -132,6 +132,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
    }

    atomic_set(&reset_domain->in_gpu_reset, 0);
    atomic_set(&reset_domain->reset_res, 0);
    init_rwsem(&reset_domain->sem);

    return reset_domain;

@@ -82,6 +82,7 @@ struct amdgpu_reset_domain {
    enum amdgpu_reset_domain_type type;
    struct rw_semaphore sem;
    atomic_t in_gpu_reset;
    atomic_t reset_res;
};
@@ -543,12 +543,12 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
     */
    prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;

    if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
        if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
            prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
            prop->hqd_queue_priority =
                AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
        }
    if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
         amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
        (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
         amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
        prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
        prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
    }
}

@@ -143,6 +143,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
                                      uint32_t wait_seq,
                                      signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);

/*
 * Rings.
@@ -1798,18 +1798,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
    DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
             (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));

    /* Compute GTT size, either bsaed on 3/4th the size of RAM size
    /* Compute GTT size, either based on 1/2 the size of RAM size
     * or whatever the user passed on module init */
    if (amdgpu_gtt_size == -1) {
        struct sysinfo si;

        si_meminfo(&si);
        gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
                       adev->gmc.mc_vram_size),
                       ((uint64_t)si.totalram * si.mem_unit * 3/4));
    }
    else
        /* Certain GL unit tests for large textures can cause problems
         * with the OOM killer since there is no way to link this memory
         * to a process. This was originally mitigated (but not necessarily
         * eliminated) by limiting the GTT size. The problem is this limit
         * is often too low for many modern games so just make the limit 1/2
         * of system memory which aligns with TTM. The OOM accounting needs
         * to be addressed, but we shouldn't prevent common 3D applications
         * from being usable just to potentially mitigate that corner case.
         */
        gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
                       (u64)si.totalram * si.mem_unit / 2);
    } else {
        gtt_size = (uint64_t)amdgpu_gtt_size << 20;
    }

    /* Initialize GTT memory pool */
    r = amdgpu_gtt_mgr_init(adev, gtt_size);
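As a quick sanity check of the new default, assuming the usual AMDGPU_DEFAULT_GTT_SIZE_MB floor of 3072 (3 GiB): on a hypothetical 16 GiB machine,

    /* half of RAM (8 GiB) wins over the 3 GiB floor */
    gtt_size = max((u64)AMDGPU_DEFAULT_GTT_SIZE_MB << 20,
                   ((u64)16 << 30) / 2);

whereas the old rule's min() against mc_vram_size effectively clamped the default to VRAM size, which is what made it too small for modern games on low-VRAM parts.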
@@ -486,26 +486,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
    case CHIP_POLARIS12:
    case CHIP_VEGAM:
        return AMDGPU_FW_LOAD_SMU;
    case CHIP_VEGA10:
    case CHIP_RAVEN:
    case CHIP_VEGA12:
    case CHIP_VEGA20:
    case CHIP_ARCTURUS:
    case CHIP_RENOIR:
    case CHIP_NAVI10:
    case CHIP_NAVI14:
    case CHIP_NAVI12:
    case CHIP_SIENNA_CICHLID:
    case CHIP_NAVY_FLOUNDER:
    case CHIP_VANGOGH:
    case CHIP_DIMGREY_CAVEFISH:
    case CHIP_ALDEBARAN:
    case CHIP_BEIGE_GOBY:
    case CHIP_YELLOW_CARP:
        if (!load_type)
            return AMDGPU_FW_LOAD_DIRECT;
        else
            return AMDGPU_FW_LOAD_PSP;
    case CHIP_CYAN_SKILLFISH:
        if (!(load_type &&
              adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))
@@ -329,6 +329,18 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
    return 0;
}

/* from vcn4 and above, only unified queue is used */
static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    bool ret = false;

    if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0))
        ret = true;

    return ret;
}

bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
    bool ret = false;

@@ -718,19 +730,55 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
    return r;
}

static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
                                                   uint32_t ib_pack_in_dw, bool enc)
{
    uint32_t *ib_checksum;

    ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
    ib->ptr[ib->length_dw++] = 0x30000002;
    ib_checksum = &ib->ptr[ib->length_dw++];
    ib->ptr[ib->length_dw++] = ib_pack_in_dw;

    ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
    ib->ptr[ib->length_dw++] = 0x30000001;
    ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
    ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);

    return ib_checksum;
}

static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
                                                uint32_t ib_pack_in_dw)
{
    uint32_t i;
    uint32_t checksum = 0;

    for (i = 0; i < ib_pack_in_dw; i++)
        checksum += *(*ib_checksum + 2 + i);

    **ib_checksum = checksum;
}
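The two helpers above pair up around the real payload; a layout sketch of the eight-dword header they maintain (offsets relative to the header start, N = ib_pack_in_dw; reconstructed from the code, not part of the patch):

    /*
     *   [0] 0x00000010              [4] 0x00000010
     *   [1] 0x30000002 (checksum)   [5] 0x30000001 (engine info)
     *   [2] <checksum value>        [6] 0x2 = enc / 0x3 = dec
     *   [3] N                       [7] N * 4 bytes
     *
     * ..._ib_checksum() sums the N dwords starting two slots past the
     * checksum slot, i.e. from [4] onward, and patches the result into
     * slot [2] once the IB contents are final.
     */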
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib_msg,
                                      struct dma_fence **fence)
{
    struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
    const unsigned int ib_size_dw = 64;
    unsigned int ib_size_dw = 64;
    struct amdgpu_device *adev = ring->adev;
    struct dma_fence *f = NULL;
    struct amdgpu_job *job;
    struct amdgpu_ib *ib;
    uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
    bool sq = amdgpu_vcn_using_unified_queue(ring);
    uint32_t *ib_checksum;
    uint32_t ib_pack_in_dw;
    int i, r;

    if (sq)
        ib_size_dw += 8;

    r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
                                 AMDGPU_IB_POOL_DIRECT, &job);
    if (r)

@@ -739,6 +787,13 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
    ib = &job->ibs[0];
    ib->length_dw = 0;

    /* single queue headers */
    if (sq) {
        ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
                        + 4 + 2; /* engine info + decoding ib in dw */
        ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
    }

    ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
    ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
    decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);

@@ -752,6 +807,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
    for (i = ib->length_dw; i < ib_size_dw; ++i)
        ib->ptr[i] = 0x0;

    if (sq)
        amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);

    r = amdgpu_job_submit_direct(job, ring, &f);
    if (r)
        goto err_free;

@@ -838,13 +896,18 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
                                         struct amdgpu_ib *ib_msg,
                                         struct dma_fence **fence)
{
    const unsigned ib_size_dw = 16;
    unsigned int ib_size_dw = 16;
    struct amdgpu_job *job;
    struct amdgpu_ib *ib;
    struct dma_fence *f = NULL;
    uint32_t *ib_checksum = NULL;
    uint64_t addr;
    bool sq = amdgpu_vcn_using_unified_queue(ring);
    int i, r;

    if (sq)
        ib_size_dw += 8;

    r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
                                 AMDGPU_IB_POOL_DIRECT, &job);
    if (r)

@@ -854,6 +917,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
    addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

    ib->length_dw = 0;

    if (sq)
        ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);

    ib->ptr[ib->length_dw++] = 0x00000018;
    ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
    ib->ptr[ib->length_dw++] = handle;

@@ -873,6 +940,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
    for (i = ib->length_dw; i < ib_size_dw; ++i)
        ib->ptr[i] = 0x0;

    if (sq)
        amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);

    r = amdgpu_job_submit_direct(job, ring, &f);
    if (r)
        goto err;

@@ -892,13 +962,18 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
                                          struct amdgpu_ib *ib_msg,
                                          struct dma_fence **fence)
{
    const unsigned ib_size_dw = 16;
    unsigned int ib_size_dw = 16;
    struct amdgpu_job *job;
    struct amdgpu_ib *ib;
    struct dma_fence *f = NULL;
    uint32_t *ib_checksum = NULL;
    uint64_t addr;
    bool sq = amdgpu_vcn_using_unified_queue(ring);
    int i, r;

    if (sq)
        ib_size_dw += 8;

    r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
                                 AMDGPU_IB_POOL_DIRECT, &job);
    if (r)

@@ -908,6 +983,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
    addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

    ib->length_dw = 0;

    if (sq)
        ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);

    ib->ptr[ib->length_dw++] = 0x00000018;
    ib->ptr[ib->length_dw++] = 0x00000001;
    ib->ptr[ib->length_dw++] = handle;

@@ -927,6 +1006,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
    for (i = ib->length_dw; i < ib_size_dw; ++i)
        ib->ptr[i] = 0x0;

    if (sq)
        amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);

    r = amdgpu_job_submit_direct(job, ring, &f);
    if (r)
        goto err;

@@ -977,6 +1059,20 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
    return r;
}

int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
    long r;

    r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
    if (r)
        goto error;

    r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);

error:
    return r;
}

enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
    switch(ring) {

@@ -364,6 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
@@ -76,6 +76,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
    unsigned long flags;
    uint32_t seq;

    if (adev->mes.ring.sched.ready) {
        amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
                                      ref, mask);
        return;
    }

    spin_lock_irqsave(&kiq->ring_lock, flags);
    amdgpu_ring_alloc(ring, 32);
    amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,

@@ -54,7 +54,7 @@
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the the ring what VMID to use for that command
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their pages tables accordingly when they submit their
@@ -118,8 +118,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
    }
}

#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)

static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
{
    u8 level;

@@ -251,18 +249,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
    }
}

#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */

void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder)
{
}

void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder)
{
}

#endif

bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
{
    struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -39,7 +39,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
    0x00000000, // DB_DEPTH_CLEAR
    0x00000000, // PA_SC_SCREEN_SCISSOR_TL
    0x40004000, // PA_SC_SCREEN_SCISSOR_BR
    0x00000000, // DB_DFSM_CONTROL
    0, // HOLE
    0x00000000, // DB_RESERVED_REG_2
    0x00000000, // DB_Z_INFO
    0x00000000, // DB_STENCIL_INFO

@@ -50,7 +50,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
    0x00000000, // DB_RESERVED_REG_1
    0x00000000, // DB_RESERVED_REG_3
    0x00000000, // DB_SPI_VRS_CENTER_LOCATION
    0x00000000, // DB_VRS_OVERRIDE_CNTL
    0, // HOLE
    0x00000000, // DB_Z_READ_BASE_HI
    0x00000000, // DB_STENCIL_READ_BASE_HI
    0x00000000, // DB_Z_WRITE_BASE_HI

@@ -270,29 +270,29 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
    0x00000000, // PA_SC_FSR_EN
    0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
    0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
    0x00000000, // PA_SC_VRS_RATE_FEEDBACK_VIEW
    0, // HOLE
    0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
    0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
    0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
    0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
    0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
    0, // HOLE
    0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
    0, // HOLE
    0, // HOLE
    0x00000000, // PA_SC_VRS_RATE_BASE
    0x00000000, // PA_SC_VRS_RATE_BASE_EXT
    0x00000000, // PA_SC_VRS_RATE_SIZE_XY
    0x00000000, // PA_SC_VRS_RATE_VIEW
    0xffffffff, // VGT_MAX_VTX_INDX
    0x00000000, // VGT_MIN_VTX_INDX
    0x00000000, // VGT_INDX_OFFSET
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
    0x00550055, // CB_RMI_GL2_CACHE_CONTROL
    0x00000000, // CB_BLEND_RED
    0x00000000, // CB_BLEND_GREEN
    0x00000000, // CB_BLEND_BLUE
    0x00000000, // CB_BLEND_ALPHA
    0x00000000, // CB_DCC_CONTROL
    0x00000000, // CB_FDCC_CONTROL
    0x00000000, // CB_COVERAGE_OUT_CONTROL
    0x00000000, // DB_STENCIL_CONTROL
    0x01000000, // DB_STENCILREFMASK

@@ -470,8 +470,8 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
    0x00000000, // SPI_BARYC_CNTL
    0, // HOLE
    0x00000000, // SPI_TMPRING_SIZE
    0, // HOLE
    0, // HOLE
    0x00000000, // SPI_GFX_SCRATCH_BASE_LO
    0x00000000, // SPI_GFX_SCRATCH_BASE_HI
    0, // HOLE
    0, // HOLE
    0, // HOLE

@@ -545,7 +545,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
    0x00000000, // PA_STEREO_CNTL
    0x00000000, // PA_STATE_STEREO_X
    0x00000000, // PA_CL_VRS_CNTL
    0x00000000, // PA_SIDEBAND_REQUEST_DELAYS
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE

@@ -658,30 +658,30 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
    0x00000000, // PA_SU_POINT_MINMAX
    0x00000000, // PA_SU_LINE_CNTL
    0x00000000, // PA_SC_LINE_STIPPLE
    0x00000000, // VGT_OUTPUT_PATH_CNTL
    0x00000000, // VGT_HOS_CNTL
    0, // HOLE
    0, // HOLE
    0x00000000, // VGT_HOS_MAX_TESS_LEVEL
    0x00000000, // VGT_HOS_MIN_TESS_LEVEL
    0x00000000, // VGT_HOS_REUSE_DEPTH
    0x00000000, // VGT_GROUP_PRIM_TYPE
    0x00000000, // VGT_GROUP_FIRST_DECR
    0x00000000, // VGT_GROUP_DECR
    0x00000000, // VGT_GROUP_VECT_0_CNTL
    0x00000000, // VGT_GROUP_VECT_1_CNTL
    0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
    0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
    0x00000000, // VGT_GS_MODE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // VGT_GS_ONCHIP_CNTL
    0x00000000, // PA_SC_MODE_CNTL_0
    0x00000000, // PA_SC_MODE_CNTL_1
    0x00000000, // VGT_ENHANCE
    0x00000100, // VGT_GS_PER_ES
    0x00000080, // VGT_ES_PER_GS
    0x00000002, // VGT_GS_PER_VS
    0x00000000, // VGT_GSVS_RING_OFFSET_1
    0x00000000, // VGT_GSVS_RING_OFFSET_2
    0x00000000, // VGT_GSVS_RING_OFFSET_3
    0x00000000, // VGT_GS_OUT_PRIM_TYPE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // IA_ENHANCE
};
static const unsigned int gfx11_SECT_CONTEXT_def_5[] =

@@ -695,37 +695,36 @@ static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
};
static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
{
    0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
    0x00000000, // VGT_DRAW_PAYLOAD_CNTL
    0, // HOLE
    0x00000000, // VGT_INSTANCE_STEP_RATE_0
    0x00000000, // VGT_INSTANCE_STEP_RATE_1
    0x000000ff, // IA_MULTI_VGT_PARAM
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // VGT_ESGS_RING_ITEMSIZE
    0x00000000, // VGT_GSVS_RING_ITEMSIZE
    0, // HOLE
    0x00000000, // VGT_REUSE_OFF
    0x00000000, // VGT_VTX_CNT_EN
    0, // HOLE
    0x00000000, // DB_HTILE_SURFACE
    0x00000000, // DB_SRESULTS_COMPARE_STATE0
    0x00000000, // DB_SRESULTS_COMPARE_STATE1
    0x00000000, // DB_PRELOAD_CONTROL
    0, // HOLE
    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
    0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
    0, // HOLE
    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
    0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
    0, // HOLE
    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
    0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
    0, // HOLE
    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
    0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
    0, // HOLE
    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE

@@ -745,10 +744,10 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
    0x00000000, // VGT_TESS_DISTRIBUTION
    0x00000000, // VGT_SHADER_STAGES_EN
    0x00000000, // VGT_LS_HS_CONFIG
    0x00000000, // VGT_GS_VERT_ITEMSIZE
    0x00000000, // VGT_GS_VERT_ITEMSIZE_1
    0x00000000, // VGT_GS_VERT_ITEMSIZE_2
    0x00000000, // VGT_GS_VERT_ITEMSIZE_3
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // VGT_TF_PARAM
    0x00000000, // DB_ALPHA_TO_MASK
    0, // HOLE

@@ -759,11 +758,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
    0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
    0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
    0x00000000, // VGT_GS_INSTANCE_CNT
    0x00000000, // VGT_STRMOUT_CONFIG
    0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
};
static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
{
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // PA_SC_CENTROID_PRIORITY_0
    0x00000000, // PA_SC_CENTROID_PRIORITY_1
    0x00001000, // PA_SC_LINE_CNTL

@@ -797,126 +807,126 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
    0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
    0x00000000, // PA_SC_NGG_MODE_CNTL
    0x00000000, // PA_SC_BINNER_CNTL_2
    0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
    0x00000020, // VGT_OUT_DEALLOC_CNTL
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR0_BASE
    0x00000000, // CB_COLOR0_PITCH
    0x00000000, // CB_COLOR0_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR0_VIEW
    0x00000000, // CB_COLOR0_INFO
    0x00000000, // CB_COLOR0_ATTRIB
    0x00000000, // CB_COLOR0_DCC_CONTROL
    0x00000000, // CB_COLOR0_CMASK
    0x00000000, // CB_COLOR0_CMASK_SLICE
    0x00000000, // CB_COLOR0_FMASK
    0x00000000, // CB_COLOR0_FMASK_SLICE
    0x00000000, // CB_COLOR0_CLEAR_WORD0
    0x00000000, // CB_COLOR0_CLEAR_WORD1
    0x00000000, // CB_COLOR0_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR0_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR1_BASE
    0x00000000, // CB_COLOR1_PITCH
    0x00000000, // CB_COLOR1_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR1_VIEW
    0x00000000, // CB_COLOR1_INFO
    0x00000000, // CB_COLOR1_ATTRIB
    0x00000000, // CB_COLOR1_DCC_CONTROL
    0x00000000, // CB_COLOR1_CMASK
    0x00000000, // CB_COLOR1_CMASK_SLICE
    0x00000000, // CB_COLOR1_FMASK
    0x00000000, // CB_COLOR1_FMASK_SLICE
    0x00000000, // CB_COLOR1_CLEAR_WORD0
    0x00000000, // CB_COLOR1_CLEAR_WORD1
    0x00000000, // CB_COLOR1_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR1_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR2_BASE
    0x00000000, // CB_COLOR2_PITCH
    0x00000000, // CB_COLOR2_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR2_VIEW
    0x00000000, // CB_COLOR2_INFO
    0x00000000, // CB_COLOR2_ATTRIB
    0x00000000, // CB_COLOR2_DCC_CONTROL
    0x00000000, // CB_COLOR2_CMASK
    0x00000000, // CB_COLOR2_CMASK_SLICE
    0x00000000, // CB_COLOR2_FMASK
    0x00000000, // CB_COLOR2_FMASK_SLICE
    0x00000000, // CB_COLOR2_CLEAR_WORD0
    0x00000000, // CB_COLOR2_CLEAR_WORD1
    0x00000000, // CB_COLOR2_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR2_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR3_BASE
    0x00000000, // CB_COLOR3_PITCH
    0x00000000, // CB_COLOR3_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR3_VIEW
    0x00000000, // CB_COLOR3_INFO
    0x00000000, // CB_COLOR3_ATTRIB
    0x00000000, // CB_COLOR3_DCC_CONTROL
    0x00000000, // CB_COLOR3_CMASK
    0x00000000, // CB_COLOR3_CMASK_SLICE
    0x00000000, // CB_COLOR3_FMASK
    0x00000000, // CB_COLOR3_FMASK_SLICE
    0x00000000, // CB_COLOR3_CLEAR_WORD0
    0x00000000, // CB_COLOR3_CLEAR_WORD1
    0x00000000, // CB_COLOR3_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR3_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR4_BASE
    0x00000000, // CB_COLOR4_PITCH
    0x00000000, // CB_COLOR4_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR4_VIEW
    0x00000000, // CB_COLOR4_INFO
    0x00000000, // CB_COLOR4_ATTRIB
    0x00000000, // CB_COLOR4_DCC_CONTROL
    0x00000000, // CB_COLOR4_CMASK
    0x00000000, // CB_COLOR4_CMASK_SLICE
    0x00000000, // CB_COLOR4_FMASK
    0x00000000, // CB_COLOR4_FMASK_SLICE
    0x00000000, // CB_COLOR4_CLEAR_WORD0
    0x00000000, // CB_COLOR4_CLEAR_WORD1
    0x00000000, // CB_COLOR4_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR4_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR5_BASE
    0x00000000, // CB_COLOR5_PITCH
    0x00000000, // CB_COLOR5_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR5_VIEW
    0x00000000, // CB_COLOR5_INFO
    0x00000000, // CB_COLOR5_ATTRIB
    0x00000000, // CB_COLOR5_DCC_CONTROL
    0x00000000, // CB_COLOR5_CMASK
    0x00000000, // CB_COLOR5_CMASK_SLICE
    0x00000000, // CB_COLOR5_FMASK
    0x00000000, // CB_COLOR5_FMASK_SLICE
    0x00000000, // CB_COLOR5_CLEAR_WORD0
    0x00000000, // CB_COLOR5_CLEAR_WORD1
    0x00000000, // CB_COLOR5_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR5_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR6_BASE
    0x00000000, // CB_COLOR6_PITCH
    0x00000000, // CB_COLOR6_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR6_VIEW
    0x00000000, // CB_COLOR6_INFO
    0x00000000, // CB_COLOR6_ATTRIB
    0x00000000, // CB_COLOR6_DCC_CONTROL
    0x00000000, // CB_COLOR6_CMASK
    0x00000000, // CB_COLOR6_CMASK_SLICE
    0x00000000, // CB_COLOR6_FMASK
    0x00000000, // CB_COLOR6_FMASK_SLICE
    0x00000000, // CB_COLOR6_CLEAR_WORD0
    0x00000000, // CB_COLOR6_CLEAR_WORD1
    0x00000000, // CB_COLOR6_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR6_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR7_BASE
    0x00000000, // CB_COLOR7_PITCH
    0x00000000, // CB_COLOR7_SLICE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR7_VIEW
    0x00000000, // CB_COLOR7_INFO
    0x00000000, // CB_COLOR7_ATTRIB
    0x00000000, // CB_COLOR7_DCC_CONTROL
    0x00000000, // CB_COLOR7_CMASK
    0x00000000, // CB_COLOR7_CMASK_SLICE
    0x00000000, // CB_COLOR7_FMASK
    0x00000000, // CB_COLOR7_FMASK_SLICE
    0x00000000, // CB_COLOR7_CLEAR_WORD0
    0x00000000, // CB_COLOR7_CLEAR_WORD1
    0x00000000, // CB_COLOR7_FDCC_CONTROL
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR7_DCC_BASE
    0, // HOLE
    0x00000000, // CB_COLOR0_BASE_EXT

@@ -927,22 +937,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
    0x00000000, // CB_COLOR5_BASE_EXT
    0x00000000, // CB_COLOR6_BASE_EXT
    0x00000000, // CB_COLOR7_BASE_EXT
    0x00000000, // CB_COLOR0_CMASK_BASE_EXT
    0x00000000, // CB_COLOR1_CMASK_BASE_EXT
    0x00000000, // CB_COLOR2_CMASK_BASE_EXT
    0x00000000, // CB_COLOR3_CMASK_BASE_EXT
    0x00000000, // CB_COLOR4_CMASK_BASE_EXT
    0x00000000, // CB_COLOR5_CMASK_BASE_EXT
    0x00000000, // CB_COLOR6_CMASK_BASE_EXT
    0x00000000, // CB_COLOR7_CMASK_BASE_EXT
    0x00000000, // CB_COLOR0_FMASK_BASE_EXT
    0x00000000, // CB_COLOR1_FMASK_BASE_EXT
    0x00000000, // CB_COLOR2_FMASK_BASE_EXT
    0x00000000, // CB_COLOR3_FMASK_BASE_EXT
    0x00000000, // CB_COLOR4_FMASK_BASE_EXT
    0x00000000, // CB_COLOR5_FMASK_BASE_EXT
    0x00000000, // CB_COLOR6_FMASK_BASE_EXT
    0x00000000, // CB_COLOR7_FMASK_BASE_EXT
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0, // HOLE
    0x00000000, // CB_COLOR0_DCC_BASE_EXT
    0x00000000, // CB_COLOR1_DCC_BASE_EXT
    0x00000000, // CB_COLOR2_DCC_BASE_EXT

@@ -976,8 +986,7 @@ static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
    {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
    {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
    {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
    {gfx11_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
    {gfx11_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
    {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
    { 0, 0, 0 }
};
static const struct cs_section_def gfx11_cs_data[] = {
@ -53,7 +53,7 @@
|
|||
* 2. Async ring
|
||||
*/
|
||||
#define GFX10_NUM_GFX_RINGS_NV1X 1
|
||||
#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
|
||||
#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
|
||||
#define GFX10_MEC_HPD_SIZE 2048
|
||||
|
||||
#define F32_CE_PROGRAM_RAM_SIZE 65536
|
||||
|
@@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;

WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
}

amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
amdgpu_ring_write(ring, scratch -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);

for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
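The test shape above (seed a scratch register, ask the CP to overwrite it, then poll) is small enough to restate outside the driver. A sketch under assumed callbacks; the ops members stand in for the WREG32/RREG32 and PACKET3 plumbing, none of this is amdgpu API:

#include <errno.h>
#include <stdint.h>

struct ring_ops {
	void (*reg_write)(uint32_t reg, uint32_t val);
	uint32_t (*reg_read)(uint32_t reg);
	void (*submit_set_reg)(uint32_t reg, uint32_t val);
	void (*delay_us)(unsigned int us);
};

static int ring_test_sketch(const struct ring_ops *ops,
			    uint32_t scratch, unsigned int timeout_us)
{
	unsigned int i;

	ops->reg_write(scratch, 0xCAFEDEAD);      /* seed a known value */
	ops->submit_set_reg(scratch, 0xDEADBEEF); /* CP overwrites it */

	for (i = 0; i < timeout_us; i++) {        /* poll for the update */
		if (ops->reg_read(scratch) == 0xDEADBEEF)
			return 0;                 /* ring is alive */
		ops->delay_us(1);
	}
	return -ETIMEDOUT;
}
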
@@ -4711,6 +4712,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
{
struct amdgpu_ring *ring;
unsigned int irq_type;
unsigned int hw_prio;

ring = &adev->gfx.gfx_ring[ring_id];

@@ -4728,8 +4730,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
AMDGPU_RING_PRIO_DEFAULT, NULL);
hw_prio, NULL);
}

static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@@ -4791,7 +4795,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -6581,6 +6585,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
}
}

static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
struct v10_gfx_mqd *mqd,
struct amdgpu_mqd_prop *prop)
{
bool priority = 0;
u32 tmp;

/* set up default queue priority level
 * 0x0 = low priority, 0x1 = high priority
 */
if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
priority = 1;

tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
mqd->cp_gfx_hqd_queue_priority = tmp;
}

static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -6609,11 +6631,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;

/* set up default queue priority level
 * 0x0 = low priority, 0x1 = high priority */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
mqd->cp_gfx_hqd_queue_priority = tmp;
/* set up gfx queue priority */
gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);

/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);

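REG_SET_FIELD, used throughout these hunks, is a plain mask-and-shift read-modify-write. A simplified, self-contained approximation; the mask and shift constants below are invented for illustration, the real ones come from the generated register headers:

#include <stdint.h>

/* Illustrative stand-ins for the generated <REG>__<FIELD>_MASK and
 * <REG>__<FIELD>__SHIFT constants; values here are made up. */
#define HQD_QUEUE_PRIORITY__PRIORITY_LEVEL_MASK   0x0000000FU
#define HQD_QUEUE_PRIORITY__PRIORITY_LEVEL__SHIFT 0

static inline uint32_t set_field(uint32_t reg, uint32_t mask,
				 unsigned int shift, uint32_t val)
{
	/* clear the field, then merge in the shifted new value */
	return (reg & ~mask) | ((val << shift) & mask);
}
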
@@ -4563,6 +4563,9 @@ static int gfx_v11_0_hw_init(void *handle)
if (adev->gfx.imu.funcs->start_imu)
adev->gfx.imu.funcs->start_imu(adev);
}

/* disable gpa mode in backdoor loading */
gfx_v11_0_disable_gpa_mode(adev);
}
}

@@ -4781,19 +4784,17 @@ static int gfx_v11_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v11_0_cp_compute_enable(adev, false);

if (grbm_soft_reset) {
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
tmp |= grbm_soft_reset;
dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
tmp |= grbm_soft_reset;
dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

udelay(50);
udelay(50);

tmp &= ~grbm_soft_reset;
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
}
tmp &= ~grbm_soft_reset;
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

/* Wait a little for things to settle down */
udelay(50);

@@ -6293,6 +6294,11 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
adev->gfx.imu.mode = MISSION_MODE;
else
adev->gfx.imu.mode = DEBUG_MODE;

adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

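The soft-reset hunk above follows a common assert/settle/deassert shape. A compact standalone sketch of that shape; reg_read, reg_write and delay_us are assumed MMIO helpers, not driver symbols:

#include <stdint.h>

static void soft_reset_sketch(uint32_t reset_reg, uint32_t reset_bits,
			      uint32_t (*reg_read)(uint32_t),
			      void (*reg_write)(uint32_t, uint32_t),
			      void (*delay_us)(unsigned int))
{
	uint32_t tmp = reg_read(reset_reg);

	tmp |= reset_bits;           /* assert reset */
	reg_write(reset_reg, tmp);
	(void)reg_read(reset_reg);   /* read back to post the write */

	delay_us(50);                /* same settle time used above */

	tmp &= ~reset_bits;          /* deassert reset */
	reg_write(reset_reg, tmp);
	(void)reg_read(reset_reg);
}
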
@@ -456,7 +456,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
gmc_v10_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
break;
if (!adev->enable_mes)
break;
}
}

@@ -37,6 +37,7 @@
#include "nbio_v4_3.h"
#include "gfxhub_v3_0.h"
#include "mmhub_v3_0.h"
#include "mmhub_v3_0_1.h"
#include "mmhub_v3_0_2.h"
#include "athub_v3_0.h"

@@ -267,7 +268,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
 * Directly use kiq to do the vm invalidation instead
 */
if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@@ -343,7 +344,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
gmc_v11_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
break;
}
}

@@ -548,6 +548,9 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(3, 0, 1):
adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
break;
case IP_VERSION(3, 0, 2):
adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
break;

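The set_mmhub_funcs hunk is an instance of amdgpu's per-IP dispatch pattern: switch on a packed version code, bind a funcs table. A standalone sketch of the idea; the struct, table names, and version encoding here are illustrative, not amdgpu's:

#include <stdint.h>
#include <stddef.h>

/* Pack major/minor/rev the way IP_VERSION-style macros typically do;
 * this exact encoding is an assumption for the sketch. */
#define IPVER(maj, min, rev) (((uint32_t)(maj) << 16) | ((min) << 8) | (rev))

struct mmhub_funcs_sketch {
	int (*gart_enable)(void *ctx);
};

static const struct mmhub_funcs_sketch funcs_v3_0_1 = { 0 };
static const struct mmhub_funcs_sketch funcs_v3_0_2 = { 0 };

static const struct mmhub_funcs_sketch *pick_mmhub_funcs(uint32_t ipver)
{
	switch (ipver) {
	case IPVER(3, 0, 1):
		return &funcs_v3_0_1;
	case IPVER(3, 0, 2):
		return &funcs_v3_0_2;
	default:
		return NULL; /* unsupported MMHUB revision */
	}
}
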
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_imu.h"
#include "amdgpu_dpm.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"

@@ -117,32 +118,9 @@ static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
return 0;
}

static void imu_v11_0_setup(struct amdgpu_device *adev)
static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev)
{
int imu_reg_val;

//enable IMU debug mode
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);

imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
imu_reg_val |= 0x1;
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);

//disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
}

static int imu_v11_0_start(struct amdgpu_device *adev)
{
int imu_reg_val, i;

//Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
imu_reg_val &= 0xfffffffe;
WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
int i, imu_reg_val = 0;

for (i = 0; i < adev->usec_timeout; i++) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
@@ -159,6 +137,41 @@ static int imu_v11_0_start(struct amdgpu_device *adev)
return 0;
}

static void imu_v11_0_setup(struct amdgpu_device *adev)
{
int imu_reg_val;

//enable IMU debug mode
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);

if (adev->gfx.imu.mode == DEBUG_MODE) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
imu_reg_val |= 0x1;
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
}

//disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
}

static int imu_v11_0_start(struct amdgpu_device *adev)
{
int imu_reg_val;

//Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
imu_reg_val &= 0xfffffffe;
WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);

if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_gfx_power_up_by_imu(adev);

return imu_v11_0_wait_for_reset_status(adev);
}

static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
{
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
@@ -364,4 +377,5 @@ const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
.setup_imu = imu_v11_0_setup,
.start_imu = imu_v11_0_start,
.program_rlc_ram = imu_v11_0_program_rlc_ram,
.wait_for_reset_status = imu_v11_0_wait_for_reset_status,
};

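imu_v11_0_wait_for_reset_status above is a poll-with-timeout loop over a status register. A generic standalone rendering of that shape; read_status and delay_us are assumed callbacks:

#include <errno.h>
#include <stdint.h>

/* Spin until any bit in done_mask is set, or give up after
 * timeout_us iterations of roughly one microsecond each. */
static int wait_for_bits(uint32_t (*read_status)(void),
			 void (*delay_us)(unsigned int),
			 uint32_t done_mask, unsigned int timeout_us)
{
	unsigned int i;

	for (i = 0; i < timeout_us; i++) {
		if (read_status() & done_mask)
			return 0;
		delay_us(1);
	}
	return -ETIMEDOUT; /* hardware never reported completion */
}
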
@@ -156,7 +156,13 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
input->gang_global_priority_level;
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
mes_add_queue_pkt.wptr_addr = input->wptr_addr;

if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
AMDGPU_MES_API_VERSION_SHIFT) >= 2)
mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
else
mes_add_queue_pkt.wptr_addr = input->wptr_addr;

mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
@@ -165,6 +171,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;

mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@@ -267,6 +274,58 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
&mes_status_pkt, sizeof(mes_status_pkt));
}

static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
struct mes_misc_op_input *input)
{
union MESAPI__MISC misc_pkt;

memset(&misc_pkt, 0, sizeof(misc_pkt));

misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
misc_pkt.header.opcode = MES_SCH_API_MISC;
misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

switch (input->op) {
case MES_MISC_OP_READ_REG:
misc_pkt.opcode = MESAPI_MISC__READ_REG;
misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
break;
case MES_MISC_OP_WRITE_REG:
misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
break;
case MES_MISC_OP_WRM_REG_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = 0;
break;
case MES_MISC_OP_WRM_REG_WR_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
break;
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
}

misc_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
misc_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;

return mes_v11_0_submit_pkt_and_poll_completion(mes,
&misc_pkt, sizeof(misc_pkt));
}

static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
{
int i;

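A caller-side sketch of feeding the misc-op path above; the struct is a hypothetical mirror of the fields mes_v11_0_misc_op consumes (the real struct mes_misc_op_input lives in the amdgpu MES headers):

#include <stdint.h>

enum misc_op_sketch { OP_READ_REG, OP_WRITE_REG, OP_WRM_REG_WAIT };

struct misc_op_input_sketch {
	enum misc_op_sketch op;
	union {
		struct { uint32_t reg_offset; uint64_t buffer_addr; } read_reg;
		struct { uint32_t reg_offset; uint32_t reg_value; } write_reg;
		struct { uint32_t ref, mask, reg0, reg1; } wrm_reg;
	};
};

/* Build a request asking the scheduler firmware to spin until
 * (*reg0 & mask) matches (ref & mask) -- the WAIT_REG_MEM semantics
 * encoded by the packet fill above. */
static struct misc_op_input_sketch make_reg_wait(uint32_t reg, uint32_t ref,
						 uint32_t mask)
{
	struct misc_op_input_sketch in = { .op = OP_WRM_REG_WAIT };

	in.wrm_reg.ref = ref;
	in.wrm_reg.mask = mask;
	in.wrm_reg.reg0 = reg;
	in.wrm_reg.reg1 = 0; /* unused for a plain wait, per the fill above */
	return in;
}
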
@@ -312,6 +371,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;

mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@@ -328,6 +388,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
.suspend_gang = mes_v11_0_suspend_gang,
.resume_gang = mes_v11_0_resume_gang,
.misc_op = mes_v11_0_misc_op,
};

static int mes_v11_0_init_microcode(struct amdgpu_device *adev,
@@ -858,6 +919,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
mes_v11_0_queue_init_register(ring);
}

/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, 3, pipe, 0, 0);

if (pipe == AMDGPU_MES_SCHED_PIPE)
adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);

soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);

return 0;
}

@@ -1120,6 +1193,7 @@ static int mes_v11_0_hw_init(void *handle)
 * with MES enabled.
 */
adev->gfx.kiq.ring.sched.ready = false;
adev->mes.ring.sched.ready = true;

return 0;

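The add_hw_queue hunk gates behavior on the API-version field packed into mes.sched_version, which queue_init reads back from the firmware above. A sketch of that decode; the mask and shift values here are invented for illustration, the real AMDGPU_MES_API_VERSION_MASK/SHIFT live in the amdgpu MES headers:

#include <stdint.h>
#include <stdbool.h>

/* Illustrative field layout only. */
#define MES_API_VERSION_SHIFT 12
#define MES_API_VERSION_MASK  (0xFFu << MES_API_VERSION_SHIFT)

static bool mes_wants_mc_wptr(uint32_t sched_version)
{
	uint32_t api = (sched_version & MES_API_VERSION_MASK) >>
		       MES_API_VERSION_SHIFT;

	return api >= 2; /* newer firmware takes an MC address for wptr */
}
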
555 drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c Normal file

@@ -0,0 +1,555 @@
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "mmhub_v3_0_1.h"

#include "mmhub/mmhub_3_0_1_offset.h"
#include "mmhub/mmhub_3_0_1_sh_mask.h"
#include "navi10_enum.h"

#include "soc15_common.h"

#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0

static const char *mmhub_client_ids_v3_0_1[][2] = {
[0][0] = "VMC",
[4][0] = "DCEDMC",
[5][0] = "DCEVGA",
[6][0] = "MP0",
[7][0] = "MP1",
[8][0] = "MPIO",
[16][0] = "HDP",
[17][0] = "LSDMA",
[18][0] = "JPEG",
[19][0] = "VCNU0",
[21][0] = "VSCH",
[22][0] = "VCNU1",
[23][0] = "VCN1",
[32+20][0] = "VCN0",
[2][1] = "DBGUNBIO",
[3][1] = "DCEDWB",
[4][1] = "DCEDMC",
[5][1] = "DCEVGA",
[6][1] = "MP0",
[7][1] = "MP1",
[8][1] = "MPIO",
[10][1] = "DBGU0",
[11][1] = "DBGU1",
[12][1] = "DBGU2",
[13][1] = "DBGU3",
[14][1] = "XDP",
[15][1] = "OSSSYS",
[16][1] = "HDP",
[17][1] = "LSDMA",
[18][1] = "JPEG",
[19][1] = "VCNU0",
[20][1] = "VCN0",
[21][1] = "VSCH",
[22][1] = "VCNU1",
[23][1] = "VCN1",
};

static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;

/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

return req;
}

static void
mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
const char *mmhub_cid = NULL;

cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, RW);

dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);

switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(3, 0, 1):
mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
break;
default:
mmhub_cid = NULL;
break;
}

dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%x\n", rw);
}

static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];

WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));

WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}

static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);

mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base);

WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));

WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}

static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
uint32_t tmp;

/* Program the AGP BAR */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);

/*
 * the new L1 policy will block SRIOV guest from writing
 * these regs, and they will be programmed at host,
 * so skip programming these regs.
 */
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18);

/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));

/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));

tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}

static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;

/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);

tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */

WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
}

static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;

/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);

tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);

tmp = regMMVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);

tmp = regMMVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);

tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
}

static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;

tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
}

static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
{
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);

WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);

WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
0);
}

static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
int i;
uint32_t tmp;

for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}

hub->vm_cntx_cntl = tmp;
}

static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;

for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}

static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
{
/* GART Enable. */
mmhub_v3_0_1_init_gart_aperture_regs(adev);
mmhub_v3_0_1_init_system_aperture_regs(adev);
mmhub_v3_0_1_init_tlb_regs(adev);
mmhub_v3_0_1_init_cache_regs(adev);

mmhub_v3_0_1_enable_system_domain(adev);
mmhub_v3_0_1_disable_identity_aperture(adev);
mmhub_v3_0_1_setup_vmid_config(adev);
mmhub_v3_0_1_program_invalidation(adev);

return 0;
}

static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i;

/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);

/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);

/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
}

/**
 * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling
 *
 * @adev: amdgpu_device pointer
 * @value: true redirects VM faults to the default page
 */
static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;

tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
}

static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
.print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status,
.get_invalidate_req = mmhub_v3_0_1_get_invalidate_req,
};

static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];

hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);

hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
regMMVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;

hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
}

static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
{
u64 base;

base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;

return base;
}

static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
}

static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
//TODO
}

static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
//TODO
}

static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE);
return 0;
}

static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
//TODO
}

const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
.init = mmhub_v3_0_1_init,
.get_fb_location = mmhub_v3_0_1_get_fb_location,
.get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset,
.gart_enable = mmhub_v3_0_1_gart_enable,
.set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default,
.gart_disable = mmhub_v3_0_1_gart_disable,
.set_clockgating = mmhub_v3_0_1_set_clockgating,
.get_clockgating = mmhub_v3_0_1_get_clockgating,
.setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs,
};
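mmhub_v3_0_1_init above records register strides (ctx_distance, ctx_addr_distance) so that one code path can address all per-VMID context registers by index. A standalone sketch of that indexed-register idea; the struct and reg_write callback are placeholders, not driver symbols:

#include <stdint.h>

struct hub_layout_sketch {
	uint32_t ctx0_cntl;     /* offset of the CONTEXT0 control register */
	uint32_t ctx_distance;  /* CONTEXT(n+1) minus CONTEXT(n) */
};

/* Mirrors the gart_disable loop above: write 0 to every context's
 * control register using the recorded stride. */
static void disable_all_contexts(const struct hub_layout_sketch *hub,
				 void (*reg_write)(uint32_t, uint32_t))
{
	unsigned int vmid;

	for (vmid = 0; vmid < 16; vmid++)
		reg_write(hub->ctx0_cntl + vmid * hub->ctx_distance, 0);
}
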
28 drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h Normal file

@@ -0,0 +1,28 @@
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#ifndef __MMHUB_V3_0_1_H__
#define __MMHUB_V3_0_1_H__

extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs;

#endif

@@ -284,7 +284,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover_imp(adev, NULL);
amdgpu_device_gpu_recover(adev, NULL);
}

static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,

@@ -311,7 +311,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover_imp(adev, NULL);
amdgpu_device_gpu_recover(adev, NULL);
}

static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,

@@ -523,7 +523,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)

/* Trigger recovery due to world switch failure */
if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover_imp(adev, NULL);
amdgpu_device_gpu_recover(adev, NULL);
}

static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,

@@ -547,7 +547,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
{
uint32_t reg, reg_data;

if (adev->asic_type != CHIP_SIENNA_CICHLID)
if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0))
return;

reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);

@@ -240,8 +240,11 @@ static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;

if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
return;

def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
if (enable) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@@ -266,9 +269,12 @@ static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;

if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
return;

/* TODO: need update in future */
def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
if (enable) {
data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
} else {
data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
@@ -344,6 +350,121 @@ static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
return rom_offset;
}

#ifdef CONFIG_PCIEASPM
static void nbio_v4_3_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;

def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
data = 0x35EB;
data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);

def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);

def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
if (adev->pdev->ltr_path)
data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
else
data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
#endif

static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
{
#ifdef CONFIG_PCIEASPM
uint32_t def, data;

if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) &&
!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0)))
return;

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7);
data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data);

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);

def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);

def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);

def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);

WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);

def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4);
data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data);

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);

nbio_v4_3_program_ltr(adev);

def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);

def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);

def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
#endif
}

const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,

@@ -365,4 +486,5 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.init_registers = nbio_v4_3_init_registers,
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
.program_aspm = nbio_v4_3_program_aspm,
};

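program_aspm above repeats one idiom dozens of times: snapshot a register into def and data, edit data, and write back only when the value changed. A compact standalone rendering of that write-avoidance idiom, with reg_read/reg_write as assumed helpers:

#include <stdint.h>

static void rmw_if_changed(uint32_t reg, uint32_t clear, uint32_t set,
			   uint32_t (*reg_read)(uint32_t),
			   void (*reg_write)(uint32_t, uint32_t))
{
	uint32_t def, data;

	def = data = reg_read(reg); /* snapshot current value */
	data &= ~clear;
	data |= set;
	if (def != data)            /* skip the write when nothing changed */
		reg_write(reg, data);
}
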
@@ -58,10 +58,16 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan
bool use_doorbell, int doorbell_index,
int doorbell_size)
{
u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE);
u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
u32 doorbell_range = RREG32_PCIE_PORT(reg);

if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_CSDMA_DOORBELL_RANGE,
OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_CSDMA_DOORBELL_RANGE,
SIZE, doorbell_size);
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
OFFSET, doorbell_index);

@@ -39,7 +39,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");

/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240

@@ -103,6 +105,10 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
err = psp_init_sos_microcode(psp, chip_name);
if (err)
return err;
/* It's not necessary to load ras ta on Guest side */
err = psp_init_ta_microcode(psp, chip_name);
if (err)
return err;
break;

@@ -310,6 +310,7 @@ static enum amd_reset_method
soc21_asic_reset_method(struct amdgpu_device *adev)
{
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;

@@ -320,6 +321,8 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 0):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
return AMD_RESET_METHOD_BACO;
@@ -341,6 +344,10 @@ static int soc21_asic_reset(struct amdgpu_device *adev)
dev_info(adev->dev, "BACO reset\n");
ret = amdgpu_dpm_baco_reset(adev);
break;
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
ret = amdgpu_dpm_mode2_reset(adev);
break;
default:
dev_info(adev->dev, "MODE1 reset\n");
ret = amdgpu_device_mode1_reset(adev);
@@ -379,11 +386,12 @@ static void soc21_pcie_gen3_enable(struct amdgpu_device *adev)

static void soc21_program_aspm(struct amdgpu_device *adev)
{

if (amdgpu_aspm == 0)
if (!amdgpu_device_should_use_aspm(adev))
return;

/* todo */
if (!(adev->flags & AMD_IS_APU) &&
(adev->nbio.funcs->program_aspm))
adev->nbio.funcs->program_aspm(adev);
}

static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev,

@@ -555,8 +563,11 @@ static int soc21_common_early_init(void *handle)
adev->cg_flags =
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS;
adev->pg_flags =
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |

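soc21_asic_reset_method above layers a module-parameter override on top of per-SMU defaults. The decision tree is small enough to restate standalone; the enum names and version codes below are illustrative stand-ins, not amdgpu definitions:

#include <stdint.h>

enum reset_method_sketch { RST_MODE1, RST_MODE2, RST_BACO, RST_UNSET };

/* override: the user's reset_method module-parameter choice
 * (RST_UNSET if none); mp1_ver: packed MP1/SMU IP version;
 * baco_ok: whether BACO is supported. */
static enum reset_method_sketch
pick_reset_method(enum reset_method_sketch override,
		  uint32_t mp1_ver, int baco_ok)
{
	if (override != RST_UNSET)
		return override;       /* explicit user request wins */
	if (mp1_ver == 0x0d0000)       /* stands in for SMU 13.0.0 */
		return RST_MODE1;
	if (mp1_ver == 0x0d0004)       /* stands in for SMU 13.0.4 */
		return RST_MODE2;
	return baco_ok ? RST_BACO : RST_MODE1;
}
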
@@ -119,6 +119,24 @@ static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device
*error_count += 1;

umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

if (ras->umc_ecc.record_ce_addr_supported) {
uint64_t err_addr, soc_pa;
uint32_t channel_index =
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
/* translate umc channel address to soc pa, 3 parts are included */
soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);

/* The umc channel bits are not original values, they are hashed */
SET_CHANNEL_HASH(channel_index, soc_pa);

dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
}
}
}

@@ -251,7 +269,9 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev

static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
unsigned long *error_count)
unsigned long *error_count,
uint32_t ch_inst,
uint32_t umc_inst)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt, ecc_err_cnt_addr;
@@ -295,6 +315,31 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
*error_count += 1;

umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

{
uint64_t err_addr, soc_pa;
uint32_t mc_umc_addrt0;
uint32_t channel_index;

mc_umc_addrt0 =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

channel_index =
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

/* translate umc channel address to soc pa, 3 parts are included */
soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);

/* The umc channel bits are not original values, they are hashed */
SET_CHANNEL_HASH(channel_index, soc_pa);

dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
}
}
}

@@ -395,7 +440,8 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
ch_inst);
umc_v6_7_query_correctable_error_count(adev,
umc_reg_offset,
&(err_data->ce_count));
&(err_data->ce_count),
ch_inst, umc_inst);
umc_v6_7_querry_uncorrectable_error_count(adev,
umc_reg_offset,
&(err_data->ue_count));

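Both UMC hunks compose the SoC physical address from three pieces of the per-channel error address. A sketch of that composition with placeholder bit positions; the real ADDR_OF_8KB_BLOCK/ADDR_OF_256B_BLOCK/OFFSET_IN_256B_BLOCK macros and the channel hashing live in the umc_v6_7 headers:

#include <stdint.h>

/* Placeholder split: bits [7:0] are the offset inside a 256B block,
 * the channel selects the 256B block, and the remaining error-address
 * bits select an 8KB block. Bit positions here are illustrative. */
static uint64_t compose_soc_pa(uint64_t err_addr, uint32_t channel)
{
	uint64_t soc_pa;

	soc_pa  = (err_addr >> 8) << 13;           /* 8KB-block part */
	soc_pa |= ((uint64_t)channel & 0x1f) << 8; /* channel -> 256B block */
	soc_pa |= err_addr & 0xff;                 /* offset inside block */
	return soc_pa; /* pre-hash address; channel bits get hashed after */
}
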
@@ -29,7 +29,6 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "vcn_sw_ring.h"

#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"

@@ -45,15 +44,12 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300

bool unifiedQ_enabled = false;

static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};

static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
@@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;

if (unifiedQ_enabled) {
adev->vcn.num_vcn_inst = 1;
adev->vcn.num_enc_rings = 1;
} else {
adev->vcn.num_enc_rings = 2;
}
/* re-use enc ring as unified ring */
adev->vcn.num_enc_rings = 1;

if (!unifiedQ_enabled)
vcn_v4_0_set_dec_ring_funcs(adev);

vcn_v4_0_set_enc_ring_funcs(adev);
vcn_v4_0_set_unified_ring_funcs(adev);
vcn_v4_0_set_irq_funcs(adev);

return 0;
}

static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
{
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;

hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
DRM_INFO("PSP loading VCN firmware\n");
}
}

/**
 * vcn_v4_0_sw_init - sw init for VCN block
 *

@ -111,17 +86,14 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
|
|||
static int vcn_v4_0_sw_init(void *handle)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
int i, j, r;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int i, r;
|
||||
|
||||
r = amdgpu_vcn_sw_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (unifiedQ_enabled)
|
||||
amdgpu_vcn_setup_unified_queue_ucode(adev);
|
||||
else
|
||||
amdgpu_vcn_setup_ucode(adev);
|
||||
amdgpu_vcn_setup_ucode(adev);
|
||||
|
||||
r = amdgpu_vcn_resume(adev);
|
||||
if (r)
|
||||
|
@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle)
|
|||
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
|
||||
|
||||
if (adev->vcn.harvest_config & (1 << i))
|
||||
continue;
|
||||
/* VCN DEC TRAP */
|
||||
|
||||
atomic_set(&adev->vcn.inst[i].sched_score, 0);
|
||||
|
||||
/* VCN UNIFIED TRAP */
|
||||
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
|
||||
VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq);
|
||||
VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
atomic_set(&adev->vcn.inst[i].sched_score, 0);
|
||||
if (!unifiedQ_enabled) {
|
||||
ring = &adev->vcn.inst[i].ring_dec;
|
||||
ring->use_doorbell = true;
|
||||
ring = &adev->vcn.inst[i].ring_enc[0];
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
|
||||
|
||||
/* VCN4 doorbell layout
|
||||
* 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg)
|
||||
* 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue)
|
||||
* 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue)
|
||||
* 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved)
|
||||
* 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue)
|
||||
*/
|
||||
sprintf(ring->name, "vcn_unified_%d", i);
|
||||
|
||||
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1)
|
||||
+ 5 + 8 * i;
|
||||
|
||||
sprintf(ring->name, "vcn_dec_%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
|
||||
AMDGPU_RING_PRIO_DEFAULT,
|
||||
&adev->vcn.inst[i].sched_score);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
|
||||
/* VCN ENC TRAP */
|
||||
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
|
||||
j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->vcn.inst[i].ring_enc[j];
|
||||
ring->use_doorbell = true;
|
||||
|
||||
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
|
||||
|
||||
if (unifiedQ_enabled) {
|
||||
sprintf(ring->name, "vcn_unified%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
|
||||
AMDGPU_RING_PRIO_DEFAULT, NULL);
|
||||
} else {
|
||||
enum amdgpu_ring_priority_level hw_prio;
|
||||
|
||||
hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
|
||||
sprintf(ring->name, "vcn_enc_%d.%d", i, j);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
|
||||
hw_prio, &adev->vcn.inst[i].sched_score);
|
||||
}
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
|
||||
AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
|
||||
fw_shared->present_flag_0 = 0;
|
||||
|
||||
if (unifiedQ_enabled) {
|
||||
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
|
||||
fw_shared->sq.is_enabled = 1;
|
||||
}
|
||||
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
|
||||
fw_shared->sq.is_enabled = 1;
|
||||
|
||||
if (amdgpu_vcnfw_log)
|
||||
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
|
||||
}
|
||||
|
||||
if (!unifiedQ_enabled) {
|
||||
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
|
||||
adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
|
||||
}
|
||||
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
|
||||
adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
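A side effect of the unified-queue cleanup above is that each VCN instance now claims a single doorbell at `(vcn_ring0_1 << 1) + 2 + 8 * i`, replacing the separate decode slot (`+ 5`) and per-encode-ring slots (`+ 2 + j`) of the old layout. A small sketch of the index arithmetic follows; the base value is invented for illustration, since the real one comes from `adev->doorbell_index` at runtime.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t vcn_ring0_1 = 0x18; /* hypothetical doorbell base */

	for (int i = 0; i < 2; i++) {
		/* unified ring: one doorbell per instance, stride of 8 */
		uint32_t unified = (vcn_ring0_1 << 1) + 2 + 8 * i;

		printf("VCN%d unified queue doorbell: %u\n", i, unified);
	}
	return 0;
}
```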
@@ -220,19 +151,19 @@ static int vcn_v4_0_sw_fini(void *handle)
	int i, r, idx;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			volatile struct amdgpu_vcn4_fw_shared *fw_shared;
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			volatile struct amdgpu_vcn4_fw_shared *fw_shared;

			if (adev->vcn.harvest_config & (1 << i))
				continue;
			if (adev->vcn.harvest_config & (1 << i))
				continue;

			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
			fw_shared->present_flag_0 = 0;
			fw_shared->sq.is_enabled = 0;
		}
			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
			fw_shared->present_flag_0 = 0;
			fw_shared->sq.is_enabled = 0;
		}

		drm_dev_exit(idx);
	}
		drm_dev_exit(idx);
	}

	r = amdgpu_vcn_suspend(adev);
	if (r)

@@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle)
 {
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r;
	int i, r;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (unifiedQ_enabled)
			ring = &adev->vcn.inst[i].ring_enc[0];
		else
			ring = &adev->vcn.inst[i].ring_dec;

		ring = &adev->vcn.inst[i].ring_enc[0];

		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
				((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);

@@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle)
		r = amdgpu_ring_test_helper(ring);
		if (r)
			goto done;

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			ring = &adev->vcn.inst[i].ring_enc[j];
			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;
		}
	}

 done:

@@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);

	}

	if (!indirect)

@@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
	volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
	struct amdgpu_ring *ring;
	uint32_t tmp;
	int i;

	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,

@@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
		(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
			(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));

	if (unifiedQ_enabled) {
		ring = &adev->vcn.inst[inst_idx].ring_enc[0];
		fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
	} else
		ring = &adev->vcn.inst[inst_idx].ring_dec;
	ring = &adev->vcn.inst[inst_idx].ring_enc[0];

	WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL,
		ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
		VCN_RB4_DB_CTRL__EN_MASK);

	/* program the RB_BASE for ring buffer */
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4,
		lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4,
		upper_32_bits(ring->gpu_addr));

	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));

	/* resetting ring, fw should not check RB ring */
	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);

	/* Initialize the ring buffer's read and write pointers */
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp);
	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);

	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);

	WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0);
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);

	if (unifiedQ_enabled)
		fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET;
	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);

	for (i = 0; i < adev->vcn.num_enc_rings; i++) {
		ring = &adev->vcn.inst[inst_idx].ring_enc[i];
	WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
		ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
		VCN_RB1_DB_CTRL__EN_MASK);

		if (i) {
			ring = &adev->vcn.inst[inst_idx].ring_enc[1];

			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4);
			tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp);
			ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);

			WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL,
				ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
				VCN_RB2_DB_CTRL__EN_MASK);
		} else {
			ring = &adev->vcn.inst[inst_idx].ring_enc[0];

			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
			tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
			ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);

			WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
				ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
				VCN_RB1_DB_CTRL__EN_MASK);
		}
	}
	return 0;
 }

@@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
		amdgpu_dpm_enable_uvd(adev, true);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
			continue;

@@ -1081,15 +961,15 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)

		/* enable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
				UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);

		/* disable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
			~UVD_MASTINT_EN__VCPU_EN_MASK);
				~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* enable LMI MC and UMC channels */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;

@@ -1099,10 +979,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
		/* setup regUVD_LMI_CTRL */
		tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
		WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
				UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
				UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
				UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
				UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

		/* setup regUVD_MPC_CNTL */
		tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);

@@ -1112,37 +992,37 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)

		/* setup UVD_MPC_SET_MUXA0 */
		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
				((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
				(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
				(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
				(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

		/* setup UVD_MPC_SET_MUXB0 */
		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
				((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
				(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
				(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
				(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

		/* setup UVD_MPC_SET_MUX */
		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
				((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
				(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
				(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));

		vcn_v4_0_mc_resume(adev, i);

		/* VCN global tiling registers */
		WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);
				adev->gfx.config.gb_addr_config);

		/* unblock VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
				~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* release VCPU reset to boot */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
			~UVD_VCPU_CNTL__BLK_RST_MASK);
				~UVD_VCPU_CNTL__BLK_RST_MASK);

		for (j = 0; j < 10; ++j) {
			uint32_t status;

@@ -1166,13 +1046,13 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
			if (status & 2)
				break;

			dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
				UVD_VCPU_CNTL__BLK_RST_MASK,
				~UVD_VCPU_CNTL__BLK_RST_MASK);
			dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
					UVD_VCPU_CNTL__BLK_RST_MASK,
					~UVD_VCPU_CNTL__BLK_RST_MASK);
			mdelay(10);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
				~UVD_VCPU_CNTL__BLK_RST_MASK);
					~UVD_VCPU_CNTL__BLK_RST_MASK);

			mdelay(10);
			r = -1;

@@ -1180,78 +1060,43 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
		}

		if (r) {
			dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i);
			dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
			return r;
		}

		/* enable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
			UVD_MASTINT_EN__VCPU_EN_MASK,
			~UVD_MASTINT_EN__VCPU_EN_MASK);
				UVD_MASTINT_EN__VCPU_EN_MASK,
				~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* clear the busy bit of VCN_STATUS */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
				~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		if (unifiedQ_enabled) {
			ring = &adev->vcn.inst[i].ring_enc[0];
			fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
		} else {
			ring = &adev->vcn.inst[i].ring_dec;

			WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL,
				ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
				VCN_RB4_DB_CTRL__EN_MASK);

			/* program the RB_BASE for ring buffer */
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4,
				lower_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4,
				upper_32_bits(ring->gpu_addr));

			WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));

			/* resetting ring, fw should not check RB ring */
			tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
			tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
			WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);

			/* Initialize the ring buffer's read and write pointers */
			tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
			WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp);
			ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4);

			tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
			tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
			WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);

			ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
		}
		ring = &adev->vcn.inst[i].ring_enc[0];
		WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
			ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
			VCN_RB1_DB_CTRL__EN_MASK);
		tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
		ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
				ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
				VCN_RB1_DB_CTRL__EN_MASK);

		WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
		WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
		if (unifiedQ_enabled)
			fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
		else {
			ring = &adev->vcn.inst[i].ring_enc[1];
			WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL,
				ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
				VCN_RB2_DB_CTRL__EN_MASK);
			tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2);
			WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp);
			ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2);
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr);
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4);
		}

		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
		tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
		fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
		WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);

		tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
		ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);

		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
		tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
		fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
	}

	return 0;

@@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF);

	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

@@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 */
 static int vcn_v4_0_stop(struct amdgpu_device *adev)
 {
	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	uint32_t tmp;
	int i, r = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v4_0_stop_dpg_mode(adev, i);
			continue;

@@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
			/* unpause dpg, no need to wait */
			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
			SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
		}
		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
	}

@@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
 }

 /**
  * vcn_v4_0_dec_ring_get_rptr - get read pointer
  * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
  *
  * @ring: amdgpu_ring pointer
  *
  * Returns the current hardware read pointer
  * Returns the current hardware unified read pointer
  */
 static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
 {
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4);
	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
 }

 /**
  * vcn_v4_0_dec_ring_get_wptr - get write pointer
  * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
  *
  * @ring: amdgpu_ring pointer
  *
  * Returns the current hardware write pointer
  * Returns the current hardware unified write pointer
  */
 static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
 static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
 {
	struct amdgpu_device *adev = ring->adev;

	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;
	else
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4);
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
 }

 /**
  * vcn_v4_0_dec_ring_set_wptr - set write pointer
  *
  * @ring: amdgpu_ring pointer
  *
  * Commits the write pointer to the hardware
  */
 static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
	struct amdgpu_device *adev = ring->adev;

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2,
			lower_32_bits(ring->wptr));
	}

	if (ring->use_doorbell) {
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr));
	}
 }

 static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0x3f,
	.nop = VCN_DEC_SW_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v4_0_dec_ring_get_rptr,
	.get_wptr = vcn_v4_0_dec_ring_get_wptr,
	.set_wptr = vcn_v4_0_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		VCN_SW_RING_EMIT_FRAME_SIZE,
	.emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
	.emit_ib = vcn_dec_sw_ring_emit_ib,
	.emit_fence = vcn_dec_sw_ring_emit_fence,
	.emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_sw_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_dec_sw_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_dec_sw_ring_emit_wreg,
	.emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };

 /**
  * vcn_v4_0_enc_ring_get_rptr - get enc read pointer
  *
  * @ring: amdgpu_ring pointer
  *
  * Returns the current hardware enc read pointer
  */
 static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
 {
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
	else
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2);
 }

 /**
  * vcn_v4_0_enc_ring_get_wptr - get enc write pointer
  *
  * @ring: amdgpu_ring pointer
  *
  * Returns the current hardware enc write pointer
  */
 static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
 {
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell)
			return *ring->wptr_cpu_addr;
		else
			return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
	} else {
		if (ring->use_doorbell)
			return *ring->wptr_cpu_addr;
		else
			return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2);
	}
 }

 /**
  * vcn_v4_0_enc_ring_set_wptr - set enc write pointer
  * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
  *
  * @ring: amdgpu_ring pointer
  *
  * Commits the enc write pointer to the hardware
  */
 static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
 {
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell) {
			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
		}
	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	if (ring->use_doorbell) {
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		if (ring->use_doorbell) {
			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		}
		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
	}
 }

 static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
 static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v4_0_enc_ring_get_rptr,
	.get_wptr = vcn_v4_0_enc_ring_get_wptr,
	.set_wptr = vcn_v4_0_enc_ring_set_wptr,
	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +

@@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.test_ib = amdgpu_vcn_unified_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,

@@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
 };

 /**
  * vcn_v4_0_set_dec_ring_funcs - set dec ring functions
  * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
  *
  * @adev: amdgpu_device pointer
  *
  * Set decode ring functions
  * Set unified ring functions
  */
 static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
 {
	int i;

@@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs;
		adev->vcn.inst[i].ring_dec.me = i;
		DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i);
	}
 }
		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
		adev->vcn.inst[i].ring_enc[0].me = i;

 /**
  * vcn_v4_0_set_enc_ring_funcs - set enc ring functions
  *
  * @adev: amdgpu_device pointer
  *
  * Set encode ring functions
  */
 static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev)
 {
	int i, j;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs;
			adev->vcn.inst[i].ring_enc[j].me = i;
		}
		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
		DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
	}
 }

@@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
	DRM_DEBUG("IH: VCN TRAP\n");

	switch (entry->src_id) {
	case VCN_4_0__SRCID__UVD_TRAP:
		if (!unifiedQ_enabled) {
			amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
			break;
		}
		break;
	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
		break;
	case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			entry->src_id, entry->src_data[0]);

@@ -25,3 +25,17 @@ config HSA_AMD_SVM
	  preemptions and one based on page faults. To enable page fault
	  based memory management on most GFXv9 GPUs, set the module
	  parameter amdgpu.noretry=0.

config HSA_AMD_P2P
	bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
	depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
	help
	  Enable peer-to-peer (P2P) communication between AMD GPUs over
	  the PCIe bus. This can improve performance of multi-GPU compute
	  applications and libraries by enabling GPUs to access data directly
	  in peer GPUs' memory without intermediate copies in system memory.

	  This P2P feature is only enabled on compatible chipsets, and between
	  GPUs with large memory BARs that expose the entire VRAM in PCIe bus
	  address space within the physical address limits of the GPUs.

@@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
 static struct class *kfd_class;
 struct device *kfd_device;

 static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
 {
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, gpu_id);

	if (pdd)
		return pdd;

	mutex_unlock(&p->mutex);
	return NULL;
 }

 static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
 {
	mutex_unlock(&pdd->process->mutex);
 }

 int kfd_chardev_init(void)
 {
	int err = 0;

@@ -280,6 +299,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;
	uint32_t doorbell_offset_in_process = 0;
	struct amdgpu_bo *wptr_bo = NULL;

	memset(&q_properties, 0, sizeof(struct queue_properties));

@@ -307,12 +327,49 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
		goto err_bind_process;
	}

	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
	 */
	if (dev->shared_resources.enable_mes &&
			((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
			>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
		struct amdgpu_bo_va_mapping *wptr_mapping;
		struct amdgpu_vm *wptr_vm;

		wptr_vm = drm_priv_to_vm(pdd->drm_priv);
		err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
		if (err)
			goto err_wptr_map_gart;

		wptr_mapping = amdgpu_vm_bo_lookup_mapping(
				wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
		amdgpu_bo_unreserve(wptr_vm->root.bo);
		if (!wptr_mapping) {
			pr_err("Failed to lookup wptr bo\n");
			err = -EINVAL;
			goto err_wptr_map_gart;
		}

		wptr_bo = wptr_mapping->bo_va->base.bo;
		if (wptr_bo->tbo.base.size > PAGE_SIZE) {
			pr_err("Requested GART mapping for wptr bo larger than one page\n");
			err = -EINVAL;
			goto err_wptr_map_gart;
		}

		err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
		if (err) {
			pr_err("Failed to map wptr bo to GART\n");
			goto err_wptr_map_gart;
		}
	}

	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
			&doorbell_offset_in_process);
	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
			NULL, NULL, NULL, &doorbell_offset_in_process);
	if (err != 0)
		goto err_create_queue;

@@ -344,6 +401,9 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
	return 0;

 err_create_queue:
	if (wptr_bo)
		amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
 err_wptr_map_gart:
 err_bind_process:
 err_pdd:
	mutex_unlock(&p->mutex);

@@ -958,6 +1018,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
	return false;
 }

 static int kfd_ioctl_get_available_memory(struct file *filep,
					   struct kfd_process *p, void *data)
 {
	struct kfd_ioctl_get_available_memory_args *args = data;
	struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);

	if (!pdd)
		return -EINVAL;
	args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
	kfd_unlock_pdd(pdd);
	return 0;
 }

 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					  struct kfd_process *p, void *data)
 {

@@ -2361,7 +2434,7 @@ static int criu_restore(struct file *filep,
	 * Set the process to evicted state to avoid running any new queues before all the memory
	 * mappings are ready.
	 */
	ret = kfd_process_evict_queues(p);
	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
	if (ret)
		goto exit_unlock;

@@ -2480,7 +2553,7 @@ static int criu_process_info(struct file *filep,
		goto err_unlock;
	}

	ret = kfd_process_evict_queues(p);
	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
	if (ret)
		goto err_unlock;

@@ -2648,6 +2721,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
			kfd_ioctl_get_available_memory, 0),
 };

 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

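For orientation, here is a hedged sketch of how userspace might call the new available-memory ioctl added above. The struct layout mirrors the args consumed by kfd_ioctl_get_available_memory(), but the field order and ioctl number below are assumptions for illustration; the authoritative definition is the kfd_ioctl.h UAPI header shipped with this series.

```c
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed layout based on the handler above; check
 * include/uapi/linux/kfd_ioctl.h for the real definition.
 */
struct kfd_ioctl_get_available_memory_args {
	uint64_t available;	/* filled by amdgpu_amdkfd_get_available_memory() */
	uint32_t gpu_id;	/* resolved to a pdd via kfd_lock_pdd_by_id() */
	uint32_t pad;
};

/* 0x23 is a placeholder command number, not taken from the UAPI header. */
#define AMDKFD_IOC_AVAILABLE_MEMORY \
	_IOWR('K', 0x23, struct kfd_ioctl_get_available_memory_args)

int main(void)
{
	struct kfd_ioctl_get_available_memory_args args = { .gpu_id = 0x1234 };
	int fd = open("/dev/kfd", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == 0)
		printf("available: %llu bytes\n",
		       (unsigned long long)args.available);
	close(fd);
	return 0;
}
```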
@@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
		props->rec_transfer_size =
			iolink->recommended_transfer_size;

		dev->io_link_count++;
		dev->node_props.io_links_count++;
		list_add_tail(&props->list, &dev->io_link_props);
		break;

@@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
			props2->node_from = id_to;
			props2->node_to = id_from;
			props2->kobj = NULL;
			to_dev->io_link_count++;
			to_dev->node_props.io_links_count++;
			list_add_tail(&props2->list, &to_dev->io_link_props);
		}

@@ -75,7 +75,6 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
	case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
	case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
	case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
	case IP_VERSION(6, 0, 1):
		kfd->device_info.num_sdma_queues_per_engine = 2;
		break;
	case IP_VERSION(4, 2, 0):/* VEGA20 */

@@ -90,6 +89,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
	case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
	case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
	case IP_VERSION(6, 0, 0):
	case IP_VERSION(6, 0, 1):
	case IP_VERSION(6, 0, 2):
		kfd->device_info.num_sdma_queues_per_engine = 8;
		break;

@@ -837,7 +837,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
 }

 int kgd2kfd_quiesce_mm(struct mm_struct *mm)
 int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
 {
	struct kfd_process *p;
	int r;

@@ -851,7 +851,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
		return -ESRCH;

	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	r = kfd_process_evict_queues(p);
	r = kfd_process_evict_queues(p, trigger);

	kfd_unref_process(p);
	return r;

@@ -177,6 +177,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	if (dqm->is_hws_hang)
		return -EIO;

@@ -197,6 +198,14 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;
	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	if (q->wptr_bo) {
		wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
		queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
	}

	queue_input.is_kfd_process = 1;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;

@@ -811,7 +820,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;
	bool add_queue = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);

@@ -887,7 +895,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (add_queue)
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||

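The wptr_mc_addr computation in add_queue_mes() above is plain pointer arithmetic: the offset of the user write pointer within its BO's GPU VA mapping is re-applied on top of the BO's GART placement, giving MES a memory-controller address it can poll even when the queue is unmapped. A standalone sketch of that calculation, with all addresses invented for illustration:

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* hypothetical stand-ins for q->properties.write_ptr,
	 * q->wptr_bo->kfd_bo->va and q->wptr_bo->tbo.resource->start
	 */
	uint64_t write_ptr  = 0x7f0000001040ULL; /* user VA of the wptr */
	uint64_t wptr_bo_va = 0x7f0000001000ULL; /* GPU VA of the wptr BO */
	uint64_t gart_page  = 0x80ULL;           /* BO's GART page number */

	uint64_t wptr_addr_off = write_ptr - wptr_bo_va;
	uint64_t wptr_mc_addr  = (gart_page << PAGE_SHIFT) + wptr_addr_off;

	printf("MES polls wptr at MC address 0x%llx\n",
	       (unsigned long long)wptr_mc_addr);
	return 0;
}
```

This also explains the one-page limit enforced in kfd_ioctl_create_queue(): the write pointer must live inside a single GART-mapped page for the offset math to stay valid.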
@@ -377,8 +377,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
		return -EINVAL;
	}

	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
					mem, &kern_addr, &size);
	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
	if (err) {
		pr_err("Failed to map event page to kernel\n");
		return err;

@@ -387,7 +386,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
	err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
	if (err) {
		pr_err("Failed to set event page\n");
		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
		return err;
	}
	return err;

@@ -33,6 +33,7 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
 #include "kfd_smi_events.h"

 #ifdef dev_fmt
 #undef dev_fmt

@@ -402,8 +403,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 static long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end)
			uint64_t end, uint32_t trigger)
 {
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;

@@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				      0, adev->kfd.dev->id, prange->prefetch_loc,
				      prange->preferred_loc, trigger);

	r = migrate_vma_setup(&migrate);
	if (r) {
		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",

@@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
	svm_migrate_copy_done(adev, mfence);
	migrate_vma_finalize(&migrate);

	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				    0, adev->kfd.dev->id, trigger);

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
	svm_range_free_dma_mappings(prange);

@@ -479,6 +490,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
  * @prange: range structure
  * @best_loc: the device to migrate to
  * @mm: the process mm structure
  * @trigger: reason for the migration
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *

@@ -487,7 +499,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm)
			struct mm_struct *mm, uint32_t trigger)
 {
	unsigned long addr, start, end;
	struct vm_area_struct *vma;

@@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
		if (r < 0) {
			pr_debug("failed %ld to migrate\n", r);
			break;

@@ -655,8 +667,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
  */
 static long
 svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
		       struct vm_area_struct *vma, uint64_t start, uint64_t end,
		       uint32_t trigger)
 {
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	unsigned long upages = npages;
	unsigned long cpages = 0;

@@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				      adev->kfd.dev->id, 0, prange->prefetch_loc,
				      prange->preferred_loc, trigger);

	r = migrate_vma_setup(&migrate);
	if (r) {
		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",

@@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,

	svm_migrate_copy_done(adev, mfence);
	migrate_vma_finalize(&migrate);

	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				    adev->kfd.dev->id, 0, trigger);

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

 out_free:

@@ -732,13 +756,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
  * svm_migrate_vram_to_ram - migrate svm range from device to system
  * @prange: range structure
  * @mm: process mm, use current->mm if NULL
  * @trigger: reason for the migration
  *
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
  * 0 - OK, otherwise error code
  */
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
			    uint32_t trigger)
 {
	struct amdgpu_device *adev;
	struct vm_area_struct *vma;

@@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
		}

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
		if (r < 0) {
			pr_debug("failed %ld to migrate prange %p\n", r, prange);
			break;

@@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
  * @prange: range structure
  * @best_loc: the device to migrate to
  * @mm: process mm, use current->mm if NULL
  * @trigger: reason for the migration
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *

@@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
  */
 static int
 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 struct mm_struct *mm)
			 struct mm_struct *mm, uint32_t trigger)
 {
	int r, retries = 3;

@@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

	do {
		r = svm_migrate_vram_to_ram(prange, mm);
		r = svm_migrate_vram_to_ram(prange, mm, trigger);
		if (r)
			return r;
	} while (prange->actual_loc && --retries);

@@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
	if (prange->actual_loc)
		return -EDEADLK;

	return svm_migrate_ram_to_vram(prange, best_loc, mm);
	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
 }

 int
 svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm)
		    struct mm_struct *mm, uint32_t trigger)
 {
	if (!prange->actual_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, mm);
		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, mm);
		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);

 }

@@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
		goto out_unlock_prange;
	}

	r = svm_migrate_vram_to_ram(prange, mm);
	r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
	if (r)
		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
			 prange, prange->start, prange->last);

@@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR {
 };

 int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
			struct mm_struct *mm, uint32_t trigger);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
			uint32_t trigger);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);

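The pattern repeated through the migration hunks above is mechanical: every entry point grows a `trigger` argument identifying why the migration happened (prefetch, CPU/GPU page fault, TTM eviction, ...), and the VMA-level workers report it in SMI start/end events for the HMM profiler. A toy model of the same plumbing, with invented enum values standing in for the real KFD_MIGRATE_TRIGGER_* constants:

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical trigger reasons in the style of KFD_MIGRATE_TRIGGER_* */
enum { TRIGGER_PREFETCH = 1, TRIGGER_PAGEFAULT_CPU = 2 };

static void smi_event(const char *phase, uint32_t trigger)
{
	/* stands in for kfd_smi_event_migration_start()/_end() */
	printf("smi: migration %s, trigger=%u\n", phase, trigger);
}

static int migrate_vma(uint64_t start, uint64_t end, uint32_t trigger)
{
	smi_event("start", trigger);
	/* ... set up and copy pages for [start, end) ... */
	smi_event("end", trigger);
	return 0;
}

static int migrate_range(uint32_t trigger)
{
	/* per-VMA loop, as in svm_migrate_ram_to_vram() */
	return migrate_vma(0x1000, 0x2000, trigger);
}

int main(void)
{
	return migrate_range(TRIGGER_PAGEFAULT_CPU);
}
```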
@@ -100,7 +100,9 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 {
	struct kfd_cu_info cu_info;
	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
	int i, se, sh, cu, cu_bitmap_sh_mul;
	bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
	uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
	int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;

	amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);

@@ -167,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
		se_mask[i] = 0;

	i = 0;
	for (cu = 0; cu < 16; cu++) {
	for (cu = 0; cu < 16; cu += inc) {
		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
			for (se = 0; se < cu_info.num_shader_engines; se++) {
				if (cu_per_sh[se][sh] > cu) {
					if (cu_mask[i / 32] & (1 << (i % 32)))
						se_mask[se] |= 1 << (cu + sh * 16);
					i++;
					if (cu_mask[i / 32] & (en_mask << (i % 32)))
						se_mask[se] |= en_mask << (cu + sh * 16);
					i += inc;
					if (i == cu_mask_count)
						return;
				}

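The CU-mask change above reflects WGP mode on GFX10+, where compute units are paired into workgroup processors: the loop walks the user mask two bits at a time (`inc = 2`) and enables bit pairs (`en_mask = 0x3`) rather than single CUs. A reduced, self-contained version of the mapping loop, trimmed to one shader engine and one shader array for brevity:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int wgp_mode_req = 1;            /* GFX10+: CUs paired as WGPs */
	uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
	int inc = wgp_mode_req ? 2 : 1;

	uint32_t cu_mask = 0x0000000f;   /* user mask: first 4 CUs (2 WGPs) */
	uint32_t se_mask = 0;            /* single SE/SH for brevity */
	int cu_per_sh = 8;               /* assumed CU count per SH */

	for (int i = 0, cu = 0; cu < 16; cu += inc) {
		if (cu < cu_per_sh) {
			/* enable a whole CU pair when either mask bit is set */
			if (cu_mask & (en_mask << i))
				se_mask |= en_mask << cu;
			i += inc;
		}
	}
	printf("se_mask = 0x%08x\n", se_mask); /* prints 0x0000000f */
	return 0;
}
```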
@@ -377,6 +377,8 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

@@ -571,6 +571,8 @@ struct queue {
	void *gang_ctx_bo;
	uint64_t gang_ctx_gpu_addr;
	void *gang_ctx_cpu_ptr;

	struct amdgpu_bo *wptr_bo;
 };

 enum KFD_MQD_TYPE {

@@ -945,7 +947,7 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
 }

 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
 int kfd_process_restore_queues(struct kfd_process *p);
 void kfd_suspend_all_processes(void);
 int kfd_resume_all_processes(void);

@@ -1206,6 +1208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
			struct file *f,
			struct queue_properties *properties,
			unsigned int *qid,
			struct amdgpu_bo *wptr_bo,
			const struct kfd_criu_queue_priv_data *q_data,
			const void *restore_mqd,
			const void *restore_ctl_stack,

@ -43,6 +43,7 @@ struct mm_struct;
|
|||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_iommu.h"
|
||||
#include "kfd_svm.h"
|
||||
#include "kfd_smi_events.h"
|
||||
|
||||
/*
|
||||
* List of struct kfd_process (field kfd_process).
|
||||
|
@ -693,7 +694,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
|
|||
struct kfd_dev *dev = pdd->dev;
|
||||
|
||||
if (kptr) {
|
||||
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);
|
||||
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
|
||||
kptr = NULL;
|
||||
}
|
||||
|
||||
|
@ -733,7 +734,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
|
|||
}
|
||||
|
||||
if (kptr) {
|
||||
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,
|
||||
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
|
||||
(struct kgd_mem *)*mem, kptr, NULL);
|
||||
if (err) {
|
||||
pr_debug("Map GTT BO to kernel failed\n");
|
||||
|
@ -999,7 +1000,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
|
|||
if (!mem)
|
||||
goto out;
|
||||
|
||||
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);
|
||||
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
|
||||
|
||||
out:
|
||||
mutex_unlock(&p->mutex);
|
||||
|
@@ -1736,7 +1737,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
  * Eviction is reference-counted per process-device. This means multiple
  * evictions from different sources can be nested safely.
  */
-int kfd_process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
 {
 	int r = 0;
 	int i;

@@ -1745,6 +1746,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
 
+		kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+					     trigger);
+
 		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
 							    &pdd->qpd);
 		/* evict return -EIO if HWS is hang or asic is resetting, in this case

@@ -1769,6 +1773,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
 
 		if (n_evicted == 0)
 			break;
+
+		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
 		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
 							      &pdd->qpd))
 			pr_err("Failed to restore queues\n");

@@ -1788,6 +1795,8 @@ int kfd_process_restore_queues(struct kfd_process *p)
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
 
+		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
 		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
 							      &pdd->qpd);
 		if (r) {
@@ -1849,7 +1858,7 @@ static void evict_process_worker(struct work_struct *work)
 	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
-	ret = kfd_process_evict_queues(p);
+	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
 	if (!ret) {
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);

@@ -1916,7 +1925,7 @@ void kfd_suspend_all_processes(void)
 		cancel_delayed_work_sync(&p->eviction_work);
 		cancel_delayed_work_sync(&p->restore_work);
 
-		if (kfd_process_evict_queues(p))
+		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
 			pr_err("Failed to suspend process 0x%x\n", p->pasid);
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
@@ -180,7 +180,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
 static int init_user_queue(struct process_queue_manager *pqm,
 				struct kfd_dev *dev, struct queue **q,
 				struct queue_properties *q_properties,
-				struct file *f, unsigned int qid)
+				struct file *f, struct amdgpu_bo *wptr_bo,
+				unsigned int qid)
 {
 	int retval;
 

@@ -210,6 +211,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
 			goto cleanup;
 		}
 		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+		(*q)->wptr_bo = wptr_bo;
 	}
 
 	pr_debug("PQM After init queue");

@@ -226,6 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		     struct file *f,
 		     struct queue_properties *properties,
 		     unsigned int *qid,
+		     struct amdgpu_bo *wptr_bo,
 		     const struct kfd_criu_queue_priv_data *q_data,
 		     const void *restore_mqd,
 		     const void *restore_ctl_stack,

@@ -288,7 +291,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		 * allocate_sdma_queue() in create_queue() has the
 		 * corresponding check logic.
 		 */
-		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;

@@ -309,7 +312,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		}
 
-		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
@@ -436,9 +439,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 			pdd->qpd.num_gws = 0;
 		}
 
-		if (dev->shared_resources.enable_mes)
+		if (dev->shared_resources.enable_mes) {
 			amdgpu_amdkfd_free_gtt_mem(dev->adev,
 						   pqn->q->gang_ctx_bo);
+			if (pqn->q->wptr_bo)
+				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+
+		}
 		uninit_queue(pqn->q);
 	}
 
@@ -491,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
 		return -EFAULT;
 	}
 
+	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+	if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
+			KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
+		int i;
+
+		for (i = 0; i < minfo->cu_mask.count; i += 2) {
+			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+			if (cu_pair && cu_pair != 0x3) {
+				pr_debug("CUs must be adjacent pairwise enabled.\n");
+				return -EINVAL;
+			}
+		}
+	}
+
 	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
 						       pqn->q, minfo);
 	if (retval != 0)
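The pairwise CU-mask check added above exists because GFX10 and newer group compute units into WGPs of two: within every aligned 2-bit pair of the mask, either both CUs are enabled (0x3) or both are disabled (0x0). A minimal standalone sketch of the same bit arithmetic; the helper name and harness are illustrative, only the mask layout is taken from the hunk:

#include <stdbool.h>
#include <stdint.h>

/* Return true when every aligned 2-bit pair in the CU mask is 0x0 or 0x3. */
static bool cu_mask_pairwise_ok(const uint32_t *mask, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i += 2) {
		uint32_t cu_pair = (mask[i / 32] >> (i % 32)) & 0x3;

		if (cu_pair && cu_pair != 0x3)
			return false;	/* only one CU of a WGP enabled */
	}
	return true;
}

/* Example: 0xF3 (pairs 11,00,11,11) passes; 0x02 (pair 10) is rejected. */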
@@ -844,7 +866,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
 
 	print_queue_properties(&qp);
 
-	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
+	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
 				NULL);
 	if (ret) {
 		pr_err("Failed to create new queue err:%d\n", ret);

@@ -38,6 +38,9 @@ struct kfd_smi_client {
 	uint64_t events;
 	struct kfd_dev *dev;
 	spinlock_t lock;
+	struct rcu_head rcu;
+	pid_t pid;
+	bool suser;
 };
 
 #define MAX_KFIFO_SIZE	1024
@@ -135,6 +138,14 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
 	return sizeof(events);
 }
 
+static void kfd_smi_ev_client_free(struct rcu_head *p)
+{
+	struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
+
+	kfifo_free(&ev->fifo);
+	kfree(ev);
+}
+
 static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
 {
 	struct kfd_smi_client *client = filep->private_data;
@@ -144,23 +155,31 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
 	list_del_rcu(&client->list);
 	spin_unlock(&dev->smi_lock);
 
-	synchronize_rcu();
-	kfifo_free(&client->fifo);
-	kfree(client);
-
+	call_rcu(&client->rcu, kfd_smi_ev_client_free);
 	return 0;
 }
 
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
-			       char *event_msg, int len)
+static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
+			       unsigned int event)
+{
+	uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
+	uint64_t events = READ_ONCE(client->events);
+
+	if (pid && client->pid != pid && !(client->suser && (events & all)))
+		return false;
+
+	return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
+}
+
+static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
+			       unsigned int smi_event, char *event_msg, int len)
 {
 	struct kfd_smi_client *client;
 
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
-		if (!(READ_ONCE(client->events) &
-		      KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
+		if (!kfd_smi_ev_enabled(pid, client, smi_event))
 			continue;
 		spin_lock(&client->lock);
 		if (kfifo_avail(&client->fifo) >= len) {
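The release path above is the interesting part: synchronize_rcu() used to block the closing process until every reader finished, while call_rcu() unlinks the client under the lock and defers the kfifo/struct free to an RCU callback. The same lifetime pattern in miniature; this is a generic sketch, the struct and function names are illustrative rather than the driver's:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head list;
	struct rcu_head rcu;
};

static void item_free(struct rcu_head *head)
{
	/* Runs only after all pre-existing rcu_read_lock() sections end. */
	kfree(container_of(head, struct item, rcu));
}

static void item_remove(struct item *it, spinlock_t *lock)
{
	spin_lock(lock);
	list_del_rcu(&it->list);	/* unlink; readers may still see it */
	spin_unlock(lock);
	call_rcu(&it->rcu, item_free);	/* deferred free, no blocking wait */
}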
@@ -176,9 +195,9 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
 	rcu_read_unlock();
 }
 
-__printf(3, 4)
-static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
-			      char *fmt, ...)
+__printf(4, 5)
+static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev,
+			      unsigned int event, char *fmt, ...)
 {
 	char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
 	int len;

@@ -193,7 +212,7 @@ static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
 	len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
 	va_end(args);
 
-	add_event_to_kfifo(dev, event, fifo_in, len);
+	add_event_to_kfifo(pid, dev, event, fifo_in, len);
 }
 
 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
@@ -206,13 +225,13 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
 		event = KFD_SMI_EVENT_GPU_PRE_RESET;
 		++(dev->reset_seq_num);
 	}
-	kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num);
+	kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
 }
 
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 					     uint64_t throttle_bitmask)
 {
-	kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+	kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
 			  throttle_bitmask,
 			  amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
 }
@@ -227,10 +246,93 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
 	if (!task_info.pid)
 		return;
 
-	kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
+	kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
 			  task_info.pid, task_info.task_name);
 }
 
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+				    unsigned long address, bool write_fault,
+				    ktime_t ts)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,
+			  "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
+			  address, dev->id, write_fault ? 'W' : 'R');
+}
+
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+				  unsigned long address, bool migration)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,
+			  "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
+			  pid, address, dev->id, migration ? 'M' : 'U');
+}
+
+void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
+				   unsigned long start, unsigned long end,
+				   uint32_t from, uint32_t to,
+				   uint32_t prefetch_loc, uint32_t preferred_loc,
+				   uint32_t trigger)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
+			  "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+			  ktime_get_boottime_ns(), pid, start, end - start,
+			  from, to, prefetch_loc, preferred_loc, trigger);
+}
+
+void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
+				 unsigned long start, unsigned long end,
+				 uint32_t from, uint32_t to, uint32_t trigger)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
+			  "%lld -%d @%lx(%lx) %x->%x %d\n",
+			  ktime_get_boottime_ns(), pid, start, end - start,
+			  from, to, trigger);
+}
+
+void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
+				  uint32_t trigger)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION,
+			  "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
+			  dev->id, trigger);
+}
+
+void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE,
+			  "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
+			  dev->id);
+}
+
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int i;
+
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+				  KFD_SMI_EVENT_QUEUE_RESTORE,
+				  "%lld -%d %x %c\n", ktime_get_boottime_ns(),
+				  p->lead_thread->pid, pdd->dev->id, 'R');
+	}
+	kfd_unref_process(p);
+}
+
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+				  unsigned long address, unsigned long last,
+				  uint32_t trigger)
+{
+	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+			  "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
+			  pid, address, last - address + 1, dev->id, trigger);
+}
+
 int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
 {
 	struct kfd_smi_client *client;
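Every helper above formats one text record into the per-client kfifo, so the event stream is line-oriented and easy to parse. A userspace-side sketch of decoding a KFD_SMI_EVENT_PAGE_FAULT_START record, derived only from the format string in the hunk; the sample line and variable names are invented for illustration:

#include <stdio.h>

/* Payload format: "%lld -%d @%lx(%x) %c\n"
 *   -> timestamp_ns -pid @address(gpu_id) W|R
 */
int main(void)
{
	const char *line = "264923047639 -4361 @7f5e3a200(c0fd) W";
	long long ts;
	int pid;
	unsigned long addr;
	unsigned int gpu_id;
	char rw;

	if (sscanf(line, "%lld -%d @%lx(%x) %c", &ts, &pid, &addr, &gpu_id, &rw) == 5)
		printf("ns=%lld pid=%d addr=0x%lx gpu=0x%x %s fault\n",
		       ts, pid, addr, gpu_id, rw == 'W' ? "write" : "read");
	return 0;
}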
@@ -251,6 +353,8 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
 	spin_lock_init(&client->lock);
 	client->events = 0;
 	client->dev = dev;
+	client->pid = current->tgid;
+	client->suser = capable(CAP_SYS_ADMIN);
 
 	spin_lock(&dev->smi_lock);
 	list_add_rcu(&client->list, &dev->smi_clients);

@@ -29,5 +29,24 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 					     uint64_t throttle_bitmask);
 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
+
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+				    unsigned long address, bool write_fault,
+				    ktime_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+				  unsigned long address, bool migration);
+void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
+				   unsigned long start, unsigned long end,
+				   uint32_t from, uint32_t to,
+				   uint32_t prefetch_loc, uint32_t preferred_loc,
+				   uint32_t trigger);
+void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
+				 unsigned long start, unsigned long end,
+				 uint32_t from, uint32_t to, uint32_t trigger);
+void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
+				  uint32_t trigger);
+void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid);
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+				  unsigned long address, unsigned long last,
+				  uint32_t trigger);
 #endif
@@ -32,6 +32,7 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
+#include "kfd_smi_events.h"
 
 #ifdef dev_fmt
 #undef dev_fmt
@@ -43,7 +44,7 @@
 /* Long enough to ensure no retry fault comes after svm range is restored and
  * page table is updated.
  */
-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	2000
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	(2UL * NSEC_PER_MSEC)
 
 struct criu_svm_metadata {
 	struct list_head list;
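The constant keeps its 2 ms meaning but changes units: the old value was compared in microseconds, and the rest of this diff moves validate_timestamp to a ktime_t in nanoseconds, hence 2UL * NSEC_PER_MSEC. A sketch of the before/after duplicate-fault test; the helper name is illustrative, the ktime calls match the later hunk in svm_range_restore_pages:

#include <linux/ktime.h>

#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	(2UL * NSEC_PER_MSEC)

/* Old form, microsecond arithmetic on a uint64_t timestamp:
 *	if (ktime_to_us(ktime_get()) - validate_timestamp < 2000)
 * New form, ktime_t end-to-end with no unit conversion at the call site:
 */
static bool svm_fault_is_duplicate(ktime_t now, ktime_t validate_timestamp)
{
	return ktime_before(now, ktime_add_ns(validate_timestamp,
					      AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING));
}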
@@ -1199,7 +1200,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 static int
 svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
-			  unsigned long last)
+			  unsigned long last, uint32_t trigger)
 {
 	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
 	struct kfd_process_device *pdd;

@@ -1231,6 +1232,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 			return -EINVAL;
 		}
 
+		kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+					     start, last, trigger);
+
 		r = svm_range_unmap_from_gpu(pdd->dev->adev,
 					     drm_priv_to_vm(pdd->drm_priv),
 					     start, last, &fence);

@@ -1617,7 +1621,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	svm_range_unreserve_bos(&ctx);
 
 	if (!r)
-		prange->validate_timestamp = ktime_to_us(ktime_get());
+		prange->validate_timestamp = ktime_get_boottime();
 
 	return r;
 }

@@ -1729,14 +1733,16 @@ static void svm_range_restore_work(struct work_struct *work)
 	mutex_unlock(&svms->lock);
 	mmap_write_unlock(mm);
 	mutex_unlock(&process_info->lock);
-	mmput(mm);
 
 	/* If validation failed, reschedule another attempt */
 	if (evicted_ranges) {
 		pr_debug("reschedule to restore svm range\n");
 		schedule_delayed_work(&svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+		kfd_smi_event_queue_restore_rescheduled(mm);
 	}
+	mmput(mm);
 }
 
 /**
@@ -1756,7 +1762,8 @@ static void svm_range_restore_work(struct work_struct *work)
  */
 static int
 svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
-		unsigned long start, unsigned long last)
+		unsigned long start, unsigned long last,
+		enum mmu_notifier_event event)
 {
 	struct svm_range_list *svms = prange->svms;
 	struct svm_range *pchild;

@@ -1792,7 +1799,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			 prange->svms, prange->start, prange->last);
 
 		/* First eviction, stop the queues */
-		r = kgd2kfd_quiesce_mm(mm);
+		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
 		if (r)
 			pr_debug("failed to quiesce KFD\n");
 

@@ -1801,6 +1808,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 	} else {
 		unsigned long s, l;
+		uint32_t trigger;
+
+		if (event == MMU_NOTIFY_MIGRATE)
+			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+		else
+			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
 
 		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 			 prange->svms, start, last);

@@ -1809,13 +1822,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			s = max(start, pchild->start);
 			l = min(last, pchild->last);
 			if (l >= s)
-				svm_range_unmap_from_gpus(pchild, s, l);
+				svm_range_unmap_from_gpus(pchild, s, l, trigger);
 			mutex_unlock(&pchild->lock);
 		}
 		s = max(start, prange->start);
 		l = min(last, prange->last);
 		if (l >= s)
-			svm_range_unmap_from_gpus(prange, s, l);
+			svm_range_unmap_from_gpus(prange, s, l, trigger);
 	}
 
 	return r;
@@ -2229,6 +2242,7 @@ static void
 svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 			 unsigned long start, unsigned long last)
 {
+	uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
 	struct svm_range_list *svms;
 	struct svm_range *pchild;
 	struct kfd_process *p;

@@ -2256,14 +2270,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 		s = max(start, pchild->start);
 		l = min(last, pchild->last);
 		if (l >= s)
-			svm_range_unmap_from_gpus(pchild, s, l);
+			svm_range_unmap_from_gpus(pchild, s, l, trigger);
 		svm_range_unmap_split(mm, prange, pchild, start, last);
 		mutex_unlock(&pchild->lock);
 	}
 	s = max(start, prange->start);
 	l = min(last, prange->last);
 	if (l >= s)
-		svm_range_unmap_from_gpus(prange, s, l);
+		svm_range_unmap_from_gpus(prange, s, l, trigger);
 	svm_range_unmap_split(mm, prange, prange, start, last);
 
 	if (unmap_parent)

@@ -2330,7 +2344,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 		svm_range_unmap_from_cpu(mni->mm, prange, start, last);
 		break;
 	default:
-		svm_range_evict(prange, mni->mm, start, last);
+		svm_range_evict(prange, mni->mm, start, last, range->event);
 		break;
 	}
 
@@ -2694,11 +2708,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct svm_range_list *svms;
 	struct svm_range *prange;
 	struct kfd_process *p;
-	uint64_t timestamp;
+	ktime_t timestamp = ktime_get_boottime();
 	int32_t best_loc;
 	int32_t gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
 	struct vm_area_struct *vma;
+	bool migration = false;
 	int r = 0;
 
 	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {

@@ -2775,9 +2790,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out_unlock_range;
 	}
 
-	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
 	/* skip duplicate vm fault on different pages of same range */
-	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+	if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+				AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
 		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
 			 svms, prange->start, prange->last);
 		r = 0;

@@ -2813,9 +2828,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		 svms, prange->start, prange->last, best_loc,
 		 prange->actual_loc);
 
+	kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+				       write_fault, timestamp);
+
 	if (prange->actual_loc != best_loc) {
+		migration = true;
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm);
+			r = svm_migrate_to_vram(prange, best_loc, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);

@@ -2823,12 +2843,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			 * VRAM failed
 			 */
 			if (prange->actual_loc)
-				r = svm_migrate_vram_to_ram(prange, mm);
+				r = svm_migrate_vram_to_ram(prange, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			else
 				r = 0;
 		}
 	} else {
-		r = svm_migrate_vram_to_ram(prange, mm);
+		r = svm_migrate_vram_to_ram(prange, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 	}
 	if (r) {
 		pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",

@@ -2842,6 +2864,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
 			 r, svms, prange->start, prange->last);
 
+	kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+				     migration);
+
 out_unlock_range:
 	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:

@@ -3148,12 +3173,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 		return 0;
 
 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm);
+		r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 		*migrated = !r;
 		return r;
 	}
 
-	r = svm_migrate_to_vram(prange, best_loc, mm);
+	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;
 
 	return r;
@@ -3211,7 +3236,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 		mutex_lock(&prange->migrate_mutex);
 		do {
 			r = svm_migrate_vram_to_ram(prange,
-						svm_bo->eviction_fence->mm);
+						svm_bo->eviction_fence->mm,
+						KFD_MIGRATE_TRIGGER_TTM_EVICTION);
 		} while (!r && prange->actual_loc && --retries);
 
 		if (!r && prange->actual_loc)
@@ -125,7 +125,7 @@ struct svm_range {
 	uint32_t			actual_loc;
 	uint8_t				granularity;
 	atomic_t			invalid;
-	uint64_t			validate_timestamp;
+	ktime_t				validate_timestamp;
 	struct mmu_interval_notifier	notifier;
 	struct svm_work_list_item	work_item;
 	struct list_head		deferred_list;
@@ -40,6 +40,7 @@
 #include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ras.h"
+#include "amdgpu.h"
 
 /* topology_device_list - Master list of all topology devices */
 static struct list_head topology_device_list;

@@ -148,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
 	struct kfd_mem_properties *mem;
 	struct kfd_cache_properties *cache;
 	struct kfd_iolink_properties *iolink;
+	struct kfd_iolink_properties *p2plink;
 	struct kfd_perf_properties *perf;
 
 	list_del(&dev->list);

@@ -173,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
 		kfree(iolink);
 	}
 
+	while (dev->p2p_link_props.next != &dev->p2p_link_props) {
+		p2plink = container_of(dev->p2p_link_props.next,
+				struct kfd_iolink_properties, list);
+		list_del(&p2plink->list);
+		kfree(p2plink);
+	}
+
 	while (dev->perf_props.next != &dev->perf_props) {
 		perf = container_of(dev->perf_props.next,
 				struct kfd_perf_properties, list);

@@ -214,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device(
 	INIT_LIST_HEAD(&dev->mem_props);
 	INIT_LIST_HEAD(&dev->cache_props);
 	INIT_LIST_HEAD(&dev->io_link_props);
+	INIT_LIST_HEAD(&dev->p2p_link_props);
 	INIT_LIST_HEAD(&dev->perf_props);
 
 	list_add_tail(&dev->list, device_list);

@@ -465,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			      dev->node_props.caches_count);
 	sysfs_show_32bit_prop(buffer, offs, "io_links_count",
 			      dev->node_props.io_links_count);
+	sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
+			      dev->node_props.p2p_links_count);
 	sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
 			      dev->node_props.cpu_core_id_base);
 	sysfs_show_32bit_prop(buffer, offs, "simd_id_base",

@@ -568,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
 
 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 {
+	struct kfd_iolink_properties *p2plink;
 	struct kfd_iolink_properties *iolink;
 	struct kfd_cache_properties *cache;
 	struct kfd_mem_properties *mem;

@@ -585,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 		dev->kobj_iolink = NULL;
 	}
 
+	if (dev->kobj_p2plink) {
+		list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+			if (p2plink->kobj) {
+				kfd_remove_sysfs_file(p2plink->kobj,
+							&p2plink->attr);
+				p2plink->kobj = NULL;
+			}
+		kobject_del(dev->kobj_p2plink);
+		kobject_put(dev->kobj_p2plink);
+		dev->kobj_p2plink = NULL;
+	}
+
 	if (dev->kobj_cache) {
 		list_for_each_entry(cache, &dev->cache_props, list)
 			if (cache->kobj) {

@@ -631,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 		uint32_t id)
 {
+	struct kfd_iolink_properties *p2plink;
 	struct kfd_iolink_properties *iolink;
 	struct kfd_cache_properties *cache;
 	struct kfd_mem_properties *mem;

@@ -668,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 	if (!dev->kobj_iolink)
 		return -ENOMEM;
 
+	dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
+	if (!dev->kobj_p2plink)
+		return -ENOMEM;
+
 	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
 	if (!dev->kobj_perf)
 		return -ENOMEM;
@@ -757,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 		i++;
 	}
 
+	i = 0;
+	list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
+		p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!p2plink->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+				dev->kobj_p2plink, "%d", i);
+		if (ret < 0) {
+			kobject_put(p2plink->kobj);
+			return ret;
+		}
+
+		p2plink->attr.name = "properties";
+		p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&iolink->attr);
+		ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
 	/* All hardware blocks have the same number of attributes. */
 	num_attrs = ARRAY_SIZE(perf_attr_iommu);
 	list_for_each_entry(perf, &dev->perf_props, list) {
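Each synthesized P2P link lands in sysfs next to the existing io_links, as a numbered directory holding a single properties file of name/value pairs. A small userspace sketch that dumps one of them; the topology root below is the customary KFD location, adjust if it differs on your system:

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/class/kfd/kfd/topology/nodes/1/p2p_links/0/properties";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "node_to 2", "weight 40", ... */
	fclose(f);
	return 0;
}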
@@ -1145,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 	struct kfd_mem_properties *mem;
 	struct kfd_cache_properties *cache;
 	struct kfd_iolink_properties *iolink;
+	struct kfd_iolink_properties *p2plink;
 
 	down_write(&topology_lock);
 	list_for_each_entry(dev, &topology_device_list, list) {

@@ -1165,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 			cache->gpu = dev->gpu;
 		list_for_each_entry(iolink, &dev->io_link_props, list)
 			iolink->gpu = dev->gpu;
+		list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+			p2plink->gpu = dev->gpu;
 		break;
 	}
 }
@@ -1287,6 +1341,253 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
 			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
 		}
 	}
+
+	/* Create indirect links so apply flags setting to all */
+	list_for_each_entry(link, &dev->p2p_link_props, list) {
+		link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+		kfd_set_iolink_no_atomics(dev, NULL, link);
+		peer_dev = kfd_topology_device_by_proximity_domain(
+				link->node_to);
+
+		if (!peer_dev)
+			continue;
+
+		list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
+				    list) {
+			if (inbound_link->node_to != link->node_from)
+				continue;
+
+			inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+			kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+		}
+	}
+}
+
+static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
+				    struct kfd_iolink_properties *p2plink)
+{
+	int ret;
+
+	p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (!p2plink->kobj)
+		return -ENOMEM;
+
+	ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+			dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
+	if (ret < 0) {
+		kobject_put(p2plink->kobj);
+		return ret;
+	}
+
+	p2plink->attr.name = "properties";
+	p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&p2plink->attr);
+	ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
+{
+	struct kfd_iolink_properties *props = NULL, *props2 = NULL;
+	struct kfd_iolink_properties *gpu_link, *cpu_link;
+	struct kfd_topology_device *cpu_dev;
+	int ret = 0;
+	int i, num_cpu;
+
+	num_cpu = 0;
+	list_for_each_entry(cpu_dev, &topology_device_list, list) {
+		if (cpu_dev->gpu)
+			break;
+		num_cpu++;
+	}
+
+	gpu_link = list_first_entry(&kdev->io_link_props,
+				    struct kfd_iolink_properties, list);
+	if (!gpu_link)
+		return -ENOMEM;
+
+	for (i = 0; i < num_cpu; i++) {
+		/* CPU <--> GPU */
+		if (gpu_link->node_to == i)
+			continue;
+
+		/* find CPU <--> CPU links */
+		cpu_dev = kfd_topology_device_by_proximity_domain(i);
+		if (cpu_dev) {
+			list_for_each_entry(cpu_link,
+					    &cpu_dev->io_link_props, list) {
+				if (cpu_link->node_to == gpu_link->node_to)
+					break;
+			}
+		}
+
+		if (cpu_link->node_to != gpu_link->node_to)
+			return -ENOMEM;
+
+		/* CPU <--> CPU <--> GPU, GPU node*/
+		props = kfd_alloc_struct(props);
+		if (!props)
+			return -ENOMEM;
+
+		memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
+		props->weight = gpu_link->weight + cpu_link->weight;
+		props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
+		props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
+		props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
+		props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
+
+		props->node_from = gpu_node;
+		props->node_to = i;
+		kdev->node_props.p2p_links_count++;
+		list_add_tail(&props->list, &kdev->p2p_link_props);
+		ret = kfd_build_p2p_node_entry(kdev, props);
+		if (ret < 0)
+			return ret;
+
+		/* for small Bar, no CPU --> GPU in-direct links */
+		if (kfd_dev_is_large_bar(kdev->gpu)) {
+			/* CPU <--> CPU <--> GPU, CPU node*/
+			props2 = kfd_alloc_struct(props2);
+			if (!props2)
+				return -ENOMEM;
+
+			memcpy(props2, props, sizeof(struct kfd_iolink_properties));
+			props2->node_from = i;
+			props2->node_to = gpu_node;
+			props2->kobj = NULL;
+			cpu_dev->node_props.p2p_links_count++;
+			list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
+			ret = kfd_build_p2p_node_entry(cpu_dev, props2);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return ret;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
+		struct kfd_topology_device *peer, int from, int to)
+{
+	struct kfd_iolink_properties *props = NULL;
+	struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
+	struct kfd_topology_device *cpu_dev;
+	int ret = 0;
+
+	if (!amdgpu_device_is_peer_accessible(
+				kdev->gpu->adev,
+				peer->gpu->adev))
+		return ret;
+
+	iolink1 = list_first_entry(&kdev->io_link_props,
+				   struct kfd_iolink_properties, list);
+	if (!iolink1)
+		return -ENOMEM;
+
+	iolink2 = list_first_entry(&peer->io_link_props,
+				   struct kfd_iolink_properties, list);
+	if (!iolink2)
+		return -ENOMEM;
+
+	props = kfd_alloc_struct(props);
+	if (!props)
+		return -ENOMEM;
+
+	memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
+
+	props->weight = iolink1->weight + iolink2->weight;
+	props->min_latency = iolink1->min_latency + iolink2->min_latency;
+	props->max_latency = iolink1->max_latency + iolink2->max_latency;
+	props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
+	props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth);
+
+	if (iolink1->node_to != iolink2->node_to) {
+		/* CPU->CPU link*/
+		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
+		if (cpu_dev) {
+			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
+				if (iolink3->node_to == iolink2->node_to)
+					break;
+
+			props->weight += iolink3->weight;
+			props->min_latency += iolink3->min_latency;
+			props->max_latency += iolink3->max_latency;
+			props->min_bandwidth = min(props->min_bandwidth,
+							iolink3->min_bandwidth);
+			props->max_bandwidth = min(props->max_bandwidth,
+							iolink3->max_bandwidth);
+		} else {
+			WARN(1, "CPU node not found");
+		}
+	}
+
+	props->node_from = from;
+	props->node_to = to;
+	peer->node_props.p2p_links_count++;
+	list_add_tail(&props->list, &peer->p2p_link_props);
+	ret = kfd_build_p2p_node_entry(peer, props);
+
+	return ret;
+}
+#endif
+
+static int kfd_dev_create_p2p_links(void)
+{
+	struct kfd_topology_device *dev;
+	struct kfd_topology_device *new_dev;
+#if defined(CONFIG_HSA_AMD_P2P)
+	uint32_t i;
+#endif
+	uint32_t k;
+	int ret = 0;
+
+	k = 0;
+	list_for_each_entry(dev, &topology_device_list, list)
+		k++;
+	if (k < 2)
+		return 0;
+
+	new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
+	if (WARN_ON(!new_dev->gpu))
+		return 0;
+
+	k--;
+
+	/* create in-direct links */
+	ret = kfd_create_indirect_link_prop(new_dev, k);
+	if (ret < 0)
+		goto out;
+
+	/* create p2p links */
+#if defined(CONFIG_HSA_AMD_P2P)
+	i = 0;
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (dev == new_dev)
+			break;
+		if (!dev->gpu || !dev->gpu->adev ||
+		    (dev->gpu->hive_id &&
+		     dev->gpu->hive_id == new_dev->gpu->hive_id))
+			goto next;
+
+		/* check if node(s) is/are peer accessible in one direction or bi-direction */
+		ret = kfd_add_peer_prop(new_dev, dev, i, k);
+		if (ret < 0)
+			goto out;
+
+		ret = kfd_add_peer_prop(dev, new_dev, k, i);
+		if (ret < 0)
+			goto out;
+next:
+		i++;
+	}
+#endif
+
+out:
+	return ret;
+}
+
 int kfd_topology_add_device(struct kfd_dev *gpu)
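Both kfd_create_indirect_link_prop() and kfd_add_peer_prop() collapse a multi-hop route into one synthetic link: weights and latencies add per hop, bandwidth takes the min across hops. A worked instance of that arithmetic with invented hop values:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

struct hop { uint32_t weight, min_latency, max_latency, min_bw, max_bw; };

int main(void)
{
	/* GPU0 -> CPU0 and CPU0 -> GPU1, made-up values */
	struct hop a = { .weight = 20, .min_latency = 100, .max_latency = 500,
			 .min_bw = 16000, .max_bw = 16000 };
	struct hop b = { .weight = 20, .min_latency = 150, .max_latency = 700,
			 .min_bw = 8000, .max_bw = 8000 };
	struct hop p2p = {
		.weight = a.weight + b.weight,			/* 40 */
		.min_latency = a.min_latency + b.min_latency,	/* 250 */
		.max_latency = a.max_latency + b.max_latency,	/* 1200 */
		.min_bw = MIN(a.min_bw, b.min_bw),		/* 8000 */
		.max_bw = MIN(a.max_bw, b.max_bw),		/* 8000 */
	};

	printf("weight=%u lat=[%u,%u] bw=[%u,%u]\n", p2p.weight,
	       p2p.min_latency, p2p.max_latency, p2p.min_bw, p2p.max_bw);
	return 0;
}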
@@ -1305,7 +1606,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	INIT_LIST_HEAD(&temp_topology_device_list);
 
 	gpu_id = kfd_generate_gpu_id(gpu);
-
 	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
 
 	/* Check to see if this gpu device exists in the topology_device_list.

@@ -1362,6 +1662,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->gpu_id = gpu_id;
 	gpu->id = gpu_id;
 
+	kfd_dev_create_p2p_links();
+
 	/* TODO: Move the following lines to function
 	 *       kfd_add_non_crat_information
 	 */

@@ -1507,7 +1809,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 static void kfd_topology_update_io_links(int proximity_domain)
 {
 	struct kfd_topology_device *dev;
-	struct kfd_iolink_properties *iolink, *tmp;
+	struct kfd_iolink_properties *iolink, *p2plink, *tmp;
 
 	list_for_each_entry(dev, &topology_device_list, list) {
 		if (dev->proximity_domain > proximity_domain)

@@ -1520,7 +1822,6 @@ static void kfd_topology_update_io_links(int proximity_domain)
 			 */
 			if (iolink->node_to == proximity_domain) {
 				list_del(&iolink->list);
-				dev->io_link_count--;
 				dev->node_props.io_links_count--;
 			} else {
 				if (iolink->node_from > proximity_domain)

@@ -1529,6 +1830,22 @@ static void kfd_topology_update_io_links(int proximity_domain)
 					iolink->node_to--;
 			}
 		}
+
+		list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
+			/*
+			 * If there is a p2p link to the dev being deleted
+			 * then remove that p2p link also.
+			 */
+			if (p2plink->node_to == proximity_domain) {
+				list_del(&p2plink->list);
+				dev->node_props.p2p_links_count--;
+			} else {
+				if (p2plink->node_from > proximity_domain)
+					p2plink->node_from--;
+				if (p2plink->node_to > proximity_domain)
+					p2plink->node_to--;
+			}
+		}
 	}
 }
 

@@ -38,6 +38,7 @@ struct kfd_node_properties {
 	uint32_t mem_banks_count;
 	uint32_t caches_count;
 	uint32_t io_links_count;
+	uint32_t p2p_links_count;
 	uint32_t cpu_core_id_base;
 	uint32_t simd_id_base;
 	uint32_t capability;

@@ -129,14 +130,15 @@ struct kfd_topology_device {
 	struct list_head		mem_props;
 	uint32_t			cache_count;
 	struct list_head		cache_props;
-	uint32_t			io_link_count;
 	struct list_head		io_link_props;
+	struct list_head		p2p_link_props;
 	struct list_head		perf_props;
 	struct kfd_dev			*gpu;
 	struct kobject			*kobj_node;
 	struct kobject			*kobj_mem;
 	struct kobject			*kobj_cache;
 	struct kobject			*kobj_iolink;
+	struct kobject			*kobj_p2plink;
 	struct kobject			*kobj_perf;
 	struct attribute		attr_gpuid;
 	struct attribute		attr_name;
@@ -91,10 +91,14 @@
 #include "dcn/dcn_1_0_offset.h"
 #include "dcn/dcn_1_0_sh_mask.h"
 #include "soc15_hw_ip.h"
+#include "soc15_common.h"
 #include "vega10_ip_offset.h"
 
 #include "soc15_common.h"
 
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+
 #include "modules/inc/mod_freesync.h"
 #include "modules/power/power_helpers.h"
 #include "modules/inc/mod_info_packet.h"

@@ -120,6 +124,11 @@ MODULE_FIRMWARE(FIRMWARE_DCN_315_DMUB);
 #define FIRMWARE_DCN316_DMUB "amdgpu/dcn_3_1_6_dmcub.bin"
 MODULE_FIRMWARE(FIRMWARE_DCN316_DMUB);
 
+#define FIRMWARE_DCN_V3_2_0_DMCUB "amdgpu/dcn_3_2_0_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_V3_2_0_DMCUB);
+#define FIRMWARE_DCN_V3_2_1_DMCUB "amdgpu/dcn_3_2_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_V3_2_1_DMCUB);
+
 #define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin"
 MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
@@ -1258,10 +1267,20 @@ static void vblank_control_worker(struct work_struct *work)
 
 	DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
 
-	/* Control PSR based on vblank requirements from OS */
+	/*
+	 * Control PSR based on vblank requirements from OS
+	 *
+	 * If panel supports PSR SU, there's no need to disable PSR when OS is
+	 * submitting fast atomic commits (we infer this by whether the OS
+	 * requests vblank events). Fast atomic commits will simply trigger a
+	 * full-frame-update (FFU); a specific case of selective-update (SU)
+	 * where the SU region is the full hactive*vactive region. See
+	 * fill_dc_dirty_rects().
+	 */
 	if (vblank_work->stream && vblank_work->stream->link) {
 		if (vblank_work->enable) {
-			if (vblank_work->stream->link->psr_settings.psr_allow_active)
+			if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
+			    vblank_work->stream->link->psr_settings.psr_allow_active)
 				amdgpu_dm_psr_disable(vblank_work->stream);
 		} else if (vblank_work->stream->link->psr_settings.psr_feature_enabled &&
 			   !vblank_work->stream->link->psr_settings.psr_allow_active &&
@@ -1509,6 +1528,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 		DRM_INFO("Seamless boot condition check passed\n");
 	}
 
+	init_data.flags.enable_mipi_converter_optimization = true;
+
 	INIT_LIST_HEAD(&adev->dm.da_list);
 	/* Display Core create. */
 	adev->dm.dc = dc_create(&init_data);

@@ -1803,6 +1824,8 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
 	case IP_VERSION(3, 1, 3):
 	case IP_VERSION(3, 1, 5):
 	case IP_VERSION(3, 1, 6):
+	case IP_VERSION(3, 2, 0):
+	case IP_VERSION(3, 2, 1):
 		return 0;
 	default:
 		break;

@@ -1926,6 +1949,14 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 		dmub_asic = DMUB_ASIC_DCN316;
 		fw_name_dmub = FIRMWARE_DCN316_DMUB;
 		break;
+	case IP_VERSION(3, 2, 0):
+		dmub_asic = DMUB_ASIC_DCN32;
+		fw_name_dmub = FIRMWARE_DCN_V3_2_0_DMCUB;
+		break;
+	case IP_VERSION(3, 2, 1):
+		dmub_asic = DMUB_ASIC_DCN321;
+		fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
+		break;
 	default:
 		/* ASIC doesn't support DMUB. */
 		return 0;

@@ -2172,7 +2203,8 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 		} else {
 			ret = drm_dp_mst_topology_mgr_resume(mgr, true);
 			if (ret < 0) {
-				drm_dp_mst_topology_mgr_set_mst(mgr, false);
+				dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
+					aconnector->dc_link);
 				need_hotplug = true;
 			}
 		}
@@ -2554,34 +2586,6 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
 	return;
 }
 
-static void dm_set_dpms_off(struct dc_link *link, struct dm_crtc_state *acrtc_state)
-{
-	struct dc_stream_state *stream_state;
-	struct amdgpu_dm_connector *aconnector = link->priv;
-	struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
-	struct dc_stream_update stream_update;
-	bool dpms_off = true;
-
-	memset(&stream_update, 0, sizeof(stream_update));
-	stream_update.dpms_off = &dpms_off;
-
-	mutex_lock(&adev->dm.dc_lock);
-	stream_state = dc_stream_find_from_link(link);
-
-	if (stream_state == NULL) {
-		DRM_DEBUG_DRIVER("Error finding stream state associated with link!\n");
-		mutex_unlock(&adev->dm.dc_lock);
-		return;
-	}
-
-	stream_update.stream = stream_state;
-	acrtc_state->force_dpms_off = true;
-	dc_commit_updates_for_stream(stream_state->ctx->dc, NULL, 0,
-				     stream_state, &stream_update,
-				     stream_state->ctx->dc->current_state);
-	mutex_unlock(&adev->dm.dc_lock);
-}
-
 static int dm_resume(void *handle)
 {
 	struct amdgpu_device *adev = handle;
@@ -2814,7 +2818,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
 
 static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
 {
-	u32 max_cll, min_cll, max, min, q, r;
+	u32 max_avg, min_cll, max, min, q, r;
 	struct amdgpu_dm_backlight_caps *caps;
 	struct amdgpu_display_manager *dm;
 	struct drm_connector *conn_base;

@@ -2844,7 +2848,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
 	caps = &dm->backlight_caps[i];
 	caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
 	caps->aux_support = false;
-	max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
+	max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
 	min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
 
 	if (caps->ext_caps->bits.oled == 1 /*||

@@ -2872,8 +2876,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
 	 * The results of the above expressions can be verified at
 	 * pre_computed_values.
 	 */
-	q = max_cll >> 5;
-	r = max_cll % 32;
+	q = max_avg >> 5;
+	r = max_avg % 32;
 	max = (1 << q) * pre_computed_values[r];
 
 	// min luminance: maxLum * (CV/255)^2 / 100
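The decode above splits the 8-bit DPCD code into q = value >> 5 and r = value % 32, then computes max = 2^q * pre_computed_values[r]. A quick worked instance; the table entry is an assumption, since pre_computed_values is not shown in this hunk and is treated here as approximating 2^(r/32):

#include <stdio.h>

int main(void)
{
	unsigned int max_avg = 70;		/* example DPCD max_fall code */
	unsigned int q = max_avg >> 5;		/* 70 >> 5 = 2 */
	unsigned int r = max_avg % 32;		/* 70 % 32 = 6 */
	double table_r = 1.139;			/* assumed ~2^(6/32) entry */
	double max = (1u << q) * table_r;	/* 4 * 1.139 = 4.556 nits scale */

	printf("q=%u r=%u max=%.3f\n", q, r, max);
	return 0;
}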
@@ -3032,16 +3036,13 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
 	struct drm_device *dev = connector->dev;
 	enum dc_connection_type new_connection_type = dc_connection_none;
 	struct amdgpu_device *adev = drm_to_adev(dev);
+#ifdef CONFIG_DRM_AMD_DC_HDCP
 	struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state);
-	struct dm_crtc_state *dm_crtc_state = NULL;
+#endif
 
 	if (adev->dm.disable_hpd_irq)
 		return;
 
-	if (dm_con_state->base.state && dm_con_state->base.crtc)
-		dm_crtc_state = to_dm_crtc_state(drm_atomic_get_crtc_state(
-					dm_con_state->base.state,
-					dm_con_state->base.crtc));
 	/*
 	 * In case of failure or MST no need to update connector status or notify the OS
 	 * since (for MST case) MST does this in its own context.

@@ -3071,11 +3072,6 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
 			drm_kms_helper_connector_hotplug_event(connector);
 
 	} else if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) {
-		if (new_connection_type == dc_connection_none &&
-		    aconnector->dc_link->type == dc_connection_none &&
-		    dm_crtc_state)
-			dm_set_dpms_off(aconnector->dc_link, dm_crtc_state);
-
 		amdgpu_dm_update_connector_after_detect(aconnector);
 
 		drm_modeset_lock_all(dev);

@@ -3868,9 +3864,6 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
 #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
 
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
-	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
 static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
 					    int bl_idx)
 {

@@ -4074,7 +4067,6 @@ amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm)
 	else
 		DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name);
 }
-#endif
 
 static int initialize_plane(struct amdgpu_display_manager *dm,
 			    struct amdgpu_mode_info *mode_info, int plane_id,

@@ -4120,9 +4112,6 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
 static void register_backlight_device(struct amdgpu_display_manager *dm,
 				      struct dc_link *link)
 {
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
-	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
 	if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
 	    link->type != dc_connection_none) {
 		/*

@@ -4138,7 +4127,6 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
 			dm->num_of_edps++;
 		}
 	}
-#endif
 }
 
@@ -4235,6 +4223,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		case IP_VERSION(3, 1, 3):
 		case IP_VERSION(3, 1, 5):
 		case IP_VERSION(3, 1, 6):
+		case IP_VERSION(3, 2, 0):
+		case IP_VERSION(3, 2, 1):
 		case IP_VERSION(2, 1, 0):
 			if (register_outbox_irq_handlers(dm->adev)) {
 				DRM_ERROR("DM: Failed to initialize IRQ\n");

@@ -4253,6 +4243,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		case IP_VERSION(3, 1, 3):
 		case IP_VERSION(3, 1, 5):
 		case IP_VERSION(3, 1, 6):
+		case IP_VERSION(3, 2, 0):
+		case IP_VERSION(3, 2, 1):
 			psr_feature_enabled = true;
 			break;
 		default:

@@ -4261,9 +4253,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		}
 	}
 
-	/* Disable vblank IRQs aggressively for power-saving. */
-	adev_to_drm(adev)->vblank_disable_immediate = true;
-
 	/* loops over all connectors on the board */
 	for (i = 0; i < link_cnt; i++) {
 		struct dc_link *link = NULL;

@@ -4370,6 +4359,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		case IP_VERSION(3, 1, 3):
 		case IP_VERSION(3, 1, 5):
 		case IP_VERSION(3, 1, 6):
+		case IP_VERSION(3, 2, 0):
+		case IP_VERSION(3, 2, 1):
 			if (dcn10_register_irq_handlers(dm->adev)) {
 				DRM_ERROR("DM: Failed to initialize IRQ\n");
 				goto fail;

@@ -4556,6 +4547,8 @@ static int dm_early_init(void *handle)
 	case IP_VERSION(3, 1, 3):
 	case IP_VERSION(3, 1, 5):
 	case IP_VERSION(3, 1, 6):
+	case IP_VERSION(3, 2, 0):
+	case IP_VERSION(3, 2, 1):
 		adev->mode_info.num_crtc = 4;
 		adev->mode_info.num_hpd = 4;
 		adev->mode_info.num_dig = 4;

@@ -4865,7 +4858,9 @@ fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
 	unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier);
 	unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
 	unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier);
-	unsigned int pipes_log2 = min(4u, mod_pipe_xor_bits);
+	unsigned int pipes_log2;
+
+	pipes_log2 = min(5u, mod_pipe_xor_bits);
 
 	fill_gfx9_tiling_info_from_device(adev, tiling_info);
 
@@ -5201,8 +5196,73 @@ add_gfx10_3_modifiers(const struct amdgpu_device *adev,
 		    AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
 }
 
+static void
+add_gfx11_modifiers(struct amdgpu_device *adev,
+		      uint64_t **mods, uint64_t *size, uint64_t *capacity)
+{
+	int num_pipes = 0;
+	int pipe_xor_bits = 0;
+	int num_pkrs = 0;
+	int pkrs = 0;
+	u32 gb_addr_config;
+	u8 i = 0;
+	unsigned swizzle_r_x;
+	uint64_t modifier_r_x;
+	uint64_t modifier_dcc_best;
+	uint64_t modifier_dcc_4k;
+
+	/* TODO: GFX11 IP HW init hasnt finish and we get zero if we read from
+	 * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} */
+	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+	ASSERT(gb_addr_config != 0);
+
+	num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+	pkrs = ilog2(num_pkrs);
+	num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES);
+	pipe_xor_bits = ilog2(num_pipes);
+
+	for (i = 0; i < 2; i++) {
+		/* Insert the best one first. */
+		/* R_X swizzle modes are the best for rendering and DCC requires them. */
+		if (num_pipes > 16)
+			swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X;
+		else
+			swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X;
+
+		modifier_r_x = AMD_FMT_MOD |
+			       AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
+			       AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+			       AMD_FMT_MOD_SET(TILE, swizzle_r_x) |
+			       AMD_FMT_MOD_SET(PACKERS, pkrs);
+
+		/* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */
+		modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) |
+				    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
+				    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+				    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
+
+		/* DCC settings for 4K and greater resolutions. (required by display hw) */
+		modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) |
+				  AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+				  AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+				  AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
+
+		add_modifier(mods, size, capacity, modifier_dcc_best);
+		add_modifier(mods, size, capacity, modifier_dcc_4k);
+
+		add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+		add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+
+		add_modifier(mods, size, capacity, modifier_r_x);
+	}
+
+	add_modifier(mods, size, capacity, AMD_FMT_MOD |
+		     AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
+		     AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
+}
+
 static int
-get_plane_modifiers(const struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
+get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
 {
 	uint64_t size = 0, capacity = 128;
 	*mods = NULL;
@@ -5234,6 +5294,9 @@ get_plane_modifiers(const struct amdgpu_device *adev, unsigned int plane_type, u
 		else
 			add_gfx10_1_modifiers(adev, mods, &size, &capacity);
 		break;
+	case AMDGPU_FAMILY_GC_11_0_0:
+		add_gfx11_modifiers(adev, mods, &size, &capacity);
+		break;
 	}
 
 	add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
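Modifiers assembled by add_gfx11_modifiers() are plain 64-bit bitfields, so they can be decoded again with the AMD_FMT_MOD_GET macros from the uapi drm_fourcc.h. A userspace sketch, assuming libdrm's copy of that header is installed; the field values are made up for the example:

#include <drm/drm_fourcc.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Build an example GFX11-style modifier, then pull it apart again. */
	uint64_t mod = AMD_FMT_MOD |
		       AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
		       AMD_FMT_MOD_SET(PIPE_XOR_BITS, 4) |
		       AMD_FMT_MOD_SET(PACKERS, 3) |
		       AMD_FMT_MOD_SET(DCC, 1);

	printf("tile_version=%llu pipe_xor_bits=%llu packers=%llu dcc=%llu\n",
	       (unsigned long long)AMD_FMT_MOD_GET(TILE_VERSION, mod),
	       (unsigned long long)AMD_FMT_MOD_GET(PIPE_XOR_BITS, mod),
	       (unsigned long long)AMD_FMT_MOD_GET(PACKERS, mod),
	       (unsigned long long)AMD_FMT_MOD_GET(DCC, mod));
	return 0;
}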
@@ -5272,7 +5335,7 @@ fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
 		dcc->enable = 1;
 		dcc->meta_pitch = afb->base.pitches[1];
 		dcc->independent_64b_blks = independent_64b_blks;
-		if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) {
+		if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) {
 			if (independent_64b_blks && independent_128b_blks)
 				dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl;
 			else if (independent_128b_blks)
@@ -5640,6 +5703,117 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 	return 0;
 }

+/**
+ * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
+ *
+ * @plane: DRM plane containing dirty regions that need to be flushed to the
+ *         eDP remote fb
+ * @old_plane_state: Old state of @plane
+ * @new_plane_state: New state of @plane
+ * @crtc_state: New state of CRTC connected to the @plane
+ * @flip_addrs: DC flip tracking struct, which also tracks dirty rects
+ *
+ * For PSR SU, DC informs the DMUB uController of dirty rectangle regions
+ * (referred to as "damage clips" in DRM nomenclature) that require updating on
+ * the eDP remote buffer. The responsibility of specifying the dirty regions is
+ * amdgpu_dm's.
+ *
+ * A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the
+ * plane with regions that require flushing to the eDP remote buffer. In
+ * addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
+ * implicitly provide damage clips without any client support via the plane
+ * bounds.
+ *
+ * Today, amdgpu_dm only supports the MPO and cursor use cases.
+ *
+ * TODO: Also enable for FB_DAMAGE_CLIPS
+ */
+static void fill_dc_dirty_rects(struct drm_plane *plane,
+				struct drm_plane_state *old_plane_state,
+				struct drm_plane_state *new_plane_state,
+				struct drm_crtc_state *crtc_state,
+				struct dc_flip_addrs *flip_addrs)
+{
+	struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
+	struct rect *dirty_rects = flip_addrs->dirty_rects;
+	uint32_t num_clips;
+	bool bb_changed;
+	bool fb_changed;
+	uint32_t i = 0;
+
+	flip_addrs->dirty_rect_count = 0;
+
+	/*
+	 * The cursor plane has its own dirty rect update interface. See
+	 * dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
+	 */
+	if (plane->type == DRM_PLANE_TYPE_CURSOR)
+		return;
+
+	/*
+	 * Today, we only consider the MPO use case for PSR SU. If MPO is not
+	 * requested, and there is a plane update, do FFU.
+	 */
+	if (!dm_crtc_state->mpo_requested) {
+		dirty_rects[0].x = 0;
+		dirty_rects[0].y = 0;
+		dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay;
+		dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay;
+		flip_addrs->dirty_rect_count = 1;
+		DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
+				 new_plane_state->plane->base.id,
+				 dm_crtc_state->base.mode.crtc_hdisplay,
+				 dm_crtc_state->base.mode.crtc_vdisplay);
+		return;
+	}
+
+	/*
+	 * MPO is requested. Add the entire plane bounding box to the dirty
+	 * rects if flipped to or damaged.
+	 *
+	 * If the plane is moved or resized, also add the old bounding box to
+	 * the dirty rects.
+	 */
+	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
+	fb_changed = old_plane_state->fb->base.id !=
+		     new_plane_state->fb->base.id;
+	bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
+		      old_plane_state->crtc_y != new_plane_state->crtc_y ||
+		      old_plane_state->crtc_w != new_plane_state->crtc_w ||
+		      old_plane_state->crtc_h != new_plane_state->crtc_h);

+	DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
+			 new_plane_state->plane->base.id,
+			 bb_changed, fb_changed, num_clips);
+
+	if (num_clips || fb_changed || bb_changed) {
+		dirty_rects[i].x = new_plane_state->crtc_x;
+		dirty_rects[i].y = new_plane_state->crtc_y;
+		dirty_rects[i].width = new_plane_state->crtc_w;
+		dirty_rects[i].height = new_plane_state->crtc_h;
+		DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
+				 new_plane_state->plane->base.id,
+				 dirty_rects[i].x, dirty_rects[i].y,
+				 dirty_rects[i].width, dirty_rects[i].height);
+		i += 1;
+	}
+
+	/* Add the old plane bounding box if the plane is moved or resized */
+	if (bb_changed) {
+		dirty_rects[i].x = old_plane_state->crtc_x;
+		dirty_rects[i].y = old_plane_state->crtc_y;
+		dirty_rects[i].width = old_plane_state->crtc_w;
+		dirty_rects[i].height = old_plane_state->crtc_h;
+		DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
+				 old_plane_state->plane->base.id,
+				 dirty_rects[i].x, dirty_rects[i].y,
+				 dirty_rects[i].width, dirty_rects[i].height);
+		i += 1;
+	}
+
+	flip_addrs->dirty_rect_count = i;
+}
+
 static void update_stream_scaling_settings(const struct drm_display_mode *mode,
 					   const struct dm_connector_state *dm_state,
 					   struct dc_stream_state *stream)
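The kernel-doc above leaves FB_DAMAGE_CLIPS as a TODO on the kernel side, but the client half of the contract already exists in the DRM uapi. A hedged sketch of how a damage-aware atomic client would attach one damage rectangle to a plane — assuming a libdrm atomic client, and that fb_damage_clips_id was discovered by matching the property named "FB_DAMAGE_CLIPS" in drmModeObjectGetProperties(); the helper name and parameters are illustrative:

	#include <xf86drm.h>
	#include <xf86drmMode.h>
	#include <drm/drm_mode.h>

	static int add_damage_clip(int fd, drmModeAtomicReq *req,
				   uint32_t plane_id, uint32_t fb_damage_clips_id,
				   int x, int y, int w, int h)
	{
		/* FB_DAMAGE_CLIPS takes a blob of struct drm_mode_rect. */
		struct drm_mode_rect clip = {
			.x1 = x, .y1 = y,
			.x2 = x + w, .y2 = y + h,
		};
		uint32_t blob_id;
		int ret;

		ret = drmModeCreatePropertyBlob(fd, &clip, sizeof(clip), &blob_id);
		if (ret)
			return ret;

		/* A negative return means the property could not be staged. */
		return drmModeAtomicAddProperty(req, plane_id,
						fb_damage_clips_id, blob_id) < 0 ? -1 : 0;
	}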
@@ -6587,7 +6761,7 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
 	state->freesync_config = cur->freesync_config;
 	state->cm_has_degamma = cur->cm_has_degamma;
 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
-	state->force_dpms_off = cur->force_dpms_off;
+	state->mpo_requested = cur->mpo_requested;
 	/* TODO: duplicate dc_stream once the stream object is flattened */

 	return &state->base;
@@ -6679,7 +6853,7 @@ static void dm_disable_vblank(struct drm_crtc *crtc)
 	dm_set_vblank(crtc, false);
 }

-/* Implemented only the options currently availible for the driver */
+/* Implemented only the options currently available for the driver */
 static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 	.reset = dm_crtc_reset_state,
 	.destroy = amdgpu_dm_crtc_destroy,
@@ -6846,15 +7020,12 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 	if (aconnector->mst_mgr.dev)
 		drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr);

-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
-	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 	for (i = 0; i < dm->num_of_edps; i++) {
 		if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) {
 			backlight_device_unregister(dm->backlight_dev[i]);
 			dm->backlight_dev[i] = NULL;
 		}
 	}
-#endif

 	if (aconnector->dc_em_sink)
 		dc_sink_release(aconnector->dc_em_sink);
@@ -7042,7 +7213,11 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		break;
 	}

-	dc_result = dc_validate_stream(adev->dm.dc, stream);
+	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+		dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
+
+	if (dc_result == DC_OK)
+		dc_result = dc_validate_stream(adev->dm.dc, stream);

 	if (dc_result != DC_OK) {
 		DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n",
@@ -7342,7 +7517,7 @@ static void dm_encoder_helper_disable(struct drm_encoder *encoder)

 }

-static int convert_dc_color_depth_into_bpc (enum dc_color_depth display_color_depth)
+int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth)
 {
 	switch (display_color_depth) {
 	case COLOR_DEPTH_666:
@@ -9224,6 +9399,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		bundle->surface_updates[planes_count].plane_info =
 			&bundle->plane_infos[planes_count];

+		fill_dc_dirty_rects(plane, old_plane_state, new_plane_state,
+				    new_crtc_state,
+				    &bundle->flip_addrs[planes_count]);
+
 		/*
 		 * Only allow immediate flips for fast updates that don't
 		 * change FB pitch, DCC state, rotation or mirroring.
@@ -9310,8 +9489,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 	 * and rely on sending it from software.
 	 */
 	if (acrtc_attach->base.state->event &&
-	    acrtc_state->active_planes > 0 &&
-	    !acrtc_state->force_dpms_off) {
+	    acrtc_state->active_planes > 0) {
 		drm_crtc_vblank_get(pcrtc);

 		spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
@@ -9419,6 +9597,18 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

 			/* Allow PSR when skip count is 0. */
 			acrtc_attach->dm_irq_params.allow_psr_entry = !aconn->psr_skip_count;
+
+			/*
+			 * If sink supports PSR SU, there is no need to rely on
+			 * a vblank event disable request to enable PSR. PSR SU
+			 * can be enabled immediately once OS demonstrates an
+			 * adequate number of fast atomic commits to notify KMD
+			 * of update events. See `vblank_control_worker()`.
+			 */
+			if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
+			    acrtc_attach->dm_irq_params.allow_psr_entry &&
+			    !acrtc_state->stream->link->psr_settings.psr_allow_active)
+				amdgpu_dm_psr_enable(acrtc_state->stream);
 		} else {
 			acrtc_attach->dm_irq_params.allow_psr_entry = false;
 		}
@@ -9912,15 +10102,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	/* Update audio instances for each connector. */
 	amdgpu_dm_commit_audio(dev, state);

-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \
-	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 	/* restore the backlight level */
 	for (i = 0; i < dm->num_of_edps; i++) {
 		if (dm->backlight_dev[i] &&
 		    (dm->actual_brightness[i] != dm->brightness[i]))
 			amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
 	}
-#endif

 	/*
 	 * send vblank event on all events not handled in flip and
 	 * mark consumed event for drm_atomic_helper_commit_hw_done
@@ -10368,7 +10556,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	 * added MST connectors not found in existing crtc_state in the chained mode
 	 * TODO: need to dig out the root cause of that
 	 */
-	if (!aconnector || (!aconnector->dc_sink && aconnector->mst_port))
+	if (!aconnector)
 		goto skip_modeset;

 	if (modereset_required(new_crtc_state))
@@ -10979,7 +11167,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			}
 		}
 	}
-		pre_validate_dsc(state, &dm_state, vars);
+		if (!pre_validate_dsc(state, &dm_state, vars)) {
+			ret = -EINVAL;
+			goto fail;
+		}
 	}
 #endif
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
@@ -11225,6 +11416,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 		if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) {
+			DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
 			ret = -EINVAL;
 			goto fail;
 		}
@@ -242,6 +242,13 @@ struct hpd_rx_irq_offload_work {
 * @force_timing_sync: set via debugfs. When set, indicates that all connected
 *		       displays will be forced to synchronize.
 * @dmcub_trace_event_en: enable dmcub trace events
 * @dmub_outbox_params: DMUB Outbox parameters
 * @num_of_edps: number of backlight eDPs
 * @disable_hpd_irq: disables all HPD and HPD RX interrupt handling in the
 *		     driver when true
 * @dmub_aux_transfer_done: struct completion used to indicate when DMUB
 *			    transfers are done
 * @delayed_hpd_wq: work queue used to delay DMUB HPD work
 */
struct amdgpu_display_manager {
@@ -583,7 +590,6 @@ struct amdgpu_dm_connector {
 	struct drm_dp_mst_port *port;
 	struct amdgpu_dm_connector *mst_port;
 	struct drm_dp_aux *dsc_aux;

 	/* TODO see if we can merge with ddc_bus or make a dm_connector */
 	struct amdgpu_i2c_adapter *i2c;
@@ -639,8 +645,6 @@ struct dm_crtc_state {
 	bool dsc_force_changed;
 	bool vrr_supported;
-
-	bool force_dpms_off;
 	struct mod_freesync_config freesync_config;
 	struct dc_info_packet vrr_infopacket;
@@ -749,4 +753,6 @@ int dm_atomic_get_state(struct drm_atomic_state *state,
 struct amdgpu_dm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc);
+
+int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth);
 #endif /* __AMDGPU_DM_H__ */
@@ -540,11 +540,11 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,

 	/* apply phy settings from user */
 	for (r = 0; r < link_lane_settings.link_settings.lane_count; r++) {
-		link_lane_settings.lane_settings[r].VOLTAGE_SWING =
+		link_lane_settings.hw_lane_settings[r].VOLTAGE_SWING =
 				(enum dc_voltage_swing) (param[0]);
-		link_lane_settings.lane_settings[r].PRE_EMPHASIS =
+		link_lane_settings.hw_lane_settings[r].PRE_EMPHASIS =
 				(enum dc_pre_emphasis) (param[1]);
-		link_lane_settings.lane_settings[r].POST_CURSOR2 =
+		link_lane_settings.hw_lane_settings[r].POST_CURSOR2 =
 				(enum dc_post_cursor2) (param[2]);
 	}
@@ -738,7 +738,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
 	}

 	for (i = 0; i < (unsigned int)(link_training_settings.link_settings.lane_count); i++)
-		link_training_settings.lane_settings[i] = link->cur_lane_setting[i];
+		link_training_settings.hw_lane_settings[i] = link->cur_lane_setting[i];

 	dc_link_set_test_pattern(
 			link,
@@ -476,13 +476,16 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
 	link->ddc_line = aconnector->dc_link->ddc_hw_inst + 1;
 	display->stream_enc_idx = config->stream_enc_idx;
 	link->link_enc_idx = config->link_enc_idx;
 	link->dio_output_id = config->dio_output_idx;
 	link->phy_idx = config->phy_idx;

 	if (sink)
 		link_is_hdcp14 = dc_link_is_hdcp14(aconnector->dc_link, sink->sink_signal);
 	link->hdcp_supported_informational = link_is_hdcp14;
 	link->dp.rev = aconnector->dc_link->dpcd_caps.dpcd_rev.raw;
 	link->dp.assr_enabled = config->assr_enabled;
 	link->dp.mst_enabled = config->mst_enabled;
 	link->dp.usb4_enabled = config->usb4_enabled;
 	display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
 	link->adjust.auth_delay = 3;
 	link->adjust.hdcp1.disable = 0;
@@ -451,7 +451,6 @@ bool dm_helpers_dp_mst_stop_top_mgr(
 		struct dc_link *link)
 {
 	struct amdgpu_dm_connector *aconnector = link->priv;
-	uint8_t i;

 	if (!aconnector) {
 		DRM_ERROR("Failed to find connector for link!");
@@ -463,22 +462,7 @@ bool dm_helpers_dp_mst_stop_top_mgr(

 	if (aconnector->mst_mgr.mst_state == true) {
 		drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, false);
-
-		for (i = 0; i < MAX_SINKS_PER_LINK; i++) {
-			if (link->remote_sinks[i] == NULL)
-				continue;
-
-			if (link->remote_sinks[i]->sink_signal ==
-			    SIGNAL_TYPE_DISPLAY_PORT_MST) {
-				dc_link_remove_remote_sink(link, link->remote_sinks[i]);
-
-				if (aconnector->dc_sink) {
-					dc_sink_release(aconnector->dc_sink);
-					aconnector->dc_sink = NULL;
-					aconnector->dc_link->cur_link_settings.lane_count = 0;
-				}
-			}
-		}
+		link->cur_link_settings.lane_count = 0;
 	}

 	return false;
@@ -140,11 +140,28 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
 static void
 amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
 {
-	struct amdgpu_dm_connector *amdgpu_dm_connector =
+	struct amdgpu_dm_connector *aconnector =
 		to_amdgpu_dm_connector(connector);
-	struct drm_dp_mst_port *port = amdgpu_dm_connector->port;
+	struct drm_dp_mst_port *port = aconnector->port;
+	struct amdgpu_dm_connector *root = aconnector->mst_port;
+	struct dc_link *dc_link = aconnector->dc_link;
+	struct dc_sink *dc_sink = aconnector->dc_sink;

 	drm_dp_mst_connector_early_unregister(connector, port);

+	/*
+	 * Release the dc_sink for a connector whose attached port is no
+	 * longer in the MST topology.
+	 */
+	drm_modeset_lock(&root->mst_mgr.base.lock, NULL);
+	if (dc_sink) {
+		if (dc_link->sink_count)
+			dc_link_remove_remote_sink(dc_link, dc_sink);
+
+		dc_sink_release(dc_sink);
+		aconnector->dc_sink = NULL;
+	}
+	drm_modeset_unlock(&root->mst_mgr.base.lock);
 }

 static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
@@ -344,12 +361,59 @@ dm_dp_mst_detect(struct drm_connector *connector,
 {
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 	struct amdgpu_dm_connector *master = aconnector->mst_port;
+	struct drm_dp_mst_port *port = aconnector->port;
+	int connection_status;

 	if (drm_connector_is_unregistered(connector))
 		return connector_status_disconnected;

-	return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
-				      aconnector->port);
+	connection_status = drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
+						   aconnector->port);
+
+	if (port->pdt != DP_PEER_DEVICE_NONE && !port->dpcd_rev) {
+		uint8_t dpcd_rev;
+		int ret;
+
+		ret = drm_dp_dpcd_readb(&port->aux, DP_DP13_DPCD_REV, &dpcd_rev);
+
+		if (ret == 1) {
+			port->dpcd_rev = dpcd_rev;
+
+			/* Could be a DP 1.2 DP Rx case */
+			if (!dpcd_rev) {
+				ret = drm_dp_dpcd_readb(&port->aux, DP_DPCD_REV, &dpcd_rev);
+
+				if (ret == 1)
+					port->dpcd_rev = dpcd_rev;
+			}
+
+			if (!dpcd_rev)
+				DRM_DEBUG_KMS("Can't decide DPCD revision number!");
+		}
+
+		/*
+		 * Could be a legacy sink, logical port, etc. on DP 1.2.
+		 * These cases will NACK a remote DPCD read.
+		 */
+		if (ret != 1)
+			DRM_DEBUG_KMS("Can't access DPCD");
+	} else if (port->pdt == DP_PEER_DEVICE_NONE) {
+		port->dpcd_rev = 0;
+	}
+
+	/*
+	 * Release the dc_sink for a connector whose unplug event was
+	 * notified by a CSN message.
+	 */
+	if (connection_status == connector_status_disconnected && aconnector->dc_sink) {
+		if (aconnector->dc_link->sink_count)
+			dc_link_remove_remote_sink(aconnector->dc_link, aconnector->dc_sink);
+
+		dc_sink_release(aconnector->dc_sink);
+		aconnector->dc_sink = NULL;
+	}
+
+	return connection_status;
 }

 static int dm_dp_mst_atomic_check(struct drm_connector *connector,
@@ -634,7 +698,7 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn)
 	return dsc_config.bits_per_pixel;
 }

-static void increase_dsc_bpp(struct drm_atomic_state *state,
+static bool increase_dsc_bpp(struct drm_atomic_state *state,
 			     struct dc_link *dc_link,
 			     struct dsc_mst_fairness_params *params,
 			     struct dsc_mst_fairness_vars *vars,
@@ -694,7 +758,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
 						  params[next_index].port,
 						  vars[next_index].pbn,
 						  pbn_per_timeslot) < 0)
-				return;
+				return false;
 			if (!drm_dp_mst_atomic_check(state)) {
 				vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn);
 			} else {
@@ -704,7 +768,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
 						  params[next_index].port,
 						  vars[next_index].pbn,
 						  pbn_per_timeslot) < 0)
-					return;
+					return false;
 			}
 		} else {
 			vars[next_index].pbn += initial_slack[next_index];
@@ -713,7 +777,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
 						  params[next_index].port,
 						  vars[next_index].pbn,
 						  pbn_per_timeslot) < 0)
-				return;
+				return false;
 			if (!drm_dp_mst_atomic_check(state)) {
 				vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16;
 			} else {
@@ -723,16 +787,17 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
 						  params[next_index].port,
 						  vars[next_index].pbn,
 						  pbn_per_timeslot) < 0)
-					return;
+					return false;
 			}
 		}

 		bpp_increased[next_index] = true;
 		remaining_to_increase--;
 	}
+	return true;
 }

-static void try_disable_dsc(struct drm_atomic_state *state,
+static bool try_disable_dsc(struct drm_atomic_state *state,
 			    struct dc_link *dc_link,
 			    struct dsc_mst_fairness_params *params,
 			    struct dsc_mst_fairness_vars *vars,
@@ -780,7 +845,7 @@ static void try_disable_dsc(struct drm_atomic_state *state,
 					  params[next_index].port,
 					  vars[next_index].pbn,
 					  dm_mst_get_pbn_divider(dc_link)) < 0)
-			return;
+			return false;

 		if (!drm_dp_mst_atomic_check(state)) {
 			vars[next_index].dsc_enabled = false;
@@ -792,12 +857,13 @@ static void try_disable_dsc(struct drm_atomic_state *state,
 					  params[next_index].port,
 					  vars[next_index].pbn,
 					  dm_mst_get_pbn_divider(dc_link)) < 0)
-				return;
+				return false;
 		}

 		tried[next_index] = true;
 		remaining_to_try--;
 	}
+	return true;
 }

 static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
@@ -913,9 +979,11 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
 			return false;

 		/* Optimize degree of compression */
-		increase_dsc_bpp(state, dc_link, params, vars, count, k);
+		if (!increase_dsc_bpp(state, dc_link, params, vars, count, k))
+			return false;

-		try_disable_dsc(state, dc_link, params, vars, count, k);
+		if (!try_disable_dsc(state, dc_link, params, vars, count, k))
+			return false;

 		set_dsc_configs_from_fairness_vars(params, vars, count, k);
@@ -1187,21 +1255,22 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state)
 	return ret;
 }

-void pre_validate_dsc(struct drm_atomic_state *state,
+bool pre_validate_dsc(struct drm_atomic_state *state,
 		      struct dm_atomic_state **dm_state_ptr,
 		      struct dsc_mst_fairness_vars *vars)
 {
 	int i;
 	struct dm_atomic_state *dm_state;
 	struct dc_state *local_dc_state = NULL;
+	int ret = 0;

 	if (!is_dsc_precompute_needed(state)) {
 		DRM_INFO_ONCE("DSC precompute is not needed.\n");
-		return;
+		return true;
 	}
 	if (dm_atomic_get_state(state, dm_state_ptr)) {
 		DRM_INFO_ONCE("dm_atomic_get_state() failed\n");
-		return;
+		return false;
 	}
 	dm_state = *dm_state_ptr;
@@ -1213,7 +1282,7 @@ void pre_validate_dsc(struct drm_atomic_state *state,

 	local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL);
 	if (!local_dc_state)
-		return;
+		return false;

 	for (i = 0; i < local_dc_state->stream_count; i++) {
 		struct dc_stream_state *stream = dm_state->context->streams[i];
@@ -1239,11 +1308,19 @@ void pre_validate_dsc(struct drm_atomic_state *state,
 							       &state->crtcs[ind].new_state->mode,
 							       dm_new_conn_state,
 							       dm_old_crtc_state->stream);
+			if (local_dc_state->streams[i] == NULL) {
+				ret = -EINVAL;
+				break;
+			}
 		}
 	}

+	if (ret != 0)
+		goto clean_exit;
+
 	if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) {
 		DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n");
+		ret = -EINVAL;
 		goto clean_exit;
 	}
@@ -1273,5 +1350,43 @@ void pre_validate_dsc(struct drm_atomic_state *state,
 	}

 	kfree(local_dc_state);
+
+	return (ret == 0);
 }

 #endif
+
+enum dc_status dm_dp_mst_is_port_support_mode(
+	struct amdgpu_dm_connector *aconnector,
+	struct dc_stream_state *stream)
+{
+	int bpp, pbn, branch_max_throughput_mps = 0;
+
+	/* check if the mode could be supported within full_pbn */
+	bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
+	pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
+	if (pbn > aconnector->port->full_pbn)
+		return DC_FAIL_BANDWIDTH_VALIDATE;
+
+	/* check the MST DSC output bandwidth against branch_overall_throughput_0_mps */
+	switch (stream->timing.pixel_encoding) {
+	case PIXEL_ENCODING_RGB:
+	case PIXEL_ENCODING_YCBCR444:
+		branch_max_throughput_mps =
+			aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_0_mps;
+		break;
+	case PIXEL_ENCODING_YCBCR422:
+	case PIXEL_ENCODING_YCBCR420:
+		branch_max_throughput_mps =
+			aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_1_mps;
+		break;
+	default:
+		break;
+	}
+
+	if (branch_max_throughput_mps != 0 &&
+	    ((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000))
+		return DC_FAIL_BANDWIDTH_VALIDATE;
+
+	return DC_OK;
+}
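dm_dp_mst_is_port_support_mode() above rejects a mode whose payload bandwidth number (PBN) exceeds the port's full_pbn. As a rough standalone illustration of the unit conversion — one PBN is 54/64 MBytes/s, and drm_dp_calc_pbn_mode() also folds in a small margin; the 0.6% figure and the sample mode below are assumptions, not values from the patch:

	#include <stdio.h>

	int main(void)
	{
		long long pix_clk_khz = 533250;	/* assumed 3840x2160@60 mode */
		int bpp = 8 * 3;		/* 8 bpc RGB, i.e. bpc * 3 as above */

		/* bytes/s = clk * bpp / 8; PBN = bytes/s / (54e6 / 64) */
		long long pbn = (pix_clk_khz * 1000LL * bpp / 8) / (54000000LL / 64);
		pbn += pbn * 6 / 1000;		/* ~0.6% margin */

		printf("estimated PBN: %lld\n", pbn);	/* about 1900 */
		return 0;
	}

If the port's full_pbn is below that figure, the mode is pruned before DC stream validation ever runs.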
@@ -59,8 +59,12 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,

 bool needs_dsc_aux_workaround(struct dc_link *link);

-void pre_validate_dsc(struct drm_atomic_state *state,
+bool pre_validate_dsc(struct drm_atomic_state *state,
 		      struct dm_atomic_state **dm_state_ptr,
 		      struct dsc_mst_fairness_vars *vars);

+enum dc_status dm_dp_mst_is_port_support_mode(
+	struct amdgpu_dm_connector *aconnector,
+	struct dc_stream_state *stream);
+
 #endif
@@ -79,10 +79,12 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link)
 		link->psr_settings.psr_feature_enabled = true;
 	}

-	DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d\n",
+	DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n",
 		link->psr_settings.psr_feature_enabled,
 		link->psr_settings.psr_version,
-		link->dpcd_caps.psr_info.psr_version);
+		link->dpcd_caps.psr_info.psr_version,
+		link->dpcd_caps.psr_info.psr_dpcd_caps.raw,
+		link->dpcd_caps.psr_info.psr2_su_y_granularity_cap);

 }
@@ -97,19 +99,24 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
 	struct dc_link *link = NULL;
 	struct psr_config psr_config = {0};
 	struct psr_context psr_context = {0};
+	struct dc *dc = NULL;
 	bool ret = false;

 	if (stream == NULL)
 		return false;

 	link = stream->link;
+	dc = link->ctx->dc;

 	if (link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) {
 		psr_config.psr_version = link->psr_settings.psr_version;
 		psr_config.psr_frame_capture_indication_req = 0;
 		psr_config.psr_rfb_setup_time = 0x37;
 		psr_config.psr_sdp_transmit_line_num_deadline = 0x20;
 		psr_config.allow_smu_optimizations = 0x0;
 		mod_power_calc_psr_configs(&psr_config, link, stream);

+		/* linux DM specific updating for psr config fields */
+		psr_config.allow_smu_optimizations =
+			(amdgpu_dc_feature_mask & DC_PSR_ALLOW_SMU_OPT) &&
+			mod_power_only_edp(dc->current_state, stream);
+		psr_config.allow_multi_disp_optimizations =
+			(amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT);
+
 		ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context);
@@ -38,6 +38,8 @@ DC_LIBS += dcn303
 DC_LIBS += dcn31
 DC_LIBS += dcn315
 DC_LIBS += dcn316
+DC_LIBS += dcn32
+DC_LIBS += dcn321
 endif

 DC_LIBS += dce120

[diff of one file suppressed because it is too large]
@@ -40,6 +40,7 @@ struct object_info_table {
 	struct atom_data_revision revision;
 	union {
 		struct display_object_info_table_v1_4 *v1_4;
+		struct display_object_info_table_v1_5 *v1_5;
 	};
 };
@@ -522,8 +522,8 @@ static enum bp_result transmitter_control_v2(
 	 */
 	params.acConfig.ucEncoderSel = 1;

-	if (CONNECTOR_ID_DISPLAY_PORT == connector_id
-		|| CONNECTOR_ID_USBC == connector_id)
+	if (CONNECTOR_ID_DISPLAY_PORT == connector_id ||
+	    CONNECTOR_ID_USBC == connector_id)
 		/* Bit4: DP connector flag
 		 * =0 connector is none-DP connector
 		 * =1 connector is DP connector
@@ -77,6 +77,8 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
 	case DCN_VERSION_3_1:
 	case DCN_VERSION_3_15:
 	case DCN_VERSION_3_16:
+	case DCN_VERSION_3_2:
+	case DCN_VERSION_3_21:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
@@ -172,4 +172,38 @@ AMD_DAL_CLK_MGR_DCN316 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn316/,$(CLK_MGR_

 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN316)

+###############################################################################
+# DCN32
+###############################################################################
+CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o
+
+AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32))
+
+ifdef CONFIG_X86
+CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec
+endif
+
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
+
+ifdef CONFIG_X86
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4
+else
+CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2
+endif
+endif
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
+
 endif
@@ -45,6 +45,7 @@
 #include "dcn31/dcn31_clk_mgr.h"
 #include "dcn315/dcn315_clk_mgr.h"
 #include "dcn316/dcn316_clk_mgr.h"
+#include "dcn32/dcn32_clk_mgr.h"


 int clk_mgr_helper_get_active_display_cnt(

@@ -316,8 +317,19 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 		return &clk_mgr->base.base;
 	}
 	break;
-#endif
+	case AMDGPU_FAMILY_GC_11_0_0: {
+		struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+		if (clk_mgr == NULL) {
+			BREAK_TO_DEBUGGER();
+			return NULL;
+		}
+
+		dcn32_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+		return &clk_mgr->base;
+		break;
+	}
+#endif
 	default:
 		ASSERT(0); /* Unknown Asic */
 		break;

@@ -360,6 +372,9 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
 		dcn316_clk_mgr_destroy(clk_mgr);
 		break;

+	case AMDGPU_FAMILY_GC_11_0_0:
+		dcn32_clk_mgr_destroy(clk_mgr);
+		break;
 	default:
 		break;
 	}
@@ -126,16 +126,24 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,

 void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct dc_state *context)
 {
-	int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
-	int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
-
-	uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
-	uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
+	int dpp_divider = 0;
+	int disp_divider = 0;
+	uint32_t dppclk_wdivider = 0;
+	uint32_t dispclk_wdivider = 0;
 	uint32_t current_dispclk_wdivider;
 	uint32_t i;

+	if (clk_mgr->base.clks.dppclk_khz == 0 || clk_mgr->base.clks.dispclk_khz == 0)
+		return;
+
+	dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
+	disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
+
+	dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
+	dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
+
 	REG_GET(DENTIST_DISPCLK_CNTL,
 			DENTIST_DISPCLK_WDIVIDER, &current_dispclk_wdivider);

@@ -436,7 +444,6 @@ void dcn2_read_clocks_from_hw_dentist(struct clk_mgr *clk_mgr_base)
 		clk_mgr_base->clks.dppclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
 				* clk_mgr->base.dentist_vco_freq_khz) / dpp_divider;
 	}
-
 }

 void dcn2_get_clock(struct clk_mgr *clk_mgr,
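The guard added above matters because the divider is computed by dividing the DENTIST VCO frequency by the requested clock, so a zero dppclk_khz or dispclk_khz would be a divide-by-zero. A standalone model of the arithmetic — the scale factor of 4 (dividers move in 0.25 steps) and the sample frequencies are assumptions for illustration:

	#include <stdio.h>

	#define SCALE_FACTOR 4	/* assumed DENTIST_DIVIDER_RANGE_SCALE_FACTOR */

	static int compute_divider(int vco_khz, int clk_khz)
	{
		if (clk_khz == 0)	/* mirrors the early return added above */
			return -1;
		return SCALE_FACTOR * vco_khz / clk_khz;
	}

	int main(void)
	{
		/* 3.6 GHz VCO, 600 MHz DPPCLK -> divider 24, i.e. divide by 6 */
		printf("divider = %d\n", compute_divider(3600000, 600000));
		printf("guarded = %d\n", compute_divider(3600000, 0));
		return 0;
	}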
@@ -41,6 +41,12 @@
 #define FN(reg_name, field) \
 	FD(reg_name##__##field)

+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+	CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
 #define VBIOSSMC_MSG_TestMessage 0x1
 #define VBIOSSMC_MSG_GetSmuVersion 0x2
 #define VBIOSSMC_MSG_PowerUpGfx 0x3

@@ -97,6 +103,12 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
 	result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
 	ASSERT(result == VBIOSSMC_Result_OK);

+	smu_print("SMU response after wait: %d\n", result);
+
+	if (result == VBIOSSMC_Status_BUSY) {
+		return -1;
+	}
+
 	/* First clear response register */
 	REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
@@ -129,7 +129,7 @@ static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)

 	/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
-	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dummy_pstate_latency_us;
+	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;

@@ -137,6 +137,14 @@ static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+	clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
+	clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+	clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
+	clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
+	clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
+	clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
+	clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
+	clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;

 	/* Set D - MALL - SR enter and exit times adjusted for MALL */
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;

@@ -517,6 +525,8 @@ static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct d
 	if (!clk_mgr->smu_present)
 		return;

+	/* TODO - DP2.0 HW: calculate 128b/132b link rate in clock manager with new formula */
+
 	clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;

 	for (i = 0; i < MAX_PIPES * 2; i++) {

@@ -620,7 +630,8 @@ void dcn3_clk_mgr_construct(

 void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
 {
-	kfree(clk_mgr->base.bw_params);
+	if (clk_mgr->base.bw_params)
+		kfree(clk_mgr->base.bw_params);

 	if (clk_mgr->wm_range_table)
 		dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART,
@@ -26,6 +26,66 @@
 #ifndef __DCN30_CLK_MGR_H__
 #define __DCN30_CLK_MGR_H__

+//CLK1_CLK_PLL_REQ
+#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT	0x0
+#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT	0xc
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT	0x10
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK		0x000001FFL
+#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK	0x0000F000L
+#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK	0xFFFF0000L
+//CLK1_CLK0_DFS_CNTL
+#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT	0x0
+#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK	0x0000007FL
+/* DPREF clock related */
+#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT	0x0
+#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK	0x0000007FL
+#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT	0x0
+#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK	0x0000007FL
+#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT	0x0
+#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK	0x0000007FL
+#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT	0x0
+#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK	0x0000007FL
+
+//CLK3_0_CLK3_CLK_PLL_REQ
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT	0x0
+#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT	0xc
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT	0x10
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK	0x000001FFL
+#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK	0x0000F000L
+#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK	0xFFFF0000L
+
+#define mmCLK0_CLK2_DFS_CNTL	0x16C55
+#define mmCLK00_CLK0_CLK2_DFS_CNTL	0x16C55
+#define mmCLK01_CLK0_CLK2_DFS_CNTL	0x16E55
+#define mmCLK02_CLK0_CLK2_DFS_CNTL	0x17055
+
+#define mmCLK0_CLK3_DFS_CNTL	0x16C60
+#define mmCLK00_CLK0_CLK3_DFS_CNTL	0x16C60
+#define mmCLK01_CLK0_CLK3_DFS_CNTL	0x16E60
+#define mmCLK02_CLK0_CLK3_DFS_CNTL	0x17060
+#define mmCLK03_CLK0_CLK3_DFS_CNTL	0x17260
+
+#define mmCLK0_CLK_PLL_REQ	0x16C10
+#define mmCLK00_CLK0_CLK_PLL_REQ	0x16C10
+#define mmCLK01_CLK0_CLK_PLL_REQ	0x16E10
+#define mmCLK02_CLK0_CLK_PLL_REQ	0x17010
+#define mmCLK03_CLK0_CLK_PLL_REQ	0x17210
+
+#define mmCLK1_CLK_PLL_REQ	0x1B00D
+#define mmCLK10_CLK1_CLK_PLL_REQ	0x1B00D
+#define mmCLK11_CLK1_CLK_PLL_REQ	0x1B20D
+#define mmCLK12_CLK1_CLK_PLL_REQ	0x1B40D
+#define mmCLK13_CLK1_CLK_PLL_REQ	0x1B60D
+
+#define mmCLK2_CLK_PLL_REQ	0x17E0D
+
+/* AMCLK */
+#define mmCLK11_CLK1_CLK0_DFS_CNTL	0x1B23F
+#define mmCLK11_CLK1_CLK_PLL_REQ	0x1B20D
+#endif
+
 void dcn3_init_clocks(struct clk_mgr *clk_mgr_base);

 void dcn3_clk_mgr_construct(struct dc_context *ctx,
@@ -28,6 +28,8 @@

 #include "clk_mgr_internal.h"
 #include "reg_helper.h"
+#include "dm_helpers.h"

 #include "dalsmc.h"
 #include "dcn30_smu11_driver_if.h"

@@ -74,6 +76,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un

 static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
 {
+	uint32_t result;
 	/* Wait for response register to be ready */
 	dcn30_smu_wait_for_response(clk_mgr, 10, 200000);

@@ -86,8 +89,14 @@ static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint
 	/* Trigger the message transaction by writing the message ID */
 	REG_WRITE(DAL_MSG_REG, msg_id);

+	result = dcn30_smu_wait_for_response(clk_mgr, 10, 200000);
+
+	if (IS_SMU_TIMEOUT(result)) {
+		dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000);
+	}
+
 	/* Wait for response */
-	if (dcn30_smu_wait_for_response(clk_mgr, 10, 200000) == DALSMC_Result_OK) {
+	if (result == DALSMC_Result_OK) {
 		if (param_out)
 			*param_out = REG_READ(DAL_ARG_REG);
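The same pattern recurs in the per-ASIC SMU helpers below: poll the response register once with a bounded delay-times-retries budget, keep the result, and report a timeout instead of spinning forever. A generic sketch of the shape of that wait loop (read_resp() and delay_us() stand in for REG_READ() and udelay(); this is an illustration, not the driver's code):

	static unsigned int poll_smu_response(unsigned int (*read_resp)(void),
					      void (*delay_us)(unsigned int),
					      unsigned int delay,
					      unsigned int retries)
	{
		unsigned int resp = 0;	/* 0: no response yet / still busy */

		do {
			resp = read_resp();
			if (resp)
				break;
			delay_us(delay);
		} while (retries--);

		return resp;	/* 0 on timeout, SMU result code otherwise */
	}

With the result cached, a caller can both log it (smu_print) and branch on BUSY or timeout without issuing a second wait, which is what the dcn30 change above does.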
@@ -41,6 +41,12 @@
 #define FN(reg_name, field) \
 	FD(reg_name##__##field)

+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+	CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
 #define VBIOSSMC_MSG_GetSmuVersion 0x2
 #define VBIOSSMC_MSG_SetDispclkFreq 0x4
 #define VBIOSSMC_MSG_SetDprefclkFreq 0x5

@@ -96,6 +102,12 @@ static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,

 	result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000);

+	smu_print("SMU response after wait: %d\n", result);
+
+	if (result == VBIOSSMC_Status_BUSY) {
+		return -1;
+	}
+
 	/* First clear response register */
 	REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
@@ -40,6 +40,12 @@
 #define FN(reg_name, field) \
 	FD(reg_name##__##field)

+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+	CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
 #define VBIOSSMC_MSG_TestMessage 0x1
 #define VBIOSSMC_MSG_GetSmuVersion 0x2
 #define VBIOSSMC_MSG_PowerUpGfx 0x3

@@ -104,6 +110,8 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
 	result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
 	ASSERT(result == VBIOSSMC_Result_OK);

+	smu_print("SMU response after wait: %d\n", result);
+
 	if (result == VBIOSSMC_Status_BUSY) {
 		return -1;
 	}
@@ -550,7 +550,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
 		if (!bw_params->clk_table.entries[i].dtbclk_mhz)
 			bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
 	}
-	ASSERT(bw_params->clk_table.entries[i].dcfclk_mhz);
+	ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
 	bw_params->vram_type = bios_info->memory_type;
 	bw_params->num_channels = bios_info->ma_channel_number;
 	if (!bw_params->num_channels)
@@ -70,6 +70,12 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D
 #define REG_NBIO(reg_name) \
 	(NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name)

+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+	CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
 #define mmMP1_C2PMSG_3	0x3B1050C

 #define VBIOSSMC_MSG_TestMessage	0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team

@@ -132,6 +138,8 @@ static int dcn315_smu_send_msg_with_param(
 	result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
 	ASSERT(result == VBIOSSMC_Result_OK);

+	smu_print("SMU response after wait: %d\n", result);
+
 	if (result == VBIOSSMC_Status_BUSY) {
 		return -1;
 	}