Merge tag 'amd-drm-next-5.20-2022-07-05' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.20-2022-07-05:

amdgpu:
- Various spelling and grammar fixes
- Various eDP fixes
- Various DMCUB fixes
- VCN fixes
- GMC 11 fixes
- RAS fixes
- TMZ support for GC 10.3.7
- GPUVM TLB flush fixes
- SMU 13.0.x updates
- DCN 3.2 support
- DCN 3.2.1 support
- MES updates
- GFX11 modifiers support
- USB-C fixes
- MMHUB 3.0.1 support
- SDMA 6.0 doorbell fixes
- Initial devcoredump support
- Enable high priority gfx queue on asics which support it
- Enable GPU reset for SMU 13.0.4
- OLED display fixes
- MPO fixes
- DC frame size fixes
- ASPM support for PCIE 7.4/7.6
- GPU reset support for SMU 13.0.0
- GFX11 updates
- VCN JPEG fix
- BACO support for SMU 13.0.7
- VCN instance handling fix
- GFX8 GPUVM TLB flush fix
- GPU reset rework
- VCN 4.0.2 support
- GTT size fixes
- DP link training fixes
- LSDMA 6.0.1 support
- Various backlight fixes
- Color encoding fixes
- Backlight config cleanup
- VCN 4.x unified queue cleanup

amdkfd:
- MMU notifier fixes
- Updates for GC 10.3.6 and 10.3.7
- P2P DMA support using dma-buf
- Add available memory IOCTL
- SDMA 6.0.1 fix
- MES fixes
- HMM profiler support

radeon:
- License fix
- Backlight config cleanup

UAPI:
- Add available memory IOCTL to amdkfd
  Proposed userspace: https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
- HMM profiler support for amdkfd
  Proposed userspace: https://lists.freedesktop.org/archives/amd-gfx/2022-June/080805.html
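
As a rough illustration of the available-memory ioctl proposed above (a minimal
sketch, assuming the interface lands as proposed, i.e. AMDKFD_IOC_AVAILABLE_MEMORY
and struct kfd_ioctl_get_available_memory_args in <linux/kfd_ioctl.h>):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	/* The gpu_id value here is hypothetical; real IDs come from
	 * /sys/class/kfd/kfd/topology/nodes/<n>/gpu_id. */
	struct kfd_ioctl_get_available_memory_args args = { .gpu_id = 0x1002 };
	int fd = open("/dev/kfd", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == 0)
		printf("available VRAM: %llu bytes\n",
		       (unsigned long long)args.available);
	close(fd);
	return 0;
}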

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220705212633.6037-1-alexander.deucher@amd.com
Dave Airlie 2022-07-12 11:07:30 +10:00
commit 344feb7ccf
334 changed files with 354593 additions and 2370 deletions

View file

@ -88,7 +88,8 @@ amdgpu-y += \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
mmhub_v3_0_1.o
# add UMC block
amdgpu-y += \

View file

@ -223,6 +223,9 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */
static const bool __maybe_unused no_system_mem_limit;
#endif
#ifdef CONFIG_HSA_AMD_P2P
extern bool pcie_p2p;
#endif
extern int amdgpu_tmz;
extern int amdgpu_reset_method;
@ -274,7 +277,7 @@ extern int amdgpu_vcnfw_log;
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
/* smasrt shift bias level limits */
/* smart shift bias level limits */
#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
@ -667,6 +670,7 @@ enum amd_hw_ip_block_type {
RSMU_HWIP,
XGMI_HWIP,
DCI_HWIP,
PCIE_HWIP,
MAX_HWIP
};
@ -1044,10 +1048,18 @@ struct amdgpu_device {
/* reset dump register */
uint32_t *reset_dump_reg_list;
uint32_t *reset_dump_reg_value;
int num_regs;
#ifdef CONFIG_DEV_COREDUMP
struct amdgpu_task_info reset_task_info;
bool reset_vram_lost;
struct timespec64 reset_time;
#endif
bool scpm_enabled;
uint32_t scpm_status;
struct work_struct reset_work;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@ -1242,7 +1254,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job);
int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);

View file

@ -66,9 +66,7 @@ struct amdgpu_atif {
struct amdgpu_atif_notifications notifications;
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
struct backlight_device *bd;
#endif
struct amdgpu_dm_backlight_caps backlight_caps;
};
@ -436,7 +434,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
@ -447,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
*/
backlight_device_set_brightness(atif->bd, req.backlight_level);
}
#endif
}
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
@ -849,7 +845,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
{
struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->notifications.brightness_change) {
if (amdgpu_device_has_dc_support(adev)) {
#if defined(CONFIG_DRM_AMD_DC)
@ -876,7 +871,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
}
}
}
#endif
adev->acpi_nb.notifier_call = amdgpu_acpi_event;
register_acpi_notifier(&adev->acpi_nb);

View file

@ -33,6 +33,7 @@
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
}
}
static void amdgpu_amdkfd_reset_work(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
kfd.reset_work);
amdgpu_device_gpu_recover(adev, NULL);
}
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
adev_to_drm(adev), &gpu_resources);
INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->kfd.reset_work);
}
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@ -671,6 +684,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
goto err_ib_sched;
}
/* Drop the initial kref_init count (see drm_sched_main as example) */
dma_fence_put(f);
ret = dma_fence_wait(f, false);
err_ib_sched:
@ -714,7 +729,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
{
bool all_hub = false;
if (adev->family == AMDGPU_FAMILY_AI)
if (adev->family == AMDGPU_FAMILY_AI ||
adev->family == AMDGPU_FAMILY_RV)
all_hub = true;
return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);

View file

@ -48,6 +48,7 @@ enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
};
struct kfd_mem_attachment {
@ -96,6 +97,7 @@ struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
bool init_complete;
struct work_struct reset_work;
};
enum kgd_engine_type {
@ -266,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
@ -279,10 +282,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size);
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
void **kptr, uint64_t *size);
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
@ -332,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
}
#endif
/* KGD2KFD callbacks */
int kgd2kfd_quiesce_mm(struct mm_struct *mm);
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);

View file

@ -32,12 +32,19 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
#include "kfd_smi_events.h"
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
/*
* Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
* BO chunk
*/
#define VRAM_ALLOCATION_ALIGN (1 << 21)
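/* e.g. a single 4 KiB VRAM allocation is accounted as ALIGN(4096, 1 << 21) = 2 MiB */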
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
@ -108,7 +115,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
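/* e.g. with 64 GiB of total memory this reserves 64 GiB >> 14 = 4 MiB for page tables */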
static size_t amdgpu_amdkfd_acc_size(uint64_t size)
{
@ -148,7 +155,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
vram_needed = size;
/*
* Conservatively round up the allocation requirement to 2 MB
* to avoid fragmentation caused by 4K allocations in the tail
* 2M BO chunk.
*/
vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size;
@ -173,7 +186,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev->kfd.vram_used + vram_needed >
adev->gmc.real_vram_size - reserved_for_pt)) {
adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size) -
reserved_for_pt)) {
ret = -ENOMEM;
goto release;
}
@ -205,7 +220,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.ttm_mem_used -= acc_size;
adev->kfd.vram_used -= size;
adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
kfd_mem_limit.ttm_mem_used -= acc_size;
@ -241,6 +256,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
kfree(bo->kfd_bo);
}
/**
* create_dmamap_sg_bo: Creates an amdgpu_bo object to reflect information
* about a USERPTR or DOORBELL or MMIO BO.
* @adev: Device for which dmamap BO is being created
* @mem: BO of peer device that is being DMA mapped. Provides parameters
* in building the dmamap BO
* @bo_out: Output parameter updated with handle of dmamap BO
*/
static int
create_dmamap_sg_bo(struct amdgpu_device *adev,
struct kgd_mem *mem, struct amdgpu_bo **bo_out)
{
struct drm_gem_object *gem_obj;
int ret, align;
ret = amdgpu_bo_reserve(mem->bo, false);
if (ret)
return ret;
align = 1;
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
amdgpu_bo_unreserve(mem->bo);
if (ret) {
pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
return -EINVAL;
}
*bo_out = gem_to_amdgpu_bo(gem_obj);
(*bo_out)->parent = amdgpu_bo_ref(mem->bo);
return ret;
}
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
*
@ -446,6 +497,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
return pte_flags;
}
/**
* create_sg_table() - Create an sg_table for a contiguous DMA addr range
* @addr: The starting address to point to
* @size: Size of memory area in bytes being pointed to
*
* Allocates an instance of sg_table and initializes it to point to memory
* area specified by input parameters. The address used to build it is assumed
* to be already DMA mapped, if needed.
*
* DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
* because they are physically contiguous.
*
* Return: Initialized instance of SG Table or NULL
*/
static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
{
struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return NULL;
if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
kfree(sg);
return NULL;
}
sg_dma_address(sg->sgl) = addr;
sg->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->sgl->dma_length = size;
#endif
return sg;
}
static int
kfd_mem_dmamap_userptr(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@ -510,6 +593,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
/**
* kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
* @mem: SG BO of the DOORBELL or MMIO resource on the owning device
* @attachment: Virtual address attachment of the BO on accessing device
*
* An access request from the device that owns the DOORBELL does not require DMA mapping.
* This is because the request doesn't go through the PCIe root complex, i.e. it instead
* loops back. The need to DMA map arises only when accessing a peer device's DOORBELL.
*
* In contrast, all access requests for MMIO need to be DMA mapped without regard to
* device ownership. This is because access requests for MMIO go through PCIe root
* complex.
*
* This is accomplished in two steps:
* - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
* in updating requesting device's page table
* - Signal TTM to mark memory pointed to by requesting device's BO as GPU
* accessible. This allows an update of requesting device's page table
* with entries associated with DOORBELL or MMIO memory
*
* This method is invoked in the following contexts:
* - Mapping of DOORBELL or MMIO BO of same or peer device
* - Validating an evicted DOORBELL or MMIO BO on a device seeking access
*
* Return: ZERO if successful, NON-ZERO otherwise
*/
static int
kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
struct amdgpu_device *adev = attachment->adev;
struct ttm_tt *ttm = bo->tbo.ttm;
enum dma_data_direction dir;
dma_addr_t dma_addr;
bool mmio;
int ret;
/* Expect SG Table of dmamap BO to be NULL */
mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
if (unlikely(ttm->sg)) {
pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
return -EINVAL;
}
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
dma_addr = mem->bo->tbo.sg->sgl->dma_address;
pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
dma_addr = dma_map_resource(adev->dev, dma_addr,
mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
ret = dma_mapping_error(adev->dev, dma_addr);
if (unlikely(ret))
return ret;
pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
if (unlikely(!ttm->sg)) {
ret = -ENOMEM;
goto unmap_sg;
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (unlikely(ret))
goto free_sg;
return ret;
free_sg:
sg_free_table(ttm->sg);
kfree(ttm->sg);
ttm->sg = NULL;
unmap_sg:
dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
dir, DMA_ATTR_SKIP_CPU_SYNC);
return ret;
}
static int
kfd_mem_dmamap_attachment(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@ -521,6 +685,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
return kfd_mem_dmamap_userptr(mem, attachment);
case KFD_MEM_ATT_DMABUF:
return kfd_mem_dmamap_dmabuf(attachment);
case KFD_MEM_ATT_SG:
return kfd_mem_dmamap_sg_bo(mem, attachment);
default:
WARN_ON_ONCE(1);
}
@ -561,6 +727,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
/**
* kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
* @mem: SG BO of the DOORBELL or MMIO resource on the owning device
* @attachment: Virtual address attachment of the BO on accessing device
*
* The method performs following steps:
* - Signal TTM to mark memory pointed to by BO as GPU inaccessible
* - Free SG Table that is used to encapsulate DMA mapped memory of
* peer device's DOORBELL or MMIO memory
*
* This method is invoked in the following contexts:
* Unmapping of a DOORBELL or MMIO BO on a device having access to its memory
* Eviction of a DOORBELL or MMIO BO on a device having access to its memory
*
* Return: void
*/
static void
kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
struct amdgpu_device *adev = attachment->adev;
struct ttm_tt *ttm = bo->tbo.ttm;
enum dma_data_direction dir;
if (unlikely(!ttm->sg)) {
pr_err("SG Table of BO is UNEXPECTEDLY NULL");
return;
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(ttm->sg);
kfree(ttm->sg);
ttm->sg = NULL;
bo->tbo.sg = NULL;
}
static void
kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@ -574,38 +784,14 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
case KFD_MEM_ATT_DMABUF:
kfd_mem_dmaunmap_dmabuf(attachment);
break;
case KFD_MEM_ATT_SG:
kfd_mem_dmaunmap_sg_bo(mem, attachment);
break;
default:
WARN_ON_ONCE(1);
}
}
static int
kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
struct amdgpu_bo **bo)
{
unsigned long bo_size = mem->bo->tbo.base.size;
struct drm_gem_object *gobj;
int ret;
ret = amdgpu_bo_reserve(mem->bo, false);
if (ret)
return ret;
ret = amdgpu_gem_object_create(adev, bo_size, 1,
AMDGPU_GEM_DOMAIN_CPU,
AMDGPU_GEM_CREATE_PREEMPTIBLE,
ttm_bo_type_sg, mem->bo->tbo.base.resv,
&gobj);
amdgpu_bo_unreserve(mem->bo);
if (ret)
return ret;
*bo = gem_to_amdgpu_bo(gobj);
(*bo)->parent = amdgpu_bo_ref(mem->bo);
return 0;
}
static int
kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
struct amdgpu_bo **bo)
@ -656,6 +842,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
bool same_hive = false;
int i, ret;
if (!va) {
@ -663,6 +850,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
return -EINVAL;
}
/* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
*
* The access path of MMIO and DOORBELL BOs is always over PCIe.
* In contrast, the access path of VRAM BOs depends upon the type of
* link that connects the peer device. Access over PCIe is allowed
* if the peer device has a large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of the peer device.
*/
if ((adev != bo_adev) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
return -EINVAL;
}
for (i = 0; i <= is_aql; i++) {
attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
if (unlikely(!attachment[i])) {
@ -673,9 +878,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
va + bo_size, vm);
if (adev == bo_adev ||
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
(mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) {
if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
same_hive) {
/* Mappings on the local GPU, or VRAM mappings in the
* local hive, or userptr mapping IOMMU direct map mode
* share the original BO
@ -691,26 +896,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
/* Create an SG BO to DMA-map userptrs on other GPUs */
attachment[i]->type = KFD_MEM_ATT_USERPTR;
ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
if (ret)
goto unwind;
} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
mem->bo->tbo.type != ttm_bo_type_sg) {
/* GTT BOs use DMA-mapping ability of dynamic-attach
* DMA bufs. TODO: The same should work for VRAM on
* large-BAR GPUs.
*/
/* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
} else if (mem->bo->tbo.type == ttm_bo_type_sg) {
WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
"Handing invalid SG BO in ATTACH request");
attachment[i]->type = KFD_MEM_ATT_SG;
ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
if (ret)
goto unwind;
/* Enable access to GTT and VRAM BOs of peer devices */
} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
attachment[i]->type = KFD_MEM_ATT_DMABUF;
ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
if (ret)
goto unwind;
pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
} else {
/* FIXME: Need to DMA-map other BO types:
* large-BAR VRAM, doorbells, MMIO remap
*/
attachment[i]->type = KFD_MEM_ATT_SHARED;
bo[i] = mem->bo;
drm_gem_object_get(&bo[i]->tbo.base);
WARN_ONCE(true, "Handling invalid ATTACH request");
ret = -EINVAL;
goto unwind;
}
/* Add BO to VM internal data structures */
@ -1111,24 +1320,6 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
return ret;
}
static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
{
struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return NULL;
if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
kfree(sg);
return NULL;
}
sg->sgl->dma_address = addr;
sg->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->sgl->dma_length = size;
#endif
return sg;
}
static int process_validate_vms(struct amdkfd_process_info *process_info)
{
struct amdgpu_vm *peer_vm;
@ -1457,6 +1648,22 @@ int amdgpu_amdkfd_criu_resume(void *p)
return ret;
}
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t available;
spin_lock(&kfd_mem_limit.mem_limit_lock);
available = adev->gmc.real_vram_size
- adev->kfd.vram_used
- atomic64_read(&adev->vram_pin_size)
- reserved_for_pt;
spin_unlock(&kfd_mem_limit.mem_limit_lock);
return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
@ -1497,7 +1704,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo_type = ttm_bo_type_sg;
if (size > UINT_MAX)
return -EINVAL;
sg = create_doorbell_sg(*offset, size);
sg = create_sg_table(*offset, size);
if (!sg)
return -ENOMEM;
} else {
@ -1907,8 +2114,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
}
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size)
/**
* amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
* @adev: Device to which allocated BO belongs
* @bo: Buffer object to be mapped
*
* Before return, bo reference count is incremented. To release the reference and unpin/
* unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
*/
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
{
int ret;
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("Failed to reserve bo. ret %d\n", ret);
goto err_reserve_bo_failed;
}
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret);
goto err_pin_bo_failed;
}
ret = amdgpu_ttm_alloc_gart(&bo->tbo);
if (ret) {
pr_err("Failed to bind bo to GART. ret %d\n", ret);
goto err_map_bo_gart_failed;
}
amdgpu_amdkfd_remove_eviction_fence(
bo, bo->kfd_bo->process_info->eviction_fence);
amdgpu_bo_unreserve(bo);
bo = amdgpu_bo_ref(bo);
return 0;
err_map_bo_gart_failed:
amdgpu_bo_unpin(bo);
err_pin_bo_failed:
amdgpu_bo_unreserve(bo);
err_reserve_bo_failed:
return ret;
}
/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
*
* @mem: Buffer object to be mapped for CPU access
* @kptr[out]: pointer in kernel CPU address space
* @size[out]: size of the buffer
*
* Pins the BO and maps it for kernel CPU access. The eviction fence is removed
* from the BO, since pinned BOs cannot be evicted. The bo must remain on the
* validate_list, so the GPU mapping can be restored after a page table was
* evicted.
*
* Return: 0 on success, error code on failure
*/
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
void **kptr, uint64_t *size)
{
int ret;
struct amdgpu_bo *bo = mem->bo;
@ -1959,8 +2227,15 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
return ret;
}
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem)
/** amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel() - Unmap a GTT BO for kernel CPU access
*
* @mem: Buffer object to be unmapped for CPU access
*
* Removes the kernel CPU mapping and unpins the BO. It does not restore the
* eviction fence, so this function should only be used for cleanup before the
* BO is destroyed.
*/
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
@ -2072,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
r = kgd2kfd_quiesce_mm(mm);
r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
@ -2346,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
unlock_out:
mutex_unlock(&process_info->lock);
mmput(mm);
put_task_struct(usertask);
/* If validation failed, reschedule another attempt */
if (evicted_bos)
if (evicted_bos) {
schedule_delayed_work(&process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
kfd_smi_event_queue_restore_rescheduled(mm);
}
mmput(mm);
put_task_struct(usertask);
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given

View file

@ -110,7 +110,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
switch (prio) {
case AMDGPU_CTX_PRIORITY_HIGH:
@ -143,8 +143,9 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
ctx->init_priority : ctx->override_priority;
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_COMPUTE:
hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
break;
case AMDGPU_HW_IP_VCE:
case AMDGPU_HW_IP_VCN_ENC:
@ -779,7 +780,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
amdgpu_ctx_to_drm_sched_prio(priority));
/* set hw priority */
if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
scheds = adev->gpu_sched[hw_ip][hw_prio].sched;

View file

@ -1709,17 +1709,24 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
i++;
} while (len < size);
new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
goto error_free;
}
ret = down_write_killable(&adev->reset_domain->sem);
if (ret)
goto error_free;
swap(adev->reset_dump_reg_list, tmp);
swap(adev->reset_dump_reg_value, new);
adev->num_regs = i;
up_write(&adev->reset_domain->sem);
ret = size;
error_free:
kfree(tmp);
kfree(new);
return ret;
}

View file

@ -32,6 +32,9 @@
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
@ -1942,35 +1945,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_OLAND:
case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
case CHIP_HAWAII:
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_VEGA20:
case CHIP_ALDEBARAN:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
case CHIP_BEIGE_GOBY:
default:
return 0;
case CHIP_VEGA10:
@ -3316,38 +3290,12 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_MULLINS:
/*
* We have systems in the wild with these ASICs that require
* LVDS and VGA support which is not supported with DC.
* VGA support which is not supported with DC.
*
* Fallback to the non-DC driver here by default so as not to
* cause regressions.
*/
return amdgpu_dc > 0;
case CHIP_HAWAII:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN)
case CHIP_RAVEN:
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_RENOIR:
case CHIP_CYAN_SKILLFISH:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
case CHIP_BEIGE_GOBY:
case CHIP_VANGOGH:
case CHIP_YELLOW_CARP:
#endif
default:
return amdgpu_dc != 0;
#else
@ -3369,7 +3317,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
*/
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev) ||
if (amdgpu_sriov_vf(adev) ||
adev->enable_virtual_display ||
(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
@ -3667,14 +3615,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_mcbp)
DRM_INFO("MCBP is enabled\n");
if (adev->asic_type >= CHIP_NAVI10) {
if (amdgpu_mes || amdgpu_mes_kiq)
adev->enable_mes = true;
if (amdgpu_mes_kiq)
adev->enable_mes_kiq = true;
}
/*
* Reset domain needs to be present early, before the XGMI hive is discovered
* (if any) and initialized, to use the reset sem and in_gpu reset flag
@ -4666,6 +4606,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
amdgpu_virt_fini_data_exchange(adev);
}
amdgpu_fence_driver_isr_toggle(adev, true);
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@ -4681,6 +4623,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
amdgpu_fence_driver_force_completion(ring);
}
amdgpu_fence_driver_isr_toggle(adev, false);
if (job && job->vm)
drm_sched_increase_karma(&job->base);
@ -4721,20 +4665,73 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
{
uint32_t reg_value;
int i;
lockdep_assert_held(&adev->reset_domain->sem);
dump_stack();
for (i = 0; i < adev->num_regs; i++) {
reg_value = RREG32(adev->reset_dump_reg_list[i]);
trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
adev->reset_dump_reg_value[i]);
}
return 0;
}
#ifdef CONFIG_DEV_COREDUMP
static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct drm_printer p;
struct amdgpu_device *adev = data;
struct drm_print_iterator iter;
int i;
iter.data = buffer;
iter.offset = 0;
iter.start = offset;
iter.remain = count;
p = drm_coredump_printer(&iter);
drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
drm_printf(&p, "kernel: " UTS_RELEASE "\n");
drm_printf(&p, "module: " KBUILD_MODNAME "\n");
drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
if (adev->reset_task_info.pid)
drm_printf(&p, "process_name: %s PID: %d\n",
adev->reset_task_info.process_name,
adev->reset_task_info.pid);
if (adev->reset_vram_lost)
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
if (adev->num_regs) {
drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
for (i = 0; i < adev->num_regs; i++)
drm_printf(&p, "0x%08x: 0x%08x\n",
adev->reset_dump_reg_list[i],
adev->reset_dump_reg_value[i]);
}
return count - iter.remain;
}
static void amdgpu_devcoredump_free(void *data)
{
}
static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
ktime_get_ts64(&adev->reset_time);
dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
amdgpu_devcoredump_read, amdgpu_devcoredump_free);
}
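/* The captured dump is exposed via the devcoredump class device (e.g.
 * /sys/class/devcoredump/devcd<N>/data) and is freed when userspace writes
 * to that file or the coredump times out. */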
#endif
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context)
{
@ -4819,6 +4816,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
#ifdef CONFIG_DEV_COREDUMP
tmp_adev->reset_vram_lost = vram_lost;
memset(&tmp_adev->reset_task_info, 0,
sizeof(tmp_adev->reset_task_info));
if (reset_context->job && reset_context->job->vm)
tmp_adev->reset_task_info =
reset_context->job->vm->task_info;
amdgpu_reset_capture_coredumpm(tmp_adev);
#endif
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU reset!\n");
amdgpu_inc_vram_lost(tmp_adev);
@ -5004,16 +5010,32 @@ static void amdgpu_device_recheck_guilty_jobs(
/* clear the job's guilty status and depend on the following step to decide the real one */
drm_sched_reset_karma(s_job);
/* for the real bad job, it will be resubmitted twice, adding a dma_fence_get
* to make sure fence is balanced */
dma_fence_get(s_job->s_fence->parent);
drm_sched_resubmit_jobs_ext(&ring->sched, 1);
if (!s_job->s_fence->parent) {
DRM_WARN("Failed to get a HW fence for job!");
continue;
}
ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
if (ret == 0) { /* timeout */
DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
ring->sched.name, s_job->id);
amdgpu_fence_driver_isr_toggle(adev, true);
/* Clear this failed job from fence array */
amdgpu_fence_driver_clear_job_fences(ring);
amdgpu_fence_driver_isr_toggle(adev, false);
/* Since the job won't signal and we go for
* another resubmit, drop this parent pointer
*/
dma_fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = NULL;
/* set guilty */
drm_sched_increase_karma(s_job);
retry:
@ -5042,7 +5064,6 @@ static void amdgpu_device_recheck_guilty_jobs(
/* got the hw fence, signal finished fence */
atomic_dec(ring->sched.score);
dma_fence_put(s_job->s_fence->parent);
dma_fence_get(&s_job->s_fence->finished);
dma_fence_signal(&s_job->s_fence->finished);
dma_fence_put(&s_job->s_fence->finished);
@ -5055,8 +5076,29 @@ static void amdgpu_device_recheck_guilty_jobs(
}
}
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
#if defined(CONFIG_DEBUG_FS)
if (!amdgpu_sriov_vf(adev))
cancel_work(&adev->reset_work);
#endif
if (adev->kfd.dev)
cancel_work(&adev->kfd.reset_work);
if (amdgpu_sriov_vf(adev))
cancel_work(&adev->virt.flr_work);
if (con && adev->ras_enabled)
cancel_work(&con->recovery_work);
}
/**
* amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu_device pointer
* @job: which job triggered the hang
@ -5066,7 +5108,7 @@ static void amdgpu_device_recheck_guilty_jobs(
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct list_head device_list, *device_list_handle = NULL;
@ -5164,7 +5206,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
*/
amdgpu_unregister_gpu_instance(tmp_adev);
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
/* disable ras on ALL IPs */
if (!need_emergency_restart &&
@ -5194,8 +5236,8 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
if (job && job->base.s_fence->parent &&
dma_fence_is_signaled(job->base.s_fence->parent)) {
if (job && (job->hw_fence.ops != NULL) &&
dma_fence_is_signaled(&job->hw_fence)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@ -5210,6 +5252,12 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
r, adev_to_drm(tmp_adev)->unique);
tmp_adev->asic_reset_res = r;
}
/*
* Drop all pending non scheduler resets. Scheduler resets
* were already dropped during drm_sched_stop
*/
amdgpu_device_stop_pending_resets(tmp_adev);
}
tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
@ -5308,40 +5356,11 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
atomic_set(&adev->reset_domain->reset_res, r);
return r;
}
struct amdgpu_recover_work_struct {
struct work_struct base;
struct amdgpu_device *adev;
struct amdgpu_job *job;
int ret;
};
static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
{
struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
}
/*
* Serialize gpu recover into reset domain single threaded wq
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
return -EAGAIN;
flush_work(&work.base);
return work.ret;
}
/**
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
*
@ -5490,6 +5509,36 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
/**
* amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
*
* @adev: amdgpu_device pointer
* @peer_adev: amdgpu_device pointer for peer device trying to access @adev
*
* Return true if @peer_adev can access (DMA) @adev through the PCIe
* BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
* @peer_adev.
*/
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
uint64_t address_mask = peer_adev->dev->dma_mask ?
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
adev->gmc.aper_base + adev->gmc.aper_size - 1;
bool p2p_access = !(pci_p2pdma_distance_many(adev->pdev,
&peer_adev->dev, 1, true) < 0);
return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
!(adev->gmc.aper_base & address_mask ||
aper_limit & address_mask));
#else
return false;
#endif
}
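/* Worked example: a peer with a 40-bit DMA mask gives address_mask = ~((1ULL << 40) - 1),
 * so the check passes only if all of VRAM is CPU-visible (large BAR) and the whole
 * aperture sits below 1 TiB. */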
int amdgpu_device_baco_enter(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);

View file

@ -194,6 +194,7 @@ static int hw_id_map[MAX_HWIP] = {
[UMC_HWIP] = UMC_HWID,
[XGMI_HWIP] = XGMI_HWID,
[DCI_HWIP] = DCI_HWID,
[PCIE_HWIP] = PCIE_HWID,
};
static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary)
@ -1435,6 +1436,11 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return -EINVAL;
}
/* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
* which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES
* but that may change in the future with new GPUs so keep this
* check for defensive purposes.
*/
if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
dev_err(adev->dev, "invalid vcn instances\n");
return -EINVAL;
@ -1450,6 +1456,9 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
case 1:
/* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
* so this won't overflow.
*/
for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
adev->vcn.vcn_codec_disable_mask[v] =
le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
@ -1709,6 +1718,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
amdgpu_device_ip_block_add(adev, &dm_ip_block);
break;
default:
@ -1886,6 +1897,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
break;
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 2):
case IP_VERSION(4, 0, 4):
amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
@ -2321,6 +2333,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
switch (adev->ip_versions[LSDMA_HWIP][0]) {
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
adev->lsdma.funcs = &lsdma_v6_0_funcs;
break;

View file

@ -30,6 +30,9 @@
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "soc15_common.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include <asm/div64.h>
#include <linux/pci.h>
@ -663,6 +666,11 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
uint64_t modifier = 0;
int num_pipes = 0;
int num_pkrs = 0;
num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
modifier = DRM_FORMAT_MOD_LINEAR;
@ -675,7 +683,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
int bank_xor_bits = 0;
int packers = 0;
int rb = 0;
int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
int pipes = ilog2(num_pipes);
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
switch (swizzle >> 2) {
@ -691,12 +699,17 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 6: /* 64 KiB _X */
block_size_bits = 16;
break;
case 7: /* 256 KiB */
block_size_bits = 18;
break;
default:
/* RESERVED or VAR */
return -EINVAL;
}
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX11;
else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10;
@ -707,19 +720,32 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 0: /* Z microtiling */
return -EINVAL;
case 1: /* S microtiling */
if (!has_xor)
version = AMD_FMT_MOD_TILE_VER_GFX9;
if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
if (!has_xor)
version = AMD_FMT_MOD_TILE_VER_GFX9;
}
break;
case 2:
if (!has_xor && afb->base.format->cpp[0] != 4)
version = AMD_FMT_MOD_TILE_VER_GFX9;
if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
if (!has_xor && afb->base.format->cpp[0] != 4)
version = AMD_FMT_MOD_TILE_VER_GFX9;
}
break;
case 3:
break;
}
if (has_xor) {
if (num_pipes == num_pkrs && num_pkrs == 0) {
DRM_ERROR("invalid number of pipes and packers\n");
return -EINVAL;
}
switch (version) {
case AMD_FMT_MOD_TILE_VER_GFX11:
pipe_xor_bits = min(block_size_bits - 8, pipes);
packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
break;
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
pipe_xor_bits = min(block_size_bits - 8, pipes);
packers = min(block_size_bits - 8 - pipe_xor_bits,
@ -753,9 +779,10 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
u64 render_dcc_offset;
/* Enable constant encode on RAVEN2 and later. */
bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN ||
bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN ||
(adev->asic_type == CHIP_RAVEN &&
adev->external_rev_id >= 0x81);
adev->external_rev_id >= 0x81)) &&
adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0);
int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
@ -870,10 +897,11 @@ static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
}
case AMD_FMT_MOD_TILE_VER_GFX10:
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: {
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
case AMD_FMT_MOD_TILE_VER_GFX11: {
int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
++pipes_log2;
@ -966,6 +994,9 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
case DC_SW_64KB_S_X:
block_size_log2 = 16;
break;
case DC_SW_VAR_S_X:
block_size_log2 = 18;
break;
default:
drm_dbg_kms(rfb->base.dev,
"Swizzle mode with unknown block size: %d\n", swizzle);

View file

@ -35,8 +35,6 @@
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags);

View file

@ -802,6 +802,16 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (
module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif
/**
* DOC: pcie_p2p (bool)
* Enable PCIe P2P (requires large-BAR). Default value: true (on)
*/
#ifdef CONFIG_HSA_AMD_P2P
bool pcie_p2p = true;
module_param(pcie_p2p, bool, 0444);
MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
#endif
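/* When built with CONFIG_HSA_AMD_P2P, this can be toggled at module load time,
 * e.g. amdgpu.pcie_p2p=0 on the kernel command line. */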
/**
* DOC: dcfeaturemask (uint)
* Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.

View file

@ -39,6 +39,7 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
/*
* Fences
@ -163,11 +164,16 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
if (job && job->job_run_counter) {
/* reinit seq for resubmitted jobs */
fence->seqno = seq;
/* To be in line with external fence creation and other drivers */
dma_fence_get(fence);
} else {
if (job)
if (job) {
dma_fence_init(fence, &amdgpu_job_fence_ops,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
/* Against remove in amdgpu_job_{free, free_cb} */
dma_fence_get(fence);
}
else
dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
@ -531,6 +537,24 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
}
}
/* Will either stop and flush handlers for the amdgpu interrupt or re-enable it */
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
{
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
continue;
if (stop)
disable_irq(adev->irq.irq);
else
enable_irq(adev->irq.irq);
}
}
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
unsigned int i, j;
@ -594,8 +618,10 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
ptr = &ring->fence_drv.fences[i];
old = rcu_dereference_protected(*ptr, 1);
if (old && old->ops == &amdgpu_job_fence_ops)
if (old && old->ops == &amdgpu_job_fence_ops) {
RCU_INIT_POINTER(*ptr, NULL);
dma_fence_put(old);
}
}
}
@ -798,7 +824,10 @@ static int gpu_recover_get(void *data, u64 *val)
return 0;
}
*val = amdgpu_device_gpu_recover(adev, NULL);
if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
flush_work(&adev->reset_work);
*val = atomic_read(&adev->reset_domain->reset_res);
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
@ -810,6 +839,14 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
"%lld\n");
static void amdgpu_debugfs_reset_work(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
amdgpu_device_gpu_recover(adev, NULL);
}
#endif
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
@ -821,9 +858,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
&amdgpu_debugfs_fence_info_fops);
if (!amdgpu_sriov_vf(adev))
if (!amdgpu_sriov_vf(adev)) {
INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
&amdgpu_debugfs_gpu_recover_fops);
}
#endif
}

View file

@ -142,7 +142,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
}
}
static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
{
return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
}
static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
if (amdgpu_compute_multipipe != -1) {
DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
@ -158,6 +163,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
return adev->gfx.mec.num_mec > 1;
}
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
int queue = ring->queue;
int pipe = ring->pipe;
/* Policy: use pipe1 queue0 as high priority graphics queue if we
* have more than one gfx pipe.
*/
if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
int me = ring->me;
int bit;
bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
if (ring == &adev->gfx.gfx_ring[bit])
return true;
}
return false;
}
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@ -174,7 +201,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe,
adev->gfx.num_compute_rings);
@ -200,18 +227,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, me;
int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
adev->gfx.me.num_queue_per_pipe;
for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
queue = i % adev->gfx.me.num_queue_per_pipe;
me = (i / adev->gfx.me.num_queue_per_pipe)
/ adev->gfx.me.num_pipe_per_me;
if (me >= adev->gfx.me.num_me)
break;
if (multipipe_policy) {
/* policy: amdgpu owns the first queue per pipe at this stage
* will extend to mulitple queues per pipe later */
if (me == 0 && queue < 1)
for (i = 0; i < max_queues_per_me; i++) {
pipe = i % adev->gfx.me.num_pipe_per_me;
queue = (i / adev->gfx.me.num_pipe_per_me) %
adev->gfx.me.num_queue_per_pipe;
set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
adev->gfx.me.queue_bitmap);
}
} else {
for (i = 0; i < max_queues_per_me; ++i)
set_bit(i, adev->gfx.me.queue_bitmap);
}
@ -666,6 +699,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
if (amdgpu_device_skip_hw_access(adev))
return 0;
if (adev->mes.ring.sched.ready)
return amdgpu_mes_rreg(adev, reg);
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
@ -733,6 +769,11 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
if (amdgpu_device_skip_hw_access(adev))
return;
if (adev->mes.ring.sched.ready) {
amdgpu_mes_wreg(adev, reg, v);
return;
}
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);

View file

@ -396,6 +396,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,

View file

@ -242,7 +242,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
* @entry: IV entry
*
* Decodes the interrupt vector at the current rptr
* position and also advance the position for for Vega10
* position and also advance the position for Vega10
* and later GPUs.
*/
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,

View file

@ -24,12 +24,18 @@
#ifndef __AMDGPU_IMU_H__
#define __AMDGPU_IMU_H__
enum imu_work_mode {
DEBUG_MODE,
MISSION_MODE
};
struct amdgpu_imu_funcs {
int (*init_microcode)(struct amdgpu_device *adev);
int (*load_microcode)(struct amdgpu_device *adev);
void (*setup_imu)(struct amdgpu_device *adev);
int (*start_imu)(struct amdgpu_device *adev);
void (*program_rlc_ram)(struct amdgpu_device *adev);
int (*wait_for_reset_status)(struct amdgpu_device *adev);
};
struct imu_rlc_ram_golden {
@ -46,6 +52,7 @@ struct imu_rlc_ram_golden {
struct amdgpu_imu {
const struct amdgpu_imu_funcs *funcs;
enum imu_work_mode mode;
};
#endif

View file

@ -320,6 +320,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (!amdgpu_device_has_dc_support(adev)) {
if (!adev->enable_virtual_display)
/* Disable vblank IRQs aggressively for power-saving */
/* XXX: can this be enabled for DC? */
adev_to_drm(adev)->vblank_disable_immediate = true;
r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);

View file

@ -64,7 +64,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ti.process_name, ti.tgid, ti.task_name, ti.pid);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
r = amdgpu_device_gpu_recover_imp(ring->adev, job);
r = amdgpu_device_gpu_recover(ring->adev, job);
if (r)
DRM_ERROR("GPU Recovery Failed: %d\n", r);
} else {
@ -262,10 +262,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
- if (!job->job_run_counter)
- dma_fence_get(fence);
- else if (finished->error < 0)
- dma_fence_put(&job->hw_fence);
job->job_run_counter++;
amdgpu_job_free_resources(job);


@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
- vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = gtt_man->size;
- mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);


@ -189,15 +189,29 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
if (r) {
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
dev_err(adev->dev,
"(%d) query_status_fence_offs wb alloc failed\n", r);
- return r;
+ goto error_ids;
}
adev->mes.query_status_fence_gpu_addr =
adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
adev->mes.query_status_fence_ptr =
(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
if (r) {
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
dev_err(adev->dev,
"(%d) read_val_offs alloc failed\n", r);
goto error_ids;
}
adev->mes.read_val_gpu_addr =
adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
adev->mes.read_val_ptr =
(uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
r = amdgpu_mes_doorbell_init(adev);
if (r)
goto error;
@ -206,6 +220,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
error:
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
error_ids:
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
@ -218,6 +234,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
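
The error paths in this hunk free, in reverse order, exactly what was already allocated before bailing out to error_ids. A compact user-space sketch of the same shape; the allocator stand-ins are invented:

#include <stdio.h>

/* stand-ins for amdgpu_device_wb_get()/amdgpu_device_wb_free() */
static int alloc_wb(const char *name, int fail)
{
    if (fail)
        return -12; /* -ENOMEM */
    printf("alloc %s\n", name);
    return 0;
}
static void free_wb(const char *name) { printf("free %s\n", name); }

static int mes_init_sketch(void)
{
    int r;

    r = alloc_wb("sch_ctx", 0);
    if (r)
        return r;

    r = alloc_wb("query_status_fence", 0);
    if (r) {
        free_wb("sch_ctx");
        return r;
    }

    /* make the last allocation fail to exercise the unwind */
    r = alloc_wb("read_val", 1);
    if (r) {
        free_wb("sch_ctx");
        free_wb("query_status_fence");
        return r;
    }
    return 0;
}

int main(void) { return mes_init_sketch() ? 1 : 0; }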
@ -675,8 +693,10 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
queue_input.doorbell_offset = qprops->doorbell_off;
queue_input.mqd_addr = queue->mqd_gpu_addr;
queue_input.wptr_addr = qprops->wptr_gpu_addr;
queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
queue_input.queue_type = qprops->queue_type;
queue_input.paging = qprops->paging;
queue_input.is_kfd_process = 0;
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
if (r) {
@ -792,6 +812,118 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
return r;
}
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
struct mes_misc_op_input op_input;
int r, val = 0;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_READ_REG;
op_input.read_reg.reg_offset = reg;
op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
if (!adev->mes.funcs->misc_op) {
DRM_ERROR("mes rreg is not supported!\n");
goto error;
}
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
DRM_ERROR("failed to read reg (0x%x)\n", reg);
else
val = *(adev->mes.read_val_ptr);
error:
amdgpu_mes_unlock(&adev->mes);
return val;
}
int amdgpu_mes_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t val)
{
struct mes_misc_op_input op_input;
int r;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_WRITE_REG;
op_input.write_reg.reg_offset = reg;
op_input.write_reg.reg_value = val;
if (!adev->mes.funcs->misc_op) {
DRM_ERROR("mes wreg is not supported!\n");
r = -EINVAL;
goto error;
}
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
DRM_ERROR("failed to write reg (0x%x)\n", reg);
error:
amdgpu_mes_unlock(&adev->mes);
return r;
}
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
struct mes_misc_op_input op_input;
int r;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
op_input.wrm_reg.reg0 = reg0;
op_input.wrm_reg.reg1 = reg1;
op_input.wrm_reg.ref = ref;
op_input.wrm_reg.mask = mask;
if (!adev->mes.funcs->misc_op) {
DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
r = -EINVAL;
goto error;
}
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
error:
amdgpu_mes_unlock(&adev->mes);
return r;
}
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
uint32_t val, uint32_t mask)
{
struct mes_misc_op_input op_input;
int r;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_WRM_REG_WAIT;
op_input.wrm_reg.reg0 = reg;
op_input.wrm_reg.ref = val;
op_input.wrm_reg.mask = mask;
if (!adev->mes.funcs->misc_op) {
DRM_ERROR("mes reg wait is not supported!\n");
r = -EINVAL;
goto error;
}
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
DRM_ERROR("failed to reg wait\n");
error:
amdgpu_mes_unlock(&adev->mes);
return r;
}
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
@ -801,6 +933,8 @@ amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
props->hqd_base_gpu_addr = ring->gpu_addr;
props->rptr_gpu_addr = ring->rptr_gpu_addr;
props->wptr_gpu_addr = ring->wptr_gpu_addr;
props->wptr_mc_addr =
ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
props->queue_size = ring->ring_size;
props->eop_gpu_addr = ring->eop_gpu_addr;
props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
@ -961,7 +1095,8 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
r = amdgpu_bo_create_kernel(adev,
sizeof(struct amdgpu_mes_ctx_meta_data),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &ctx_data->meta_data_obj, NULL,
+ &ctx_data->meta_data_obj,
+ &ctx_data->meta_data_mc_addr,
&ctx_data->meta_data_ptr);
if (!ctx_data->meta_data_obj)
return -ENOMEM;
@ -975,7 +1110,9 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
{
if (ctx_data->meta_data_obj)
- amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
+ &ctx_data->meta_data_mc_addr,
+ &ctx_data->meta_data_ptr);
}
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,


@ -33,6 +33,13 @@
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2
#define AMDGPU_MES_API_VERSION_SHIFT 12
#define AMDGPU_MES_FEAT_VERSION_SHIFT 24
#define AMDGPU_MES_VERSION_MASK 0x00000fff
#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
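
These masks slice one 32-bit version word into three fields (mes_v11_0_add_hw_queue() later keys off the API field to pick wptr_mc_addr over wptr_addr). A small decode sketch; the example value is invented:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_MES_VERSION_MASK       0x00000fff
#define AMDGPU_MES_API_VERSION_MASK   0x00fff000
#define AMDGPU_MES_API_VERSION_SHIFT  12
#define AMDGPU_MES_FEAT_VERSION_MASK  0xff000000
#define AMDGPU_MES_FEAT_VERSION_SHIFT 24

int main(void)
{
    uint32_t sched_version = 0x04002005; /* made-up example value */

    printf("mes version:  %u\n",
           (unsigned)(sched_version & AMDGPU_MES_VERSION_MASK));
    printf("api version:  %u\n",
           (unsigned)((sched_version & AMDGPU_MES_API_VERSION_MASK) >>
                      AMDGPU_MES_API_VERSION_SHIFT));
    printf("feat version: %u\n",
           (unsigned)((sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
                      AMDGPU_MES_FEAT_VERSION_SHIFT));
    return 0;
}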
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
@ -65,6 +72,9 @@ struct amdgpu_mes {
spinlock_t queue_id_lock;
uint32_t sched_version;
uint32_t kiq_version;
uint32_t total_max_queue;
uint32_t doorbell_id_offset;
uint32_t max_doorbell_slices;
@ -109,6 +119,10 @@ struct amdgpu_mes {
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
uint32_t read_val_offs;
uint64_t read_val_gpu_addr;
uint32_t *read_val_ptr;
uint32_t saved_flags;
/* initialize kiq pipe */
@ -166,6 +180,7 @@ struct amdgpu_mes_queue_properties {
uint64_t hqd_base_gpu_addr;
uint64_t rptr_gpu_addr;
uint64_t wptr_gpu_addr;
uint64_t wptr_mc_addr;
uint32_t queue_size;
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
@ -198,12 +213,14 @@ struct mes_add_queue_input {
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
uint64_t wptr_mc_addr;
uint32_t queue_type;
uint32_t paging;
uint32_t gws_base;
uint32_t gws_size;
uint64_t tba_addr;
uint64_t tma_addr;
uint32_t is_kfd_process;
};
struct mes_remove_queue_input {
@ -233,6 +250,36 @@ struct mes_resume_gang_input {
uint64_t gang_context_addr;
};
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
MES_MISC_OP_WRM_REG_WAIT,
MES_MISC_OP_WRM_REG_WR_WAIT,
};
struct mes_misc_op_input {
enum mes_misc_opcode op;
union {
struct {
uint32_t reg_offset;
uint64_t buffer_addr;
} read_reg;
struct {
uint32_t reg_offset;
uint32_t reg_value;
} write_reg;
struct {
uint32_t ref;
uint32_t mask;
uint32_t reg0;
uint32_t reg1;
} wrm_reg;
};
};
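
Callers fill exactly one arm of this tagged union before invoking the misc_op hook. A hedged sketch with the struct abbreviated to two arms and a made-up register offset:

#include <stdint.h>
#include <stdio.h>

enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG };

struct mes_misc_op_input {
    enum mes_misc_opcode op;
    union {
        struct { uint32_t reg_offset; uint64_t buffer_addr; } read_reg;
        struct { uint32_t reg_offset; uint32_t reg_value; } write_reg;
    };
};

int main(void)
{
    struct mes_misc_op_input op_input = {0};

    /* mirrors amdgpu_mes_wreg(): select the opcode, fill its arm */
    op_input.op = MES_MISC_OP_WRITE_REG;
    op_input.write_reg.reg_offset = 0x1234; /* made-up offset */
    op_input.write_reg.reg_value = 0xdeadbeef;

    printf("op %d writes 0x%x to reg 0x%x\n", (int)op_input.op,
           op_input.write_reg.reg_value, op_input.write_reg.reg_offset);
    return 0;
}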
struct amdgpu_mes_funcs {
int (*add_hw_queue)(struct amdgpu_mes *mes,
struct mes_add_queue_input *input);
@ -248,6 +295,9 @@ struct amdgpu_mes_funcs {
int (*resume_gang)(struct amdgpu_mes *mes,
struct mes_resume_gang_input *input);
int (*misc_op)(struct amdgpu_mes *mes,
struct mes_misc_op_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
@ -280,6 +330,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq);
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t val);
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,


@ -107,6 +107,7 @@ struct amdgpu_mes_ctx_meta_data {
struct amdgpu_mes_ctx_data {
struct amdgpu_bo *meta_data_obj;
uint64_t meta_data_gpu_addr;
uint64_t meta_data_mc_addr;
struct amdgpu_bo_va *meta_data_va;
void *meta_data_ptr;
uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];


@ -350,15 +350,11 @@ struct amdgpu_mode_info {
#define AMDGPU_MAX_BL_LEVEL 0xFF
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
struct amdgpu_backlight_privdata {
struct amdgpu_encoder *encoder;
uint8_t negative;
};
#endif
struct amdgpu_atom_ss {
uint16_t percentage;
uint16_t percentage_divider;


@ -35,6 +35,8 @@
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "atom.h"
#include "amdgpu_reset.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@ -2946,7 +2948,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
- schedule_work(&ras->recovery_work);
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
return 0;
}


@ -328,10 +328,16 @@ struct ecc_info_per_ch {
uint16_t ce_count_hi_chip;
uint64_t mca_umc_status;
uint64_t mca_umc_addr;
uint64_t mca_ceumc_addr;
};
struct umc_ecc_info {
struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
/* Determine whether the SMU ecctable supports
 * recording correctable error addresses
*/
int record_ce_addr_supported;
};
struct amdgpu_ras {


@ -132,6 +132,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
}
atomic_set(&reset_domain->in_gpu_reset, 0);
atomic_set(&reset_domain->reset_res, 0);
init_rwsem(&reset_domain->sem);
return reset_domain;


@ -82,6 +82,7 @@ struct amdgpu_reset_domain {
enum amdgpu_reset_domain_type type;
struct rw_semaphore sem;
atomic_t in_gpu_reset;
atomic_t reset_res;
};


@ -543,12 +543,12 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
*/
prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
- prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- prop->hqd_queue_priority =
- AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- }
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+ prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
}
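
The merged condition reads as one predicate: a queue gets maximum priority when the high-priority check for its ring type passes. A sketch with invented stand-ins for the two helpers:

#include <stdbool.h>
#include <stdio.h>

enum ring_type { RING_TYPE_GFX, RING_TYPE_COMPUTE };

/* stand-ins for the amdgpu_gfx_is_high_priority_*_queue() helpers */
static bool high_prio_compute(int hw_prio) { return hw_prio == 1; }
static bool high_prio_gfx(int hw_prio) { return hw_prio == 1; }

static bool wants_max_queue_priority(enum ring_type type, int hw_prio)
{
    return (type == RING_TYPE_COMPUTE && high_prio_compute(hw_prio)) ||
           (type == RING_TYPE_GFX && high_prio_gfx(hw_prio));
}

int main(void)
{
    printf("%d\n", wants_max_queue_priority(RING_TYPE_GFX, 1)); /* 1 */
    return 0;
}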


@ -143,6 +143,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
/*
* Rings.


@ -1798,18 +1798,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
- /* Compute GTT size, either bsaed on 3/4th the size of RAM size
+ /* Compute GTT size, either based on 1/2 the size of RAM size
* or whatever the user passed on module init */
if (amdgpu_gtt_size == -1) {
struct sysinfo si;
si_meminfo(&si);
- gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- adev->gmc.mc_vram_size),
- ((uint64_t)si.totalram * si.mem_unit * 3/4));
- }
- else
+ /* Certain GL unit tests for large textures can cause problems
+ * with the OOM killer since there is no way to link this memory
+ * to a process. This was originally mitigated (but not necessarily
+ * eliminated) by limiting the GTT size. The problem is this limit
+ * is often too low for many modern games so just make the limit 1/2
+ * of system memory which aligns with TTM. The OOM accounting needs
+ * to be addressed, but we shouldn't prevent common 3D applications
+ * from being usable just to potentially mitigate that corner case.
+ */
+ gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+ (u64)si.totalram * si.mem_unit / 2);
+ } else {
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+ }
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
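
Net effect: unless the amdgpu_gtt_size module parameter is set, GTT now defaults to max(3 GiB, half of system RAM). A user-space sketch of the selection, taking AMDGPU_DEFAULT_GTT_SIZE_MB as the driver's 3072:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3 GiB, as in the driver */

static uint64_t pick_gtt_size(int64_t gtt_size_param_mb, uint64_t totalram_bytes)
{
    uint64_t def = AMDGPU_DEFAULT_GTT_SIZE_MB << 20;
    uint64_t half_ram = totalram_bytes / 2;

    if (gtt_size_param_mb == -1) /* module parameter left at its default */
        return def > half_ram ? def : half_ram;
    return (uint64_t)gtt_size_param_mb << 20;
}

int main(void)
{
    /* 32 GiB of system RAM -> a 16 GiB GTT by default */
    printf("%llu MiB\n",
           (unsigned long long)(pick_gtt_size(-1, 32ULL << 30) >> 20));
    return 0;
}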


@ -486,26 +486,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS12:
case CHIP_VEGAM:
return AMDGPU_FW_LOAD_SMU;
case CHIP_VEGA10:
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_RENOIR:
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_VANGOGH:
case CHIP_DIMGREY_CAVEFISH:
case CHIP_ALDEBARAN:
case CHIP_BEIGE_GOBY:
case CHIP_YELLOW_CARP:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
case CHIP_CYAN_SKILLFISH:
if (!(load_type &&
adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))


@ -329,6 +329,18 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
return 0;
}
/* from vcn4 and above, only unified queue is used */
static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
bool ret = false;
if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0))
ret = true;
return ret;
}
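
This check leans on IP_VERSION() packing major/minor/revision into one monotonically comparable integer; a sketch assuming the usual ((maj << 16) | (min << 8) | rev) packing:

#include <stdint.h>
#include <stdio.h>

/* assumed to match the kernel's IP_VERSION() packing */
#define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

int main(void)
{
    uint32_t vcn = IP_VERSION(4, 0, 2);

    /* vcn4 and newer use the unified queue */
    printf("unified queue: %s\n",
           vcn >= IP_VERSION(4, 0, 0) ? "yes" : "no");
    return 0;
}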
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
bool ret = false;
@ -718,19 +730,55 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
return r;
}
static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
uint32_t ib_pack_in_dw, bool enc)
{
uint32_t *ib_checksum;
ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
ib->ptr[ib->length_dw++] = 0x30000002;
ib_checksum = &ib->ptr[ib->length_dw++];
ib->ptr[ib->length_dw++] = ib_pack_in_dw;
ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
ib->ptr[ib->length_dw++] = 0x30000001;
ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
return ib_checksum;
}
static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
uint32_t ib_pack_in_dw)
{
uint32_t i;
uint32_t checksum = 0;
for (i = 0; i < ib_pack_in_dw; i++)
checksum += *(*ib_checksum + 2 + i);
**ib_checksum = checksum;
}
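
The two helpers above cooperate: the header reserves a checksum dword, and once the payload is packed the slot is patched with the sum of the dwords starting two past it. A standalone sketch with dummy payload values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t ib[16] = {0};
    uint32_t len = 0, i, pack_in_dw = 3;
    uint32_t *checksum_slot;

    /* header, mirroring amdgpu_vcn_unified_ring_ib_header() */
    ib[len++] = 0x00000010;     /* single queue checksum */
    ib[len++] = 0x30000002;
    checksum_slot = &ib[len++]; /* reserved, patched below */
    ib[len++] = pack_in_dw;

    /* dummy payload; it begins two dwords past the checksum slot */
    ib[len++] = 0x11;
    ib[len++] = 0x22;
    ib[len++] = 0x33;

    /* mirrors amdgpu_vcn_unified_ring_ib_checksum() */
    for (i = 0; i < pack_in_dw; i++)
        *checksum_slot += *(checksum_slot + 2 + i);

    printf("checksum = 0x%x\n", *checksum_slot); /* 0x66 */
    return 0;
}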
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
- const unsigned int ib_size_dw = 64;
+ unsigned int ib_size_dw = 64;
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
bool sq = amdgpu_vcn_using_unified_queue(ring);
uint32_t *ib_checksum;
uint32_t ib_pack_in_dw;
int i, r;
if (sq)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
AMDGPU_IB_POOL_DIRECT, &job);
if (r)
@ -739,6 +787,13 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
ib = &job->ibs[0];
ib->length_dw = 0;
/* single queue headers */
if (sq) {
ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ 4 + 2; /* engine info + decoding ib in dw */
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
}
ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
@ -752,6 +807,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
if (sq)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
@ -838,13 +896,18 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
uint32_t *ib_checksum = NULL;
uint64_t addr;
bool sq = amdgpu_vcn_using_unified_queue(ring);
int i, r;
if (sq)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
AMDGPU_IB_POOL_DIRECT, &job);
if (r)
@ -854,6 +917,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
if (sq)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
@ -873,6 +940,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
if (sq)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@ -892,13 +962,18 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
uint32_t *ib_checksum = NULL;
uint64_t addr;
bool sq = amdgpu_vcn_using_unified_queue(ring);
int i, r;
if (sq)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
AMDGPU_IB_POOL_DIRECT, &job);
if (r)
@ -908,6 +983,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
if (sq)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
@ -927,6 +1006,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
if (sq)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@ -977,6 +1059,20 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
return r;
}
int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
long r;
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
if (r)
goto error;
r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
error:
return r;
}
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
switch(ring) {


@ -364,6 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);


@ -76,6 +76,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
unsigned long flags;
uint32_t seq;
if (adev->mes.ring.sched.ready) {
amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
ref, mask);
return;
}
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,


@ -54,7 +54,7 @@
* (uncached system pages).
* Each VM has an ID associated with it and there is a page table
* associated with each VMID. When executing a command buffer,
- * the kernel tells the the ring what VMID to use for that command
+ * the kernel tells the ring what VMID to use for that command
* buffer. VMIDs are allocated dynamically as commands are submitted.
* The userspace drivers maintain their own address space and the kernel
* sets up their page tables accordingly when they submit their


@ -118,8 +118,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
}
}
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
{
u8 level;
@ -251,18 +249,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
}
}
#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */
void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder)
{
}
void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder)
{
}
#endif
bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);


@ -39,7 +39,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
0x00000000, // DB_DEPTH_CLEAR
0x00000000, // PA_SC_SCREEN_SCISSOR_TL
0x40004000, // PA_SC_SCREEN_SCISSOR_BR
0x00000000, // DB_DFSM_CONTROL
0, // HOLE
0x00000000, // DB_RESERVED_REG_2
0x00000000, // DB_Z_INFO
0x00000000, // DB_STENCIL_INFO
@ -50,7 +50,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
0x00000000, // DB_RESERVED_REG_1
0x00000000, // DB_RESERVED_REG_3
0x00000000, // DB_SPI_VRS_CENTER_LOCATION
0x00000000, // DB_VRS_OVERRIDE_CNTL
0, // HOLE
0x00000000, // DB_Z_READ_BASE_HI
0x00000000, // DB_STENCIL_READ_BASE_HI
0x00000000, // DB_Z_WRITE_BASE_HI
@ -270,29 +270,29 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
0x00000000, // PA_SC_FSR_EN
0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_VIEW
0, // HOLE
0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
0, // HOLE
0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
0, // HOLE
0, // HOLE
0x00000000, // PA_SC_VRS_RATE_BASE
0x00000000, // PA_SC_VRS_RATE_BASE_EXT
0x00000000, // PA_SC_VRS_RATE_SIZE_XY
0x00000000, // PA_SC_VRS_RATE_VIEW
0xffffffff, // VGT_MAX_VTX_INDX
0x00000000, // VGT_MIN_VTX_INDX
0x00000000, // VGT_INDX_OFFSET
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
0x00550055, // CB_RMI_GL2_CACHE_CONTROL
0x00000000, // CB_BLEND_RED
0x00000000, // CB_BLEND_GREEN
0x00000000, // CB_BLEND_BLUE
0x00000000, // CB_BLEND_ALPHA
0x00000000, // CB_DCC_CONTROL
0x00000000, // CB_FDCC_CONTROL
0x00000000, // CB_COVERAGE_OUT_CONTROL
0x00000000, // DB_STENCIL_CONTROL
0x01000000, // DB_STENCILREFMASK
@ -470,8 +470,8 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
0x00000000, // SPI_BARYC_CNTL
0, // HOLE
0x00000000, // SPI_TMPRING_SIZE
0, // HOLE
0, // HOLE
0x00000000, // SPI_GFX_SCRATCH_BASE_LO
0x00000000, // SPI_GFX_SCRATCH_BASE_HI
0, // HOLE
0, // HOLE
0, // HOLE
@ -545,7 +545,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
0x00000000, // PA_STEREO_CNTL
0x00000000, // PA_STATE_STEREO_X
0x00000000, // PA_CL_VRS_CNTL
0x00000000, // PA_SIDEBAND_REQUEST_DELAYS
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
@ -658,30 +658,30 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
0x00000000, // PA_SU_POINT_MINMAX
0x00000000, // PA_SU_LINE_CNTL
0x00000000, // PA_SC_LINE_STIPPLE
0x00000000, // VGT_OUTPUT_PATH_CNTL
0x00000000, // VGT_HOS_CNTL
0, // HOLE
0, // HOLE
0x00000000, // VGT_HOS_MAX_TESS_LEVEL
0x00000000, // VGT_HOS_MIN_TESS_LEVEL
0x00000000, // VGT_HOS_REUSE_DEPTH
0x00000000, // VGT_GROUP_PRIM_TYPE
0x00000000, // VGT_GROUP_FIRST_DECR
0x00000000, // VGT_GROUP_DECR
0x00000000, // VGT_GROUP_VECT_0_CNTL
0x00000000, // VGT_GROUP_VECT_1_CNTL
0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
0x00000000, // VGT_GS_MODE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // VGT_GS_ONCHIP_CNTL
0x00000000, // PA_SC_MODE_CNTL_0
0x00000000, // PA_SC_MODE_CNTL_1
0x00000000, // VGT_ENHANCE
0x00000100, // VGT_GS_PER_ES
0x00000080, // VGT_ES_PER_GS
0x00000002, // VGT_GS_PER_VS
0x00000000, // VGT_GSVS_RING_OFFSET_1
0x00000000, // VGT_GSVS_RING_OFFSET_2
0x00000000, // VGT_GSVS_RING_OFFSET_3
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // IA_ENHANCE
};
static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
@ -695,37 +695,36 @@ static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
};
static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
{
0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
0, // HOLE
0x00000000, // VGT_INSTANCE_STEP_RATE_0
0x00000000, // VGT_INSTANCE_STEP_RATE_1
0x000000ff, // IA_MULTI_VGT_PARAM
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // VGT_ESGS_RING_ITEMSIZE
0x00000000, // VGT_GSVS_RING_ITEMSIZE
0, // HOLE
0x00000000, // VGT_REUSE_OFF
0x00000000, // VGT_VTX_CNT_EN
0, // HOLE
0x00000000, // DB_HTILE_SURFACE
0x00000000, // DB_SRESULTS_COMPARE_STATE0
0x00000000, // DB_SRESULTS_COMPARE_STATE1
0x00000000, // DB_PRELOAD_CONTROL
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
@ -745,10 +744,10 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
0x00000000, // VGT_TESS_DISTRIBUTION
0x00000000, // VGT_SHADER_STAGES_EN
0x00000000, // VGT_LS_HS_CONFIG
0x00000000, // VGT_GS_VERT_ITEMSIZE
0x00000000, // VGT_GS_VERT_ITEMSIZE_1
0x00000000, // VGT_GS_VERT_ITEMSIZE_2
0x00000000, // VGT_GS_VERT_ITEMSIZE_3
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // VGT_TF_PARAM
0x00000000, // DB_ALPHA_TO_MASK
0, // HOLE
@ -759,11 +758,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
0x00000000, // VGT_GS_INSTANCE_CNT
0x00000000, // VGT_STRMOUT_CONFIG
0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
};
static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
{
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // PA_SC_CENTROID_PRIORITY_0
0x00000000, // PA_SC_CENTROID_PRIORITY_1
0x00001000, // PA_SC_LINE_CNTL
@ -797,126 +807,126 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
0x00000000, // PA_SC_NGG_MODE_CNTL
0x00000000, // PA_SC_BINNER_CNTL_2
0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
0x00000020, // VGT_OUT_DEALLOC_CNTL
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR0_BASE
0x00000000, // CB_COLOR0_PITCH
0x00000000, // CB_COLOR0_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR0_VIEW
0x00000000, // CB_COLOR0_INFO
0x00000000, // CB_COLOR0_ATTRIB
0x00000000, // CB_COLOR0_DCC_CONTROL
0x00000000, // CB_COLOR0_CMASK
0x00000000, // CB_COLOR0_CMASK_SLICE
0x00000000, // CB_COLOR0_FMASK
0x00000000, // CB_COLOR0_FMASK_SLICE
0x00000000, // CB_COLOR0_CLEAR_WORD0
0x00000000, // CB_COLOR0_CLEAR_WORD1
0x00000000, // CB_COLOR0_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR0_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR1_BASE
0x00000000, // CB_COLOR1_PITCH
0x00000000, // CB_COLOR1_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR1_VIEW
0x00000000, // CB_COLOR1_INFO
0x00000000, // CB_COLOR1_ATTRIB
0x00000000, // CB_COLOR1_DCC_CONTROL
0x00000000, // CB_COLOR1_CMASK
0x00000000, // CB_COLOR1_CMASK_SLICE
0x00000000, // CB_COLOR1_FMASK
0x00000000, // CB_COLOR1_FMASK_SLICE
0x00000000, // CB_COLOR1_CLEAR_WORD0
0x00000000, // CB_COLOR1_CLEAR_WORD1
0x00000000, // CB_COLOR1_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR1_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR2_BASE
0x00000000, // CB_COLOR2_PITCH
0x00000000, // CB_COLOR2_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR2_VIEW
0x00000000, // CB_COLOR2_INFO
0x00000000, // CB_COLOR2_ATTRIB
0x00000000, // CB_COLOR2_DCC_CONTROL
0x00000000, // CB_COLOR2_CMASK
0x00000000, // CB_COLOR2_CMASK_SLICE
0x00000000, // CB_COLOR2_FMASK
0x00000000, // CB_COLOR2_FMASK_SLICE
0x00000000, // CB_COLOR2_CLEAR_WORD0
0x00000000, // CB_COLOR2_CLEAR_WORD1
0x00000000, // CB_COLOR2_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR2_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR3_BASE
0x00000000, // CB_COLOR3_PITCH
0x00000000, // CB_COLOR3_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR3_VIEW
0x00000000, // CB_COLOR3_INFO
0x00000000, // CB_COLOR3_ATTRIB
0x00000000, // CB_COLOR3_DCC_CONTROL
0x00000000, // CB_COLOR3_CMASK
0x00000000, // CB_COLOR3_CMASK_SLICE
0x00000000, // CB_COLOR3_FMASK
0x00000000, // CB_COLOR3_FMASK_SLICE
0x00000000, // CB_COLOR3_CLEAR_WORD0
0x00000000, // CB_COLOR3_CLEAR_WORD1
0x00000000, // CB_COLOR3_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR3_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR4_BASE
0x00000000, // CB_COLOR4_PITCH
0x00000000, // CB_COLOR4_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR4_VIEW
0x00000000, // CB_COLOR4_INFO
0x00000000, // CB_COLOR4_ATTRIB
0x00000000, // CB_COLOR4_DCC_CONTROL
0x00000000, // CB_COLOR4_CMASK
0x00000000, // CB_COLOR4_CMASK_SLICE
0x00000000, // CB_COLOR4_FMASK
0x00000000, // CB_COLOR4_FMASK_SLICE
0x00000000, // CB_COLOR4_CLEAR_WORD0
0x00000000, // CB_COLOR4_CLEAR_WORD1
0x00000000, // CB_COLOR4_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR4_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR5_BASE
0x00000000, // CB_COLOR5_PITCH
0x00000000, // CB_COLOR5_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR5_VIEW
0x00000000, // CB_COLOR5_INFO
0x00000000, // CB_COLOR5_ATTRIB
0x00000000, // CB_COLOR5_DCC_CONTROL
0x00000000, // CB_COLOR5_CMASK
0x00000000, // CB_COLOR5_CMASK_SLICE
0x00000000, // CB_COLOR5_FMASK
0x00000000, // CB_COLOR5_FMASK_SLICE
0x00000000, // CB_COLOR5_CLEAR_WORD0
0x00000000, // CB_COLOR5_CLEAR_WORD1
0x00000000, // CB_COLOR5_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR5_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR6_BASE
0x00000000, // CB_COLOR6_PITCH
0x00000000, // CB_COLOR6_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR6_VIEW
0x00000000, // CB_COLOR6_INFO
0x00000000, // CB_COLOR6_ATTRIB
0x00000000, // CB_COLOR6_DCC_CONTROL
0x00000000, // CB_COLOR6_CMASK
0x00000000, // CB_COLOR6_CMASK_SLICE
0x00000000, // CB_COLOR6_FMASK
0x00000000, // CB_COLOR6_FMASK_SLICE
0x00000000, // CB_COLOR6_CLEAR_WORD0
0x00000000, // CB_COLOR6_CLEAR_WORD1
0x00000000, // CB_COLOR6_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR6_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR7_BASE
0x00000000, // CB_COLOR7_PITCH
0x00000000, // CB_COLOR7_SLICE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR7_VIEW
0x00000000, // CB_COLOR7_INFO
0x00000000, // CB_COLOR7_ATTRIB
0x00000000, // CB_COLOR7_DCC_CONTROL
0x00000000, // CB_COLOR7_CMASK
0x00000000, // CB_COLOR7_CMASK_SLICE
0x00000000, // CB_COLOR7_FMASK
0x00000000, // CB_COLOR7_FMASK_SLICE
0x00000000, // CB_COLOR7_CLEAR_WORD0
0x00000000, // CB_COLOR7_CLEAR_WORD1
0x00000000, // CB_COLOR7_FDCC_CONTROL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR7_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR0_BASE_EXT
@ -927,22 +937,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
0x00000000, // CB_COLOR5_BASE_EXT
0x00000000, // CB_COLOR6_BASE_EXT
0x00000000, // CB_COLOR7_BASE_EXT
0x00000000, // CB_COLOR0_CMASK_BASE_EXT
0x00000000, // CB_COLOR1_CMASK_BASE_EXT
0x00000000, // CB_COLOR2_CMASK_BASE_EXT
0x00000000, // CB_COLOR3_CMASK_BASE_EXT
0x00000000, // CB_COLOR4_CMASK_BASE_EXT
0x00000000, // CB_COLOR5_CMASK_BASE_EXT
0x00000000, // CB_COLOR6_CMASK_BASE_EXT
0x00000000, // CB_COLOR7_CMASK_BASE_EXT
0x00000000, // CB_COLOR0_FMASK_BASE_EXT
0x00000000, // CB_COLOR1_FMASK_BASE_EXT
0x00000000, // CB_COLOR2_FMASK_BASE_EXT
0x00000000, // CB_COLOR3_FMASK_BASE_EXT
0x00000000, // CB_COLOR4_FMASK_BASE_EXT
0x00000000, // CB_COLOR5_FMASK_BASE_EXT
0x00000000, // CB_COLOR6_FMASK_BASE_EXT
0x00000000, // CB_COLOR7_FMASK_BASE_EXT
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // CB_COLOR0_DCC_BASE_EXT
0x00000000, // CB_COLOR1_DCC_BASE_EXT
0x00000000, // CB_COLOR2_DCC_BASE_EXT
@ -976,8 +986,7 @@ static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
{gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
{gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
{gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
- {gfx11_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
- {gfx11_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
+ {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
{ 0, 0, 0 }
};
static const struct cs_section_def gfx11_cs_data[] = {


@ -53,7 +53,7 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
- #define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
+ #define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
- WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
+ WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
+ amdgpu_ring_write(ring, scratch -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
+ tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@ -4711,6 +4712,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
{
struct amdgpu_ring *ring;
unsigned int irq_type;
unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@ -4728,8 +4730,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
+ hw_prio, NULL);
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@ -4791,7 +4795,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@ -6581,6 +6585,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
}
}
static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
struct v10_gfx_mqd *mqd,
struct amdgpu_mqd_prop *prop)
{
bool priority = 0;
u32 tmp;
/* set up default queue priority level
* 0x0 = low priority, 0x1 = high priority
*/
if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
priority = 1;
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
mqd->cp_gfx_hqd_queue_priority = tmp;
}
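
The new helper is a plain read-modify-write of one register field. A sketch of the equivalent mask/shift arithmetic; the field layout below is hypothetical, the real PRIORITY_LEVEL position comes from the GC register headers:

#include <stdint.h>
#include <stdio.h>

/* hypothetical field layout, for illustration only */
#define PRIORITY_LEVEL_SHIFT 0
#define PRIORITY_LEVEL_MASK  (0xf << PRIORITY_LEVEL_SHIFT)

static uint32_t set_priority_level(uint32_t reg, uint32_t level)
{
    reg &= ~PRIORITY_LEVEL_MASK;                          /* clear field */
    reg |= (level << PRIORITY_LEVEL_SHIFT) & PRIORITY_LEVEL_MASK;
    return reg;
}

int main(void)
{
    uint32_t tmp = 0xabcdef00; /* pretend RREG32 result */

    tmp = set_priority_level(tmp, 1); /* 0x1 = high priority */
    printf("mqd priority word: 0x%08x\n", tmp);
    return 0;
}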
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@ -6609,11 +6631,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);


@ -4563,6 +4563,9 @@ static int gfx_v11_0_hw_init(void *handle)
if (adev->gfx.imu.funcs->start_imu)
adev->gfx.imu.funcs->start_imu(adev);
}
/* disable gpa mode in backdoor loading */
gfx_v11_0_disable_gpa_mode(adev);
}
}
@ -4781,19 +4784,17 @@ static int gfx_v11_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v11_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@ -6293,6 +6294,11 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
adev->gfx.imu.mode = MISSION_MODE;
else
adev->gfx.imu.mode = DEBUG_MODE;
adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}


@ -456,7 +456,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
gmc_v10_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
- break;
+ if (!adev->enable_mes)
+ break;
}
}


@ -37,6 +37,7 @@
#include "nbio_v4_3.h"
#include "gfxhub_v3_0.h"
#include "mmhub_v3_0.h"
#include "mmhub_v3_0_1.h"
#include "mmhub_v3_0_2.h"
#include "athub_v3_0.h"
@ -267,7 +268,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
- if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
+ if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@ -343,7 +344,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
gmc_v11_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
- break;
}
}
@ -548,6 +548,9 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(3, 0, 1):
adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
break;
case IP_VERSION(3, 0, 2):
adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
break;


@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_imu.h"
#include "amdgpu_dpm.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
@ -117,32 +118,9 @@ static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
return 0;
}
- static void imu_v11_0_setup(struct amdgpu_device *adev)
+ static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev)
{
- int imu_reg_val;
- //enable IMU debug mode
- WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
- WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
- imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
- imu_reg_val |= 0x1;
- WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
- //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB
- imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
- imu_reg_val |= 0x10007;
- WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
- }
- static int imu_v11_0_start(struct amdgpu_device *adev)
- {
- int imu_reg_val, i;
- //Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
- imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
- imu_reg_val &= 0xfffffffe;
- WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
+ int i, imu_reg_val = 0;
for (i = 0; i < adev->usec_timeout; i++) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
@ -159,6 +137,41 @@ static int imu_v11_0_start(struct amdgpu_device *adev)
return 0;
}
static void imu_v11_0_setup(struct amdgpu_device *adev)
{
int imu_reg_val;
//enable IMU debug mode
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
if (adev->gfx.imu.mode == DEBUG_MODE) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
imu_reg_val |= 0x1;
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
}
//disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
}
static int imu_v11_0_start(struct amdgpu_device *adev)
{
int imu_reg_val;
//Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
imu_reg_val &= 0xfffffffe;
WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_gfx_power_up_by_imu(adev);
return imu_v11_0_wait_for_reset_status(adev);
}
static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
{
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
@ -364,4 +377,5 @@ const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
.setup_imu = imu_v11_0_setup,
.start_imu = imu_v11_0_start,
.program_rlc_ram = imu_v11_0_program_rlc_ram,
.wait_for_reset_status = imu_v11_0_wait_for_reset_status,
};


@ -156,7 +156,13 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
input->gang_global_priority_level;
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
- mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 2)
+ mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+ else
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
@ -165,6 +171,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@ -267,6 +274,58 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
&mes_status_pkt, sizeof(mes_status_pkt));
}
static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
struct mes_misc_op_input *input)
{
union MESAPI__MISC misc_pkt;
memset(&misc_pkt, 0, sizeof(misc_pkt));
misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
misc_pkt.header.opcode = MES_SCH_API_MISC;
misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
switch (input->op) {
case MES_MISC_OP_READ_REG:
misc_pkt.opcode = MESAPI_MISC__READ_REG;
misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
break;
case MES_MISC_OP_WRITE_REG:
misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
break;
case MES_MISC_OP_WRM_REG_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = 0;
break;
case MES_MISC_OP_WRM_REG_WR_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
break;
default:
DRM_ERROR("unsupported misc op (%d)\n", input->op);
return -EINVAL;
}
misc_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
misc_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&misc_pkt, sizeof(misc_pkt));
}
static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
{
int i;
@ -312,6 +371,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@ -328,6 +388,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
.suspend_gang = mes_v11_0_suspend_gang,
.resume_gang = mes_v11_0_resume_gang,
.misc_op = mes_v11_0_misc_op,
};
static int mes_v11_0_init_microcode(struct amdgpu_device *adev,
@ -858,6 +919,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
mes_v11_0_queue_init_register(ring);
}
/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, 3, pipe, 0, 0);
if (pipe == AMDGPU_MES_SCHED_PIPE)
adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
return 0;
}
@ -1120,6 +1193,7 @@ static int mes_v11_0_hw_init(void *handle)
* with MES enabled.
*/
adev->gfx.kiq.ring.sched.ready = false;
adev->mes.ring.sched.ready = true;
return 0;


@ -0,0 +1,555 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "mmhub_v3_0_1.h"
#include "mmhub/mmhub_3_0_1_offset.h"
#include "mmhub/mmhub_3_0_1_sh_mask.h"
#include "navi10_enum.h"
#include "soc15_common.h"
#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
static const char *mmhub_client_ids_v3_0_1[][2] = {
[0][0] = "VMC",
[4][0] = "DCEDMC",
[5][0] = "DCEVGA",
[6][0] = "MP0",
[7][0] = "MP1",
[8][0] = "MPIO",
[16][0] = "HDP",
[17][0] = "LSDMA",
[18][0] = "JPEG",
[19][0] = "VCNU0",
[21][0] = "VSCH",
[22][0] = "VCNU1",
[23][0] = "VCN1",
[32+20][0] = "VCN0",
[2][1] = "DBGUNBIO",
[3][1] = "DCEDWB",
[4][1] = "DCEDMC",
[5][1] = "DCEVGA",
[6][1] = "MP0",
[7][1] = "MP1",
[8][1] = "MPIO",
[10][1] = "DBGU0",
[11][1] = "DBGU1",
[12][1] = "DBGU2",
[13][1] = "DBGU3",
[14][1] = "XDP",
[15][1] = "OSSSYS",
[16][1] = "HDP",
[17][1] = "LSDMA",
[18][1] = "JPEG",
[19][1] = "VCNU0",
[20][1] = "VCN0",
[21][1] = "VSCH",
[22][1] = "VCNU1",
[23][1] = "VCN1",
};
static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid */
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
static void
mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
const char *mmhub_cid = NULL;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, RW);
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(3, 0, 1):
mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
break;
default:
mmhub_cid = NULL;
break;
}
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%x\n", rw);
}
static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
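
Each of these writes splits a wide GPU address across a LO32/HI32 register pair: the low write keeps address bits 12-43 (the 4 KiB page number) and the high write keeps bits 44 and up. A small sketch of the split:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t gart_start = 0x0000123456789000ULL; /* example address */

    /* (u32)(addr >> 12) keeps bits 12..43; (addr >> 44) keeps the rest */
    uint32_t lo = (uint32_t)(gart_start >> 12);
    uint32_t hi = (uint32_t)(gart_start >> 44);

    printf("LO32 = 0x%08x, HI32 = 0x%08x\n", lo, hi);
    return 0;
}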
static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
uint32_t tmp;
/* Program the AGP BAR */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/*
* The new L1 policy will block the SRIOV guest from writing
* these regs; they will be programmed on the host side,
* so skip programming them here.
*/
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
tmp = regMMVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
tmp = regMMVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
}
static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
{
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
0);
}
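The identity aperture is disabled by giving it an empty range: the LOW bound is programmed above the HIGH bound, so no address can ever match. A sketch of the assumed hardware comparison this defeats (the actual check is not visible in the patch):
/* Assumed semantics: an address is in the identity aperture only when
 * low <= addr <= high. With low = 0xF_FFFFFFFF and high = 0 the range
 * is empty, which effectively disables the aperture. */
static bool in_identity_aperture(uint64_t addr, uint64_t low, uint64_t high)
{
	return addr >= low && addr <= high;
}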
static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
{
/* GART Enable. */
mmhub_v3_0_1_init_gart_aperture_regs(adev);
mmhub_v3_0_1_init_system_aperture_regs(adev);
mmhub_v3_0_1_init_tlb_regs(adev);
mmhub_v3_0_1_init_cache_regs(adev);
mmhub_v3_0_1_enable_system_domain(adev);
mmhub_v3_0_1_disable_identity_aperture(adev);
mmhub_v3_0_1_setup_vmid_config(adev);
mmhub_v3_0_1_program_invalidation(adev);
return 0;
}
static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
}
/**
* mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
.print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status,
.get_invalidate_req = mmhub_v3_0_1_get_invalidate_req,
};
static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
regMMVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
}
static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
{
u64 base;
base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
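FB_BASE is stored in 16 MB (1 << 24) units, which is why the masked value is shifted left by 24. A worked example with a made-up register value:
/* Illustrative only: if the masked FB_BASE field reads 0x300, the FB base is
 * 0x300ULL << 24 = 0x300000000, i.e. 12 GB into the MC address space. */
uint64_t example_fb_base = 0x300ULL << 24;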
static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
}
static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
//TODO
}
static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
//TODO
}
static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
//TODO
}
const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
.init = mmhub_v3_0_1_init,
.get_fb_location = mmhub_v3_0_1_get_fb_location,
.get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset,
.gart_enable = mmhub_v3_0_1_gart_enable,
.set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default,
.gart_disable = mmhub_v3_0_1_gart_disable,
.set_clockgating = mmhub_v3_0_1_set_clockgating,
.get_clockgating = mmhub_v3_0_1_get_clockgating,
.setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs,
};

View file

@ -0,0 +1,28 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MMHUB_V3_0_1_H__
#define __MMHUB_V3_0_1_H__
extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs;
#endif

View file

@ -284,7 +284,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover_imp(adev, NULL);
amdgpu_device_gpu_recover(adev, NULL);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,

View file

@ -311,7 +311,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover_imp(adev, NULL);
amdgpu_device_gpu_recover(adev, NULL);
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,

View file

@ -523,7 +523,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
/* Trigger recovery due to world switch failure */
if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover_imp(adev, NULL);
amdgpu_device_gpu_recover(adev, NULL);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,

View file

@ -547,7 +547,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
{
uint32_t reg, reg_data;
if (adev->asic_type != CHIP_SIENNA_CICHLID)
if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0))
return;
reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);

View file

@ -240,8 +240,11 @@ static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
return;
def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
if (enable) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@ -266,9 +269,12 @@ static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
return;
/* TODO: need update in future */
def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
if (enable) {
data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
} else {
data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
@ -344,6 +350,121 @@ static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
return rom_offset;
}
#ifdef CONFIG_PCIEASPM
static void nbio_v4_3_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
data = 0x35EB;
data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
if (adev->pdev->ltr_path)
data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
else
data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
#endif
static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
{
#ifdef CONFIG_PCIEASPM
uint32_t def, data;
if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) &&
!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0)))
return;
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7);
data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data);
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4);
data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data);
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
nbio_v4_3_program_ltr(adev);
def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
#endif
}
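Every step of program_ltr()/program_aspm() uses the same def/data idiom: snapshot the register, compute the target value, and write back only when it actually changed, so untouched registers never see a redundant write. A minimal sketch of the pattern with a hypothetical register and mask:
/* Sketch of the def/data read-modify-write-if-changed idiom used above;
 * regSOME_REG and SOME_ENABLE_MASK are hypothetical placeholders. */
static void rmw_if_changed_example(struct amdgpu_device *adev)
{
	uint32_t def, data;
	def = data = RREG32_SOC15(NBIO, 0, regSOME_REG);
	data |= SOME_ENABLE_MASK;
	if (def != data)
		WREG32_SOC15(NBIO, 0, regSOME_REG, data);
}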
const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
@ -365,4 +486,5 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.init_registers = nbio_v4_3_init_registers,
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
.program_aspm = nbio_v4_3_program_aspm,
};

View file

@ -58,10 +58,16 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan
bool use_doorbell, int doorbell_index,
int doorbell_size)
{
u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE);
u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
u32 doorbell_range = RREG32_PCIE_PORT(reg);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_CSDMA_DOORBELL_RANGE,
OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_CSDMA_DOORBELL_RANGE,
SIZE, doorbell_size);
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
OFFSET, doorbell_index);

View file

@ -39,7 +39,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@ -103,6 +105,10 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
err = psp_init_sos_microcode(psp, chip_name);
if (err)
return err;
/* It's not necessary to load ras ta on Guest side */
err = psp_init_ta_microcode(psp, chip_name);
if (err)
return err;
break;

View file

@ -310,6 +310,7 @@ static enum amd_reset_method
soc21_asic_reset_method(struct amdgpu_device *adev)
{
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;
@ -320,6 +321,8 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 0):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
return AMD_RESET_METHOD_BACO;
@ -341,6 +344,10 @@ static int soc21_asic_reset(struct amdgpu_device *adev)
dev_info(adev->dev, "BACO reset\n");
ret = amdgpu_dpm_baco_reset(adev);
break;
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
ret = amdgpu_dpm_mode2_reset(adev);
break;
default:
dev_info(adev->dev, "MODE1 reset\n");
ret = amdgpu_device_mode1_reset(adev);
@ -379,11 +386,12 @@ static void soc21_pcie_gen3_enable(struct amdgpu_device *adev)
static void soc21_program_aspm(struct amdgpu_device *adev)
{
if (amdgpu_aspm == 0)
if (!amdgpu_device_should_use_aspm(adev))
return;
/* todo */
if (!(adev->flags & AMD_IS_APU) &&
(adev->nbio.funcs->program_aspm))
adev->nbio.funcs->program_aspm(adev);
}
static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev,
@ -555,8 +563,11 @@ static int soc21_common_early_init(void *handle)
adev->cg_flags =
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS;
adev->pg_flags =
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |

View file

@ -119,6 +119,24 @@ static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device
*error_count += 1;
umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
if (ras->umc_ecc.record_ce_addr_supported) {
uint64_t err_addr, soc_pa;
uint32_t channel_index =
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
/* translate the UMC channel address to a SoC physical address; three parts are combined */
soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);
/* The umc channel bits are not original values, they are hashed */
SET_CHANNEL_HASH(channel_index, soc_pa);
dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
}
}
}
@ -251,7 +269,9 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev
static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
unsigned long *error_count)
unsigned long *error_count,
uint32_t ch_inst,
uint32_t umc_inst)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt, ecc_err_cnt_addr;
@ -295,6 +315,31 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
*error_count += 1;
umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
{
uint64_t err_addr, soc_pa;
uint32_t mc_umc_addrt0;
uint32_t channel_index;
mc_umc_addrt0 =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
channel_index =
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
/* translate the UMC channel address to a SoC physical address; three parts are combined */
soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);
/* The umc channel bits are not original values, they are hashed */
SET_CHANNEL_HASH(channel_index, soc_pa);
dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
}
}
}
@ -395,7 +440,8 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
ch_inst);
umc_v6_7_query_correctable_error_count(adev,
umc_reg_offset,
&(err_data->ce_count));
&(err_data->ce_count),
ch_inst, umc_inst);
umc_v6_7_querry_uncorrectable_error_count(adev,
umc_reg_offset,
&(err_data->ue_count));
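The three-part composition of the SoC physical address can be made concrete with a worked example. The macro expansions below follow earlier UMC generations' headers and are an assumption for v6.7:
/* Assumed expansions (as in earlier UMC headers):
 *   ADDR_OF_8KB_BLOCK(addr)           (((addr) & ~0xffULL) << 5)
 *   ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
 *   OFFSET_IN_256B_BLOCK(addr)        ((addr) & 0xffULL)
 * Example: err_addr = 0x1234, channel_index = 3
 *   8 KB block:  (0x1234 & ~0xff) << 5 = 0x24000
 *   256 B block: 3 << 8                = 0x300
 *   offset:      0x1234 & 0xff         = 0x34
 *   soc_pa = 0x24000 | 0x300 | 0x34    = 0x24334 (before SET_CHANNEL_HASH)
 */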

View file

@ -29,7 +29,6 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "vcn_sw_ring.h"
#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"
@ -45,15 +44,12 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
bool unifiedQ_enabled = false;
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};
static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (unifiedQ_enabled) {
adev->vcn.num_vcn_inst = 1;
adev->vcn.num_enc_rings = 1;
} else {
adev->vcn.num_enc_rings = 2;
}
/* re-use enc ring as unified ring */
adev->vcn.num_enc_rings = 1;
if (!unifiedQ_enabled)
vcn_v4_0_set_dec_ring_funcs(adev);
vcn_v4_0_set_enc_ring_funcs(adev);
vcn_v4_0_set_unified_ring_funcs(adev);
vcn_v4_0_set_irq_funcs(adev);
return 0;
}
static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
{
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
DRM_INFO("PSP loading VCN firmware\n");
}
}
/**
* vcn_v4_0_sw_init - sw init for VCN block
*
@ -111,17 +86,14 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
static int vcn_v4_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
if (unifiedQ_enabled)
amdgpu_vcn_setup_unified_queue_ucode(adev);
else
amdgpu_vcn_setup_ucode(adev);
amdgpu_vcn_setup_ucode(adev);
r = amdgpu_vcn_resume(adev);
if (r)
@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
/* VCN DEC TRAP */
atomic_set(&adev->vcn.inst[i].sched_score, 0);
/* VCN UNIFIED TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq);
VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
if (r)
return r;
atomic_set(&adev->vcn.inst[i].sched_score, 0);
if (!unifiedQ_enabled) {
ring = &adev->vcn.inst[i].ring_dec;
ring->use_doorbell = true;
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
/* VCN4 doorbell layout
* 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg)
* 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue)
* 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue)
* 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved)
* 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue)
*/
sprintf(ring->name, "vcn_unified_%d", i);
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1)
+ 5 + 8 * i;
sprintf(ring->name, "vcn_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
AMDGPU_RING_PRIO_DEFAULT,
&adev->vcn.inst[i].sched_score);
if (r)
return r;
}
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
/* VCN ENC TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
if (r)
return r;
ring = &adev->vcn.inst[i].ring_enc[j];
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
if (unifiedQ_enabled) {
sprintf(ring->name, "vcn_unified%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
} else {
enum amdgpu_ring_priority_level hw_prio;
hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
sprintf(ring->name, "vcn_enc_%d.%d", i, j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
hw_prio, &adev->vcn.inst[i].sched_score);
}
if (r)
return r;
}
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
if (r)
return r;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
if (unifiedQ_enabled) {
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
fw_shared->sq.is_enabled = 1;
}
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
fw_shared->sq.is_enabled = 1;
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
if (!unifiedQ_enabled) {
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
return 0;
}
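Tying the doorbell-layout comment to the index math above: each VCN instance owns an 8-slot doorbell block, and the unified ring lands on slot 2 (VCN_RB1_DB_CTRL). A small illustration, assuming the left shift converts the 64-bit doorbell index base into 32-bit slots:
/* Illustrative only: doorbell index of the unified ring for instance i. */
static uint32_t unified_doorbell_index(uint32_t vcn_ring0_1_base, int i)
{
	return (vcn_ring0_1_base << 1) + 2 + 8 * i;	/* slot 2 of block i */
}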
@ -220,19 +151,19 @@ static int vcn_v4_0_sw_fini(void *handle)
int i, r, idx;
if (drm_dev_enter(&adev->ddev, &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
if (adev->vcn.harvest_config & (1 << i))
continue;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
fw_shared->sq.is_enabled = 0;
}
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
fw_shared->sq.is_enabled = 0;
}
drm_dev_exit(idx);
}
drm_dev_exit(idx);
}
r = amdgpu_vcn_suspend(adev);
if (r)
@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int i, j, r;
int i, r;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
if (unifiedQ_enabled)
ring = &adev->vcn.inst[i].ring_enc[0];
else
ring = &adev->vcn.inst[i].ring_dec;
ring = &adev->vcn.inst[i].ring_enc[0];
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
done:
@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
int i;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
if (unifiedQ_enabled) {
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
} else
ring = &adev->vcn.inst[inst_idx].ring_dec;
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL,
ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
VCN_RB4_DB_CTRL__EN_MASK);
/* program the RB_BASE for ring buffer */
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));
/* resetting ring, fw should not check RB ring */
tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
/* Initialize the ring buffer's read and write pointers */
tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp);
ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0);
tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
if (unifiedQ_enabled)
fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET;
tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
for (i = 0; i < adev->vcn.num_enc_rings; i++) {
ring = &adev->vcn.inst[inst_idx].ring_enc[i];
WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
if (i) {
ring = &adev->vcn.inst[inst_idx].ring_enc[1];
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4);
tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp);
ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL,
ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
VCN_RB2_DB_CTRL__EN_MASK);
} else {
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
}
}
return 0;
}
@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
amdgpu_dpm_enable_uvd(adev, true);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
continue;
@ -1081,15 +961,15 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
/* disable master interrupt */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
~UVD_MASTINT_EN__VCPU_EN_MASK);
~UVD_MASTINT_EN__VCPU_EN_MASK);
/* enable LMI MC and UMC channels */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
@ -1099,10 +979,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
/* setup regUVD_LMI_CTRL */
tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
/* setup regUVD_MPC_CNTL */
tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
@ -1112,37 +992,37 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
/* setup UVD_MPC_SET_MUXA0 */
WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
/* setup UVD_MPC_SET_MUXB0 */
WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
/* setup UVD_MPC_SET_MUX */
WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
vcn_v4_0_mc_resume(adev, i);
/* VCN global tiling registers */
WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
adev->gfx.config.gb_addr_config);
/* unblock VCPU register access */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
/* release VCPU reset to boot */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
~UVD_VCPU_CNTL__BLK_RST_MASK);
~UVD_VCPU_CNTL__BLK_RST_MASK);
for (j = 0; j < 10; ++j) {
uint32_t status;
@ -1166,13 +1046,13 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
if (status & 2)
break;
dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
UVD_VCPU_CNTL__BLK_RST_MASK,
~UVD_VCPU_CNTL__BLK_RST_MASK);
dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
UVD_VCPU_CNTL__BLK_RST_MASK,
~UVD_VCPU_CNTL__BLK_RST_MASK);
mdelay(10);
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
~UVD_VCPU_CNTL__BLK_RST_MASK);
~UVD_VCPU_CNTL__BLK_RST_MASK);
mdelay(10);
r = -1;
@ -1180,78 +1060,43 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
}
if (r) {
dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i);
dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
return r;
}
/* enable master interrupt */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK,
~UVD_MASTINT_EN__VCPU_EN_MASK);
UVD_MASTINT_EN__VCPU_EN_MASK,
~UVD_MASTINT_EN__VCPU_EN_MASK);
/* clear the busy bit of VCN_STATUS */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
if (unifiedQ_enabled) {
ring = &adev->vcn.inst[i].ring_enc[0];
fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
} else {
ring = &adev->vcn.inst[i].ring_dec;
WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL,
ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
VCN_RB4_DB_CTRL__EN_MASK);
/* program the RB_BASE for ring buffer */
WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));
/* resetting ring, fw should not check RB ring */
tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
/* Initialize the ring buffer's read and write pointers */
tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp);
ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4);
tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
}
ring = &adev->vcn.inst[i].ring_enc[0];
WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
if (unifiedQ_enabled)
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
else {
ring = &adev->vcn.inst[i].ring_enc[1];
WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL,
ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
VCN_RB2_DB_CTRL__EN_MASK);
tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2);
WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp);
ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2);
WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4);
}
tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
}
return 0;
@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
*/
static int vcn_v4_0_stop(struct amdgpu_device *adev)
{
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
r = vcn_v4_0_stop_dpg_mode(adev, i);
continue;
@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
/* unpause dpg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
}
/**
* vcn_v4_0_dec_ring_get_rptr - get read pointer
* vcn_v4_0_unified_ring_get_rptr - get unified read pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware read pointer
* Returns the current hardware unified read pointer
*/
static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4);
if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
DRM_ERROR("wrong ring id is identified in %s", __func__);
return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
}
/**
* vcn_v4_0_dec_ring_get_wptr - get write pointer
* vcn_v4_0_unified_ring_get_wptr - get unified write pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware write pointer
* Returns the current hardware unified write pointer
*/
static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
DRM_ERROR("wrong ring id is identified in %s", __func__);
if (ring->use_doorbell)
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4);
return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
}
/**
* vcn_v4_0_dec_ring_set_wptr - set write pointer
*
* @ring: amdgpu_ring pointer
*
* Commits the write pointer to the hardware
*/
static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2,
lower_32_bits(ring->wptr));
}
if (ring->use_doorbell) {
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr));
}
}
static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0x3f,
.nop = VCN_DEC_SW_CMD_NO_OP,
.vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v4_0_dec_ring_get_rptr,
.get_wptr = vcn_v4_0_dec_ring_get_wptr,
.set_wptr = vcn_v4_0_dec_ring_set_wptr,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
VCN_SW_RING_EMIT_FRAME_SIZE,
.emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
.emit_ib = vcn_dec_sw_ring_emit_ib,
.emit_fence = vcn_dec_sw_ring_emit_fence,
.emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
.test_ib = amdgpu_vcn_dec_sw_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_dec_sw_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_vcn_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_dec_sw_ring_emit_wreg,
.emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
/**
* vcn_v4_0_enc_ring_get_rptr - get enc read pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware enc read pointer
*/
static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
else
return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2);
}
/**
* vcn_v4_0_enc_ring_get_wptr - get enc write pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware enc write pointer
*/
static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
if (ring->use_doorbell)
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
} else {
if (ring->use_doorbell)
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2);
}
}
/**
* vcn_v4_0_enc_ring_set_wptr - set enc write pointer
* vcn_v4_0_unified_ring_set_wptr - set enc write pointer
*
* @ring: amdgpu_ring pointer
*
* Commits the enc write pointer to the hardware
*/
static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
if (ring->use_doorbell) {
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
DRM_ERROR("wrong ring id is identified in %s", __func__);
if (ring->use_doorbell) {
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
if (ring->use_doorbell) {
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr));
}
WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
}
static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
.vmhub = AMDGPU_MMHUB_0,
.get_rptr = vcn_v4_0_enc_ring_get_rptr,
.get_wptr = vcn_v4_0_enc_ring_get_wptr,
.set_wptr = vcn_v4_0_enc_ring_set_wptr,
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
.emit_fence = vcn_v2_0_enc_ring_emit_fence,
.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
.test_ring = amdgpu_vcn_enc_ring_test_ring,
.test_ib = amdgpu_vcn_enc_ring_test_ib,
.test_ib = amdgpu_vcn_unified_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v2_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
};
/**
* vcn_v4_0_set_dec_ring_funcs - set dec ring functions
* vcn_v4_0_set_unified_ring_funcs - set unified ring functions
*
* @adev: amdgpu_device pointer
*
* Set decode ring functions
* Set unified ring functions
*/
static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
{
int i;
@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs;
adev->vcn.inst[i].ring_dec.me = i;
DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i);
}
}
adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[0].me = i;
/**
* vcn_v4_0_set_enc_ring_funcs - set enc ring functions
*
* @adev: amdgpu_device pointer
*
* Set encode ring functions
*/
static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i, j;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[j].me = i;
}
DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
}
}
@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
DRM_DEBUG("IH: VCN TRAP\n");
switch (entry->src_id) {
case VCN_4_0__SRCID__UVD_TRAP:
if (!unifiedQ_enabled) {
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
break;
}
break;
case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
break;
case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);

View file

@ -25,3 +25,17 @@ config HSA_AMD_SVM
preemptions and one based on page faults. To enable page fault
based memory management on most GFXv9 GPUs, set the module
parameter amdgpu.noretry=0.
config HSA_AMD_P2P
bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
help
Enable peer-to-peer (P2P) communication between AMD GPUs over
the PCIe bus. This can improve performance of multi-GPU compute
applications and libraries by enabling GPUs to access data directly
in peer GPUs' memory without intermediate copies in system memory.
This P2P feature is only enabled on compatible chipsets, and between
GPUs with large memory BARs that expose the entire VRAM in PCIe bus
address space within the physical address limits of the GPUs.
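A hedged sketch of how a driver typically consumes such an option at compile time; kfd_p2p_available() is invented for illustration, while kfd_dev_is_large_bar() exists elsewhere in this patch:
/* Hypothetical helper: P2P is compiled in only when CONFIG_HSA_AMD_P2P and
 * its dependencies are met, and still requires a large-BAR GPU at runtime. */
static bool kfd_p2p_available(struct kfd_dev *dev)
{
#ifdef CONFIG_HSA_AMD_P2P
	return kfd_dev_is_large_bar(dev);
#else
	return false;
#endif
}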

View file

@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;
static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
struct kfd_process_device *pdd;
mutex_lock(&p->mutex);
pdd = kfd_process_device_data_by_id(p, gpu_id);
if (pdd)
return pdd;
mutex_unlock(&p->mutex);
return NULL;
}
static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
{
mutex_unlock(&pdd->process->mutex);
}
int kfd_chardev_init(void)
{
int err = 0;
@ -280,6 +299,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
struct amdgpu_bo *wptr_bo = NULL;
memset(&q_properties, 0, sizeof(struct queue_properties));
@ -307,12 +327,49 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
/* Starting with GFX11, wptr BOs must be mapped to GART so that MES can
 * detect work on unmapped queues during usermode queue oversubscription
 * (there is no aggregated doorbell)
 */
if (dev->shared_resources.enable_mes &&
((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
struct amdgpu_bo_va_mapping *wptr_mapping;
struct amdgpu_vm *wptr_vm;
wptr_vm = drm_priv_to_vm(pdd->drm_priv);
err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
if (err)
goto err_wptr_map_gart;
wptr_mapping = amdgpu_vm_bo_lookup_mapping(
wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
amdgpu_bo_unreserve(wptr_vm->root.bo);
if (!wptr_mapping) {
pr_err("Failed to lookup wptr bo\n");
err = -EINVAL;
goto err_wptr_map_gart;
}
wptr_bo = wptr_mapping->bo_va->base.bo;
if (wptr_bo->tbo.base.size > PAGE_SIZE) {
pr_err("Requested GART mapping for wptr bo larger than one page\n");
err = -EINVAL;
goto err_wptr_map_gart;
}
err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
if (err) {
pr_err("Failed to map wptr bo to GART\n");
goto err_wptr_map_gart;
}
}
pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
&doorbell_offset_in_process);
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@ -344,6 +401,9 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return 0;
err_create_queue:
if (wptr_bo)
amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
err_wptr_map_gart:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@ -958,6 +1018,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
return false;
}
static int kfd_ioctl_get_available_memory(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_available_memory_args *args = data;
struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
if (!pdd)
return -EINVAL;
args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
kfd_unlock_pdd(pdd);
return 0;
}
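A hedged userspace sketch of exercising the new ioctl through /dev/kfd; the struct and ioctl names follow the kfd_ioctl.h UAPI added alongside this change, and the gpu_id value is a placeholder that would normally come from the topology sysfs nodes:
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	struct kfd_ioctl_get_available_memory_args args = {
		.gpu_id = 0x1002,	/* placeholder gpu_id */
	};
	int fd = open("/dev/kfd", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == 0)
		printf("available: %llu bytes\n",
		       (unsigned long long)args.available);
	close(fd);
	return 0;
}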
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
struct kfd_process *p, void *data)
{
@ -2361,7 +2434,7 @@ static int criu_restore(struct file *filep,
* Set the process to evicted state to avoid running any new queues before all the memory
* mappings are ready.
*/
ret = kfd_process_evict_queues(p);
ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
if (ret)
goto exit_unlock;
@ -2480,7 +2553,7 @@ static int criu_process_info(struct file *filep,
goto err_unlock;
}
ret = kfd_process_evict_queues(p);
ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
if (ret)
goto err_unlock;
@ -2648,6 +2721,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
kfd_ioctl_get_available_memory, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)

View file

@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(&props->list, &dev->io_link_props);
break;
@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(&props2->list, &to_dev->io_link_props);
}

View file

@ -75,7 +75,6 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
case IP_VERSION(6, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 2;
break;
case IP_VERSION(4, 2, 0):/* VEGA20 */
@ -90,6 +89,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
kfd->device_info.num_sdma_queues_per_engine = 8;
break;
@ -837,7 +837,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
int kgd2kfd_quiesce_mm(struct mm_struct *mm)
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
struct kfd_process *p;
int r;
@ -851,7 +851,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
return -ESRCH;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
r = kfd_process_evict_queues(p);
r = kfd_process_evict_queues(p, trigger);
kfd_unref_process(p);
return r;

View file

@ -177,6 +177,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct mes_add_queue_input queue_input;
int r, queue_type;
uint64_t wptr_addr_off;
if (dqm->is_hws_hang)
return -EIO;
@ -197,6 +198,14 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.doorbell_offset = q->properties.doorbell_off;
queue_input.mqd_addr = q->gart_mqd_addr;
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
if (q->wptr_bo) {
wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
}
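/* Worked example of the wptr_mc_addr math above (hypothetical values,
 * 4 KiB pages): write_ptr VA = 0x7f0000001040, wptr_bo base VA =
 * 0x7f0000001000 -> wptr_addr_off = 0x40; GART page frame = 0x2a ->
 * wptr_mc_addr = (0x2a << 12) + 0x40 = 0x2a040, the address MES polls.
 * The single-page limit enforced in kfd_ioctl_create_queue() keeps the
 * offset within the one mapped GART page. */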
queue_input.is_kfd_process = 1;
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
@ -811,7 +820,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
bool prev_active = false;
bool add_queue = false;
dqm_lock(dqm);
pdd = kfd_get_process_device_data(q->device, q->process);
@ -887,7 +895,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
if (!dqm->dev->shared_resources.enable_mes)
retval = map_queues_cpsch(dqm);
else if (add_queue)
else if (q->properties.is_active)
retval = add_queue_mes(dqm, q, &pdd->qpd);
} else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||

View file

@ -377,8 +377,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
return -EINVAL;
}
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
mem, &kern_addr, &size);
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
if (err) {
pr_err("Failed to map event page to kernel\n");
return err;
@ -387,7 +386,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
if (err) {
pr_err("Failed to set event page\n");
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
return err;
}
return err;

View file

@ -33,6 +33,7 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
@ -402,8 +403,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
uint64_t end)
uint64_t end, uint32_t trigger)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, adev->kfd.dev->id, prange->prefetch_loc,
prange->preferred_loc, trigger);
r = migrate_vma_setup(&migrate);
if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, adev->kfd.dev->id, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
@ -479,6 +490,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: the process mm structure
* @trigger: reason for the migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@ -487,7 +499,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
*/
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
struct mm_struct *mm, uint32_t trigger)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
break;
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
@ -655,8 +667,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
*/
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
struct vm_area_struct *vma, uint64_t start, uint64_t end,
uint32_t trigger)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
unsigned long upages = npages;
unsigned long cpages = 0;
@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
adev->kfd.dev->id, 0, prange->prefetch_loc,
prange->preferred_loc, trigger);
r = migrate_vma_setup(&migrate);
if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
adev->kfd.dev->id, 0, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
out_free:
@ -732,13 +756,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
* svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure
* @mm: process mm, use current->mm if NULL
* @trigger: reason for the migration
*
* Context: Process context, caller hold mmap read lock, prange->migrate_mutex
*
* Return:
* 0 - OK, otherwise error code
*/
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
uint32_t trigger)
{
struct amdgpu_device *adev;
struct vm_area_struct *vma;
@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
}
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
if (r < 0) {
pr_debug("failed %ld to migrate prange %p\n", r, prange);
break;
@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: process mm, use current->mm if NULL
* @trigger: reason for the migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
struct mm_struct *mm, uint32_t trigger)
{
int r, retries = 3;
@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
do {
r = svm_migrate_vram_to_ram(prange, mm);
r = svm_migrate_vram_to_ram(prange, mm, trigger);
if (r)
return r;
} while (prange->actual_loc && --retries);
@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
if (prange->actual_loc)
return -EDEADLK;
return svm_migrate_ram_to_vram(prange, best_loc, mm);
return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
struct mm_struct *mm, uint32_t trigger)
{
if (!prange->actual_loc)
return svm_migrate_ram_to_vram(prange, best_loc, mm);
return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
else
return svm_migrate_vram_to_vram(prange, best_loc, mm);
return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
}
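Worth noting: there is no direct VRAM-to-VRAM copy path here, so the vram_to_vram case above stages through system memory. Schematically:

/* svm_migrate_vram_to_vram(), in effect:
 *   vram(src GPU) --svm_migrate_vram_to_ram()--> system RAM
 *                 (retried up to 3 times until prange->actual_loc clears)
 *   system RAM    --svm_migrate_ram_to_vram()--> vram(best_loc) */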
@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
goto out_unlock_prange;
}
r = svm_migrate_vram_to_ram(prange, mm);
r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
if (r)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
prange, prange->start, prange->last);

View file

@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
struct mm_struct *mm, uint32_t trigger);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
uint32_t trigger);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);

View file

@ -100,7 +100,9 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
{
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
int i, se, sh, cu, cu_bitmap_sh_mul;
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
@ -167,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
se_mask[i] = 0;
i = 0;
for (cu = 0; cu < 16; cu++) {
for (cu = 0; cu < 16; cu += inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
if (cu_mask[i / 32] & (1 << (i % 32)))
se_mask[se] |= 1 << (cu + sh * 16);
i++;
if (cu_mask[i / 32] & (en_mask << (i % 32)))
se_mask[se] |= en_mask << (cu + sh * 16);
i += inc;
if (i == cu_mask_count)
return;
}
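The wgp_mode_req path changes how the user mask is consumed on gfx10+ parts: CUs are paired into WGPs, so mask bits are read and applied two at a time. A small worked example, assuming a single shader engine and shader array:

/* cu_mask = 0b1100 (CUs 2 and 3 requested), inc = 2, en_mask = 0x3:
 *   i = 0: (cu_mask >> 0) & 0x3 == 0   -> WGP for CUs 0-1 stays off
 *   i = 2: (cu_mask >> 2) & 0x3 == 0x3 -> se_mask |= 0x3 << 2 (CUs 2-3 on)
 * A lone bit such as 0b0100 would be rejected by the pairwise check
 * added to pqm_update_mqd() later in this diff. */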

View file

@ -377,6 +377,8 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
m->sdmax_rlcx_doorbell_offset =
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

View file

@ -571,6 +571,8 @@ struct queue {
void *gang_ctx_bo;
uint64_t gang_ctx_gpu_addr;
void *gang_ctx_cpu_ptr;
struct amdgpu_bo *wptr_bo;
};
enum KFD_MQD_TYPE {
@ -945,7 +947,7 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
}
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
@ -1206,6 +1208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,

View file

@ -43,6 +43,7 @@ struct mm_struct;
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
#include "kfd_smi_events.h"
/*
* List of struct kfd_process (field kfd_process).
@ -693,7 +694,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_dev *dev = pdd->dev;
if (kptr) {
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
kptr = NULL;
}
@ -733,7 +734,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
}
if (kptr) {
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
(struct kgd_mem *)*mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
@ -999,7 +1000,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
if (!mem)
goto out;
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
out:
mutex_unlock(&p->mutex);
@ -1736,7 +1737,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
int kfd_process_evict_queues(struct kfd_process *p)
int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
{
int r = 0;
int i;
@ -1745,6 +1746,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
trigger);
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
/* evict returns -EIO if HWS is hung or the asic is resetting; in this case
@ -1769,6 +1773,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
if (n_evicted == 0)
break;
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
pr_err("Failed to restore queues\n");
@ -1788,6 +1795,8 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
@ -1849,7 +1858,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid 0x%x\n", p->pasid);
ret = kfd_process_evict_queues(p);
ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@ -1916,7 +1925,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
if (kfd_process_evict_queues(p))
if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
pr_err("Failed to suspend process 0x%x\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);

View file

@ -180,7 +180,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
struct file *f, unsigned int qid)
struct file *f, struct amdgpu_bo *wptr_bo,
unsigned int qid)
{
int retval;
@ -210,6 +211,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
(*q)->wptr_bo = wptr_bo;
}
pr_debug("PQM After init queue");
@ -226,6 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@ -288,7 +291,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@ -309,7 +312,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@ -436,9 +439,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
pdd->qpd.num_gws = 0;
}
if (dev->shared_resources.enable_mes)
if (dev->shared_resources.enable_mes) {
amdgpu_amdkfd_free_gtt_mem(dev->adev,
pqn->q->gang_ctx_bo);
if (pqn->q->wptr_bo)
amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
}
uninit_queue(pqn->q);
}
@ -491,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return -EFAULT;
}
/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
int i;
for (i = 0; i < minfo->cu_mask.count; i += 2) {
uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
if (cu_pair && cu_pair != 0x3) {
pr_debug("CUs must be adjacent pairwise enabled.\n");
return -EINVAL;
}
}
}
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, minfo);
if (retval != 0)
@ -844,7 +866,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);

View file

@ -38,6 +38,9 @@ struct kfd_smi_client {
uint64_t events;
struct kfd_dev *dev;
spinlock_t lock;
struct rcu_head rcu;
pid_t pid;
bool suser;
};
#define MAX_KFIFO_SIZE 1024
@ -135,6 +138,14 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
return sizeof(events);
}
static void kfd_smi_ev_client_free(struct rcu_head *p)
{
struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
kfifo_free(&ev->fifo);
kfree(ev);
}
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
struct kfd_smi_client *client = filep->private_data;
@ -144,23 +155,31 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
list_del_rcu(&client->list);
spin_unlock(&dev->smi_lock);
synchronize_rcu();
kfifo_free(&client->fifo);
kfree(client);
call_rcu(&client->rcu, kfd_smi_ev_client_free);
return 0;
}
static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
char *event_msg, int len)
static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
unsigned int event)
{
uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
uint64_t events = READ_ONCE(client->events);
if (pid && client->pid != pid && !(client->suser && (events & all)))
return false;
return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
}
static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
unsigned int smi_event, char *event_msg, int len)
{
struct kfd_smi_client *client;
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
if (!(READ_ONCE(client->events) &
KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
if (!kfd_smi_ev_enabled(pid, client, smi_event))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
@ -176,9 +195,9 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
__printf(3, 4)
static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
char *fmt, ...)
__printf(4, 5)
static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev,
unsigned int event, char *fmt, ...)
{
char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
@ -193,7 +212,7 @@ static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
va_end(args);
add_event_to_kfifo(dev, event, fifo_in, len);
add_event_to_kfifo(pid, dev, event, fifo_in, len);
}
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
@ -206,13 +225,13 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num);
kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask)
{
kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
throttle_bitmask,
amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
}
@ -227,10 +246,93 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid)
return;
kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
task_info.pid, task_info.task_name);
}
void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
unsigned long address, bool write_fault,
ktime_t ts)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,
"%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
address, dev->id, write_fault ? 'W' : 'R');
}
void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
unsigned long address, bool migration)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,
"%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
pid, address, dev->id, migration ? 'M' : 'U');
}
void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to,
uint32_t prefetch_loc, uint32_t preferred_loc,
uint32_t trigger)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
ktime_get_boottime_ns(), pid, start, end - start,
from, to, prefetch_loc, preferred_loc, trigger);
}
void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to, uint32_t trigger)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
"%lld -%d @%lx(%lx) %x->%x %d\n",
ktime_get_boottime_ns(), pid, start, end - start,
from, to, trigger);
}
void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION,
"%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
dev->id, trigger);
}
void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE,
"%lld -%d %x\n", ktime_get_boottime_ns(), pid,
dev->id);
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
{
struct kfd_process *p;
int i;
p = kfd_lookup_process_by_mm(mm);
if (!p)
return;
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
"%lld -%d %x %c\n", ktime_get_boottime_ns(),
p->lead_thread->pid, pdd->dev->id, 'R');
}
kfd_unref_process(p);
}
void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
unsigned long address, unsigned long last,
uint32_t trigger)
{
kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU,
"%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
pid, address, last - address + 1, dev->id, trigger);
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
struct kfd_smi_client *client;
@ -251,6 +353,8 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
spin_lock_init(&client->lock);
client->events = 0;
client->dev = dev;
client->pid = current->tgid;
client->suser = capable(CAP_SYS_ADMIN);
spin_lock(&dev->smi_lock);
list_add_rcu(&client->list, &dev->smi_clients);

View file

@ -29,5 +29,24 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
unsigned long address, bool write_fault,
ktime_t ts);
void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
unsigned long address, bool migration);
void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to,
uint32_t prefetch_loc, uint32_t preferred_loc,
uint32_t trigger);
void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to, uint32_t trigger);
void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
uint32_t trigger);
void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid);
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
unsigned long address, unsigned long last,
uint32_t trigger);
#endif
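A sketch of how a profiler might consume these events from userspace, assuming the AMDKFD_IOC_SMI_EVENTS ioctl and the event indices this merge adds to include/uapi/linux/kfd_ioctl.h; the gpuid is a placeholder, and events arrive as text lines of the form "<event_id_hex> <payload>":

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	struct kfd_ioctl_smi_events_args args = { .gpuid = 0x1234 };
	__u64 mask = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_START) |
		     KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END);
	char buf[512];
	ssize_t n;
	int kfd = open("/dev/kfd", O_RDWR);

	if (kfd < 0 || ioctl(kfd, AMDKFD_IOC_SMI_EVENTS, &args))
		return 1;
	/* the mask is written as a binary __u64, per kfd_smi_ev_write() */
	if (write(args.anon_fd, &mask, sizeof(mask)) != sizeof(mask))
		return 1;
	while ((n = read(args.anon_fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	return 0;
}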

View file

@ -32,6 +32,7 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
@ -43,7 +44,7 @@
/* Long enough to ensure no retry fault comes after svm range is restored and
* page table is updated.
*/
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
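The debounce window itself is unchanged by this switch: the old value of 2000 was compared in microseconds, and 2UL * NSEC_PER_MSEC = 2,000,000 ns = 2000 us = 2 ms. Only the representation moves to nanoseconds so it can feed ktime helpers directly:

/* usage pattern in svm_range_restore_pages() below:
 * ktime_before(now, ktime_add_ns(prange->validate_timestamp,
 *                                AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING)) */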
struct criu_svm_metadata {
struct list_head list;
@ -1199,7 +1200,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
static int
svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
unsigned long last)
unsigned long last, uint32_t trigger)
{
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
@ -1231,6 +1232,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
return -EINVAL;
}
kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
start, last, trigger);
r = svm_range_unmap_from_gpu(pdd->dev->adev,
drm_priv_to_vm(pdd->drm_priv),
start, last, &fence);
@ -1617,7 +1621,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
svm_range_unreserve_bos(&ctx);
if (!r)
prange->validate_timestamp = ktime_to_us(ktime_get());
prange->validate_timestamp = ktime_get_boottime();
return r;
}
@ -1729,14 +1733,16 @@ static void svm_range_restore_work(struct work_struct *work)
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
mutex_unlock(&process_info->lock);
mmput(mm);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
pr_debug("reschedule to restore svm range\n");
schedule_delayed_work(&svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
kfd_smi_event_queue_restore_rescheduled(mm);
}
mmput(mm);
}
/**
@ -1756,7 +1762,8 @@ static void svm_range_restore_work(struct work_struct *work)
*/
static int
svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
unsigned long start, unsigned long last)
unsigned long start, unsigned long last,
enum mmu_notifier_event event)
{
struct svm_range_list *svms = prange->svms;
struct svm_range *pchild;
@ -1792,7 +1799,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
prange->svms, prange->start, prange->last);
/* First eviction, stop the queues */
r = kgd2kfd_quiesce_mm(mm);
r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
if (r)
pr_debug("failed to quiesce KFD\n");
@ -1801,6 +1808,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
unsigned long s, l;
uint32_t trigger;
if (event == MMU_NOTIFY_MIGRATE)
trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
else
trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
prange->svms, start, last);
@ -1809,13 +1822,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l);
svm_range_unmap_from_gpus(pchild, s, l, trigger);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l);
svm_range_unmap_from_gpus(prange, s, l, trigger);
}
return r;
@ -2229,6 +2242,7 @@ static void
svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
unsigned long start, unsigned long last)
{
uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
struct svm_range_list *svms;
struct svm_range *pchild;
struct kfd_process *p;
@ -2256,14 +2270,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l);
svm_range_unmap_from_gpus(pchild, s, l, trigger);
svm_range_unmap_split(mm, prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l);
svm_range_unmap_from_gpus(prange, s, l, trigger);
svm_range_unmap_split(mm, prange, prange, start, last);
if (unmap_parent)
@ -2330,7 +2344,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break;
default:
svm_range_evict(prange, mni->mm, start, last);
svm_range_evict(prange, mni->mm, start, last, range->event);
break;
}
@ -2694,11 +2708,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
uint64_t timestamp;
ktime_t timestamp = ktime_get_boottime();
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
bool migration = false;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@ -2775,9 +2790,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out_unlock_range;
}
timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
/* skip duplicate vm fault on different pages of same range */
if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
r = 0;
@ -2813,9 +2828,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
write_fault, timestamp);
if (prange->actual_loc != best_loc) {
migration = true;
if (best_loc) {
r = svm_migrate_to_vram(prange, best_loc, mm);
r = svm_migrate_to_vram(prange, best_loc, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
@ -2823,12 +2843,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
* VRAM failed
*/
if (prange->actual_loc)
r = svm_migrate_vram_to_ram(prange, mm);
r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
else
r = 0;
}
} else {
r = svm_migrate_vram_to_ram(prange, mm);
r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
@ -2842,6 +2864,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
migration);
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
out_unlock_svms:
@ -3148,12 +3173,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
return 0;
if (!best_loc) {
r = svm_migrate_vram_to_ram(prange, mm);
r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
return r;
}
r = svm_migrate_to_vram(prange, best_loc, mm);
r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
return r;
@ -3211,7 +3236,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
r = svm_migrate_vram_to_ram(prange,
svm_bo->eviction_fence->mm);
svm_bo->eviction_fence->mm,
KFD_MIGRATE_TRIGGER_TTM_EVICTION);
} while (!r && prange->actual_loc && --retries);
if (!r && prange->actual_loc)

View file

@ -125,7 +125,7 @@ struct svm_range {
uint32_t actual_loc;
uint8_t granularity;
atomic_t invalid;
uint64_t validate_timestamp;
ktime_t validate_timestamp;
struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item;
struct list_head deferred_list;

View file

@ -40,6 +40,7 @@
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
@ -148,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
struct kfd_iolink_properties *p2plink;
struct kfd_perf_properties *perf;
list_del(&dev->list);
@ -173,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
kfree(iolink);
}
while (dev->p2p_link_props.next != &dev->p2p_link_props) {
p2plink = container_of(dev->p2p_link_props.next,
struct kfd_iolink_properties, list);
list_del(&p2plink->list);
kfree(p2plink);
}
while (dev->perf_props.next != &dev->perf_props) {
perf = container_of(dev->perf_props.next,
struct kfd_perf_properties, list);
@ -214,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device(
INIT_LIST_HEAD(&dev->mem_props);
INIT_LIST_HEAD(&dev->cache_props);
INIT_LIST_HEAD(&dev->io_link_props);
INIT_LIST_HEAD(&dev->p2p_link_props);
INIT_LIST_HEAD(&dev->perf_props);
list_add_tail(&dev->list, device_list);
@ -465,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.caches_count);
sysfs_show_32bit_prop(buffer, offs, "io_links_count",
dev->node_props.io_links_count);
sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
dev->node_props.p2p_links_count);
sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
dev->node_props.cpu_core_id_base);
sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
@ -568,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
{
struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@ -585,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
dev->kobj_iolink = NULL;
}
if (dev->kobj_p2plink) {
list_for_each_entry(p2plink, &dev->p2p_link_props, list)
if (p2plink->kobj) {
kfd_remove_sysfs_file(p2plink->kobj,
&p2plink->attr);
p2plink->kobj = NULL;
}
kobject_del(dev->kobj_p2plink);
kobject_put(dev->kobj_p2plink);
dev->kobj_p2plink = NULL;
}
if (dev->kobj_cache) {
list_for_each_entry(cache, &dev->cache_props, list)
if (cache->kobj) {
@ -631,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
uint32_t id)
{
struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@ -668,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (!dev->kobj_iolink)
return -ENOMEM;
dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
if (!dev->kobj_p2plink)
return -ENOMEM;
dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
if (!dev->kobj_perf)
return -ENOMEM;
@ -757,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
i++;
}
i = 0;
list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
if (!p2plink->kobj)
return -ENOMEM;
ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
dev->kobj_p2plink, "%d", i);
if (ret < 0) {
kobject_put(p2plink->kobj);
return ret;
}
p2plink->attr.name = "properties";
p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&iolink->attr);
ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
if (ret < 0)
return ret;
i++;
}
/* All hardware blocks have the same number of attributes. */
num_attrs = ARRAY_SIZE(perf_attr_iommu);
list_for_each_entry(perf, &dev->perf_props, list) {
@ -1145,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
struct kfd_iolink_properties *p2plink;
down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list) {
@ -1165,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
cache->gpu = dev->gpu;
list_for_each_entry(iolink, &dev->io_link_props, list)
iolink->gpu = dev->gpu;
list_for_each_entry(p2plink, &dev->p2p_link_props, list)
p2plink->gpu = dev->gpu;
break;
}
}
@ -1287,6 +1341,253 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
}
}
/* Indirect links are created as well, so apply the flag settings to all of them */
list_for_each_entry(link, &dev->p2p_link_props, list) {
link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(dev, NULL, link);
peer_dev = kfd_topology_device_by_proximity_domain(
link->node_to);
if (!peer_dev)
continue;
list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
list) {
if (inbound_link->node_to != link->node_from)
continue;
inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
}
}
}
static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
struct kfd_iolink_properties *p2plink)
{
int ret;
p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
if (!p2plink->kobj)
return -ENOMEM;
ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
if (ret < 0) {
kobject_put(p2plink->kobj);
return ret;
}
p2plink->attr.name = "properties";
p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&p2plink->attr);
ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
if (ret < 0)
return ret;
return 0;
}
static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
{
struct kfd_iolink_properties *props = NULL, *props2 = NULL;
struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
struct kfd_topology_device *cpu_dev;
int ret = 0;
int i, num_cpu;
num_cpu = 0;
list_for_each_entry(cpu_dev, &topology_device_list, list) {
if (cpu_dev->gpu)
break;
num_cpu++;
}
gpu_link = list_first_entry(&kdev->io_link_props,
struct kfd_iolink_properties, list);
if (!gpu_link)
return -ENOMEM;
for (i = 0; i < num_cpu; i++) {
/* CPU <--> GPU */
if (gpu_link->node_to == i)
continue;
/* find CPU <--> CPU links */
cpu_dev = kfd_topology_device_by_proximity_domain(i);
if (!cpu_dev)
continue;
cpu_link = NULL;
list_for_each_entry(tmp_link, &cpu_dev->io_link_props, list) {
if (tmp_link->node_to == gpu_link->node_to) {
cpu_link = tmp_link;
break;
}
}
if (!cpu_link)
return -ENOMEM;
/* CPU <--> CPU <--> GPU, GPU node*/
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
props->weight = gpu_link->weight + cpu_link->weight;
props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
props->node_from = gpu_node;
props->node_to = i;
kdev->node_props.p2p_links_count++;
list_add_tail(&props->list, &kdev->p2p_link_props);
ret = kfd_build_p2p_node_entry(kdev, props);
if (ret < 0)
return ret;
/* for small BAR, no CPU --> GPU indirect links */
if (kfd_dev_is_large_bar(kdev->gpu)) {
/* CPU <--> CPU <--> GPU, CPU node*/
props2 = kfd_alloc_struct(props2);
if (!props2)
return -ENOMEM;
memcpy(props2, props, sizeof(struct kfd_iolink_properties));
props2->node_from = i;
props2->node_to = gpu_node;
props2->kobj = NULL;
cpu_dev->node_props.p2p_links_count++;
list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
ret = kfd_build_p2p_node_entry(cpu_dev, props2);
if (ret < 0)
return ret;
}
}
return ret;
}
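The composition rule for an indirect GPU <--> CPU <--> CPU path: weights and latencies add along the segments, while bandwidth is capped by the slowest segment. With hypothetical numbers:

/* gpu_link: weight 20, max_latency 500, min_bandwidth 16 GB/s
 * cpu_link: weight 40, max_latency 300, min_bandwidth 32 GB/s
 * composed: weight 60, max_latency 800, min_bandwidth 16 GB/s
 * (props->node_from = GPU node, props->node_to = remote CPU node) */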
#if defined(CONFIG_HSA_AMD_P2P)
static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
struct kfd_topology_device *peer, int from, int to)
{
struct kfd_iolink_properties *props = NULL;
struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
struct kfd_topology_device *cpu_dev;
int ret = 0;
if (!amdgpu_device_is_peer_accessible(
kdev->gpu->adev,
peer->gpu->adev))
return ret;
iolink1 = list_first_entry(&kdev->io_link_props,
struct kfd_iolink_properties, list);
if (!iolink1)
return -ENOMEM;
iolink2 = list_first_entry(&peer->io_link_props,
struct kfd_iolink_properties, list);
if (!iolink2)
return -ENOMEM;
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
props->weight = iolink1->weight + iolink2->weight;
props->min_latency = iolink1->min_latency + iolink2->min_latency;
props->max_latency = iolink1->max_latency + iolink2->max_latency;
props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth);
if (iolink1->node_to != iolink2->node_to) {
/* CPU->CPU link*/
cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
if (cpu_dev) {
list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
if (iolink3->node_to == iolink2->node_to)
break;
props->weight += iolink3->weight;
props->min_latency += iolink3->min_latency;
props->max_latency += iolink3->max_latency;
props->min_bandwidth = min(props->min_bandwidth,
iolink3->min_bandwidth);
props->max_bandwidth = min(props->max_bandwidth,
iolink3->max_bandwidth);
} else {
WARN(1, "CPU node not found");
}
}
props->node_from = from;
props->node_to = to;
peer->node_props.p2p_links_count++;
list_add_tail(&props->list, &peer->p2p_link_props);
ret = kfd_build_p2p_node_entry(peer, props);
return ret;
}
#endif
static int kfd_dev_create_p2p_links(void)
{
struct kfd_topology_device *dev;
struct kfd_topology_device *new_dev;
#if defined(CONFIG_HSA_AMD_P2P)
uint32_t i;
#endif
uint32_t k;
int ret = 0;
k = 0;
list_for_each_entry(dev, &topology_device_list, list)
k++;
if (k < 2)
return 0;
new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
if (WARN_ON(!new_dev->gpu))
return 0;
k--;
/* create indirect links */
ret = kfd_create_indirect_link_prop(new_dev, k);
if (ret < 0)
goto out;
/* create p2p links */
#if defined(CONFIG_HSA_AMD_P2P)
i = 0;
list_for_each_entry(dev, &topology_device_list, list) {
if (dev == new_dev)
break;
if (!dev->gpu || !dev->gpu->adev ||
(dev->gpu->hive_id &&
dev->gpu->hive_id == new_dev->gpu->hive_id))
goto next;
/* check whether the node(s) are peer-accessible in one or both directions */
ret = kfd_add_peer_prop(new_dev, dev, i, k);
if (ret < 0)
goto out;
ret = kfd_add_peer_prop(dev, new_dev, k, i);
if (ret < 0)
goto out;
next:
i++;
}
#endif
out:
return ret;
}
int kfd_topology_add_device(struct kfd_dev *gpu)
@ -1305,7 +1606,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
INIT_LIST_HEAD(&temp_topology_device_list);
gpu_id = kfd_generate_gpu_id(gpu);
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
/* Check to see if this gpu device exists in the topology_device_list.
@ -1362,6 +1662,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
kfd_dev_create_p2p_links();
/* TODO: Move the following lines to function
* kfd_add_non_crat_information
*/
@ -1507,7 +1809,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
static void kfd_topology_update_io_links(int proximity_domain)
{
struct kfd_topology_device *dev;
struct kfd_iolink_properties *iolink, *tmp;
struct kfd_iolink_properties *iolink, *p2plink, *tmp;
list_for_each_entry(dev, &topology_device_list, list) {
if (dev->proximity_domain > proximity_domain)
@ -1520,7 +1822,6 @@ static void kfd_topology_update_io_links(int proximity_domain)
*/
if (iolink->node_to == proximity_domain) {
list_del(&iolink->list);
dev->io_link_count--;
dev->node_props.io_links_count--;
} else {
if (iolink->node_from > proximity_domain)
@ -1529,6 +1830,22 @@ static void kfd_topology_update_io_links(int proximity_domain)
iolink->node_to--;
}
}
list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
/*
* If there is a p2p link to the dev being deleted
* then remove that p2p link also.
*/
if (p2plink->node_to == proximity_domain) {
list_del(&p2plink->list);
dev->node_props.p2p_links_count--;
} else {
if (p2plink->node_from > proximity_domain)
p2plink->node_from--;
if (p2plink->node_to > proximity_domain)
p2plink->node_to--;
}
}
}
}

View file

@ -38,6 +38,7 @@ struct kfd_node_properties {
uint32_t mem_banks_count;
uint32_t caches_count;
uint32_t io_links_count;
uint32_t p2p_links_count;
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
@ -129,14 +130,15 @@ struct kfd_topology_device {
struct list_head mem_props;
uint32_t cache_count;
struct list_head cache_props;
uint32_t io_link_count;
struct list_head io_link_props;
struct list_head p2p_link_props;
struct list_head perf_props;
struct kfd_dev *gpu;
struct kobject *kobj_node;
struct kobject *kobj_mem;
struct kobject *kobj_cache;
struct kobject *kobj_iolink;
struct kobject *kobj_p2plink;
struct kobject *kobj_perf;
struct attribute attr_gpuid;
struct attribute attr_name;

View file

@ -91,10 +91,14 @@
#include "dcn/dcn_1_0_offset.h"
#include "dcn/dcn_1_0_sh_mask.h"
#include "soc15_hw_ip.h"
#include "soc15_common.h"
#include "vega10_ip_offset.h"
#include "soc15_common.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "modules/inc/mod_freesync.h"
#include "modules/power/power_helpers.h"
#include "modules/inc/mod_info_packet.h"
@ -120,6 +124,11 @@ MODULE_FIRMWARE(FIRMWARE_DCN_315_DMUB);
#define FIRMWARE_DCN316_DMUB "amdgpu/dcn_3_1_6_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN316_DMUB);
#define FIRMWARE_DCN_V3_2_0_DMCUB "amdgpu/dcn_3_2_0_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN_V3_2_0_DMCUB);
#define FIRMWARE_DCN_V3_2_1_DMCUB "amdgpu/dcn_3_2_1_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN_V3_2_1_DMCUB);
#define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
@ -1258,10 +1267,20 @@ static void vblank_control_worker(struct work_struct *work)
DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
/* Control PSR based on vblank requirements from OS */
/*
* Control PSR based on vblank requirements from OS
*
* If panel supports PSR SU, there's no need to disable PSR when OS is
* submitting fast atomic commits (we infer this from whether the OS
* requests vblank events). Fast atomic commits will simply trigger a
* full-frame-update (FFU); a specific case of selective-update (SU)
* where the SU region is the full hactive*vactive region. See
* fill_dc_dirty_rects().
*/
if (vblank_work->stream && vblank_work->stream->link) {
if (vblank_work->enable) {
if (vblank_work->stream->link->psr_settings.psr_allow_active)
if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
vblank_work->stream->link->psr_settings.psr_allow_active)
amdgpu_dm_psr_disable(vblank_work->stream);
} else if (vblank_work->stream->link->psr_settings.psr_feature_enabled &&
!vblank_work->stream->link->psr_settings.psr_allow_active &&
@ -1509,6 +1528,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
DRM_INFO("Seamless boot condition check passed\n");
}
init_data.flags.enable_mipi_converter_optimization = true;
INIT_LIST_HEAD(&adev->dm.da_list);
/* Display Core create. */
adev->dm.dc = dc_create(&init_data);
@ -1803,6 +1824,8 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
return 0;
default:
break;
@ -1926,6 +1949,14 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
dmub_asic = DMUB_ASIC_DCN316;
fw_name_dmub = FIRMWARE_DCN316_DMUB;
break;
case IP_VERSION(3, 2, 0):
dmub_asic = DMUB_ASIC_DCN32;
fw_name_dmub = FIRMWARE_DCN_V3_2_0_DMCUB;
break;
case IP_VERSION(3, 2, 1):
dmub_asic = DMUB_ASIC_DCN321;
fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
default:
/* ASIC doesn't support DMUB. */
return 0;
@ -2172,7 +2203,8 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
} else {
ret = drm_dp_mst_topology_mgr_resume(mgr, true);
if (ret < 0) {
drm_dp_mst_topology_mgr_set_mst(mgr, false);
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
aconnector->dc_link);
need_hotplug = true;
}
}
@ -2554,34 +2586,6 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
return;
}
static void dm_set_dpms_off(struct dc_link *link, struct dm_crtc_state *acrtc_state)
{
struct dc_stream_state *stream_state;
struct amdgpu_dm_connector *aconnector = link->priv;
struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
struct dc_stream_update stream_update;
bool dpms_off = true;
memset(&stream_update, 0, sizeof(stream_update));
stream_update.dpms_off = &dpms_off;
mutex_lock(&adev->dm.dc_lock);
stream_state = dc_stream_find_from_link(link);
if (stream_state == NULL) {
DRM_DEBUG_DRIVER("Error finding stream state associated with link!\n");
mutex_unlock(&adev->dm.dc_lock);
return;
}
stream_update.stream = stream_state;
acrtc_state->force_dpms_off = true;
dc_commit_updates_for_stream(stream_state->ctx->dc, NULL, 0,
stream_state, &stream_update,
stream_state->ctx->dc->current_state);
mutex_unlock(&adev->dm.dc_lock);
}
static int dm_resume(void *handle)
{
struct amdgpu_device *adev = handle;
@ -2814,7 +2818,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
{
u32 max_cll, min_cll, max, min, q, r;
u32 max_avg, min_cll, max, min, q, r;
struct amdgpu_dm_backlight_caps *caps;
struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
@ -2844,7 +2848,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps = &dm->backlight_caps[i];
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
caps->aux_support = false;
max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
if (caps->ext_caps->bits.oled == 1 /*||
@ -2872,8 +2876,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
* The results of the above expressions can be verified at
* pre_computed_values.
*/
q = max_cll >> 5;
r = max_cll % 32;
q = max_avg >> 5;
r = max_avg % 32;
max = (1 << q) * pre_computed_values[r];
// min luminance: maxLum * (CV/255)^2 / 100
@ -3032,16 +3036,13 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
struct drm_device *dev = connector->dev;
enum dc_connection_type new_connection_type = dc_connection_none;
struct amdgpu_device *adev = drm_to_adev(dev);
#ifdef CONFIG_DRM_AMD_DC_HDCP
struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state);
struct dm_crtc_state *dm_crtc_state = NULL;
#endif
if (adev->dm.disable_hpd_irq)
return;
if (dm_con_state->base.state && dm_con_state->base.crtc)
dm_crtc_state = to_dm_crtc_state(drm_atomic_get_crtc_state(
dm_con_state->base.state,
dm_con_state->base.crtc));
/*
* In case of failure or MST no need to update connector status or notify the OS
* since (for MST case) MST does this in its own context.
@ -3071,11 +3072,6 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
drm_kms_helper_connector_hotplug_event(connector);
} else if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) {
if (new_connection_type == dc_connection_none &&
aconnector->dc_link->type == dc_connection_none &&
dm_crtc_state)
dm_set_dpms_off(aconnector->dc_link, dm_crtc_state);
amdgpu_dm_update_connector_after_detect(aconnector);
drm_modeset_lock_all(dev);
@ -3868,9 +3864,6 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
#define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
#define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
int bl_idx)
{
@ -4074,7 +4067,6 @@ amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm)
else
DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name);
}
#endif
static int initialize_plane(struct amdgpu_display_manager *dm,
struct amdgpu_mode_info *mode_info, int plane_id,
@ -4120,9 +4112,6 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
static void register_backlight_device(struct amdgpu_display_manager *dm,
struct dc_link *link)
{
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
link->type != dc_connection_none) {
/*
@ -4138,7 +4127,6 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
dm->num_of_edps++;
}
}
#endif
}
@ -4235,6 +4223,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
case IP_VERSION(2, 1, 0):
if (register_outbox_irq_handlers(dm->adev)) {
DRM_ERROR("DM: Failed to initialize IRQ\n");
@ -4253,6 +4243,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
psr_feature_enabled = true;
break;
default:
@ -4261,9 +4253,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
}
}
/* Disable vblank IRQs aggressively for power-saving. */
adev_to_drm(adev)->vblank_disable_immediate = true;
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
@ -4370,6 +4359,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
if (dcn10_register_irq_handlers(dm->adev)) {
DRM_ERROR("DM: Failed to initialize IRQ\n");
goto fail;
@ -4556,6 +4547,8 @@ static int dm_early_init(void *handle)
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
adev->mode_info.num_crtc = 4;
adev->mode_info.num_hpd = 4;
adev->mode_info.num_dig = 4;
@ -4865,7 +4858,9 @@ fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier);
unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier);
unsigned int pipes_log2 = min(4u, mod_pipe_xor_bits);
unsigned int pipes_log2;
pipes_log2 = min(5u, mod_pipe_xor_bits);
fill_gfx9_tiling_info_from_device(adev, tiling_info);
@ -5201,8 +5196,73 @@ add_gfx10_3_modifiers(const struct amdgpu_device *adev,
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
}
static void
add_gfx11_modifiers(struct amdgpu_device *adev,
uint64_t **mods, uint64_t *size, uint64_t *capacity)
{
int num_pipes = 0;
int pipe_xor_bits = 0;
int num_pkrs = 0;
int pkrs = 0;
u32 gb_addr_config;
u8 i = 0;
unsigned swizzle_r_x;
uint64_t modifier_r_x;
uint64_t modifier_dcc_best;
uint64_t modifier_dcc_4k;
/* TODO: GFX11 IP HW init hasn't finished and we get zero if we read from
* adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} */
gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
ASSERT(gb_addr_config != 0);
num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
pkrs = ilog2(num_pkrs);
num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES);
pipe_xor_bits = ilog2(num_pipes);
for (i = 0; i < 2; i++) {
/* Insert the best one first. */
/* R_X swizzle modes are the best for rendering and DCC requires them. */
if (num_pipes > 16)
swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X;
else
swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X;
modifier_r_x = AMD_FMT_MOD |
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
AMD_FMT_MOD_SET(TILE, swizzle_r_x) |
AMD_FMT_MOD_SET(PACKERS, pkrs);
/* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */
modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) |
AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
/* DCC settings for 4K and greater resolutions. (required by display hw) */
modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) |
AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
add_modifier(mods, size, capacity, modifier_dcc_best);
add_modifier(mods, size, capacity, modifier_dcc_4k);
add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1));
add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1));
add_modifier(mods, size, capacity, modifier_r_x);
}
add_modifier(mods, size, capacity, AMD_FMT_MOD |
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
}
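As a concrete decode example (illustrative values, not taken from any particular ASIC): a GB_ADDR_CONFIG whose NUM_PIPES field reads 4 means num_pipes = 1 << 4 = 16 and pipe_xor_bits = ilog2(16) = 4, and likewise for NUM_PKRS. With 16 pipes the `num_pipes > 16` test is false, so the first loop pass emits the GFX9_64K_R_X family (with its DCC, DCC-retile and plain variants) and the second pass emits the GFX11_256K_R_X family, before the GFX9_64K_D fallback closes the list.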
static int
get_plane_modifiers(const struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
{
uint64_t size = 0, capacity = 128;
*mods = NULL;
@ -5234,6 +5294,9 @@ get_plane_modifiers(const struct amdgpu_device *adev, unsigned int plane_type, u
else
add_gfx10_1_modifiers(adev, mods, &size, &capacity);
break;
case AMDGPU_FAMILY_GC_11_0_0:
add_gfx11_modifiers(adev, mods, &size, &capacity);
break;
}
add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
@ -5272,7 +5335,7 @@ fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
dcc->enable = 1;
dcc->meta_pitch = afb->base.pitches[1];
dcc->independent_64b_blks = independent_64b_blks;
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) {
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) {
if (independent_64b_blks && independent_128b_blks)
dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl;
else if (independent_128b_blks)
@ -5640,6 +5703,117 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
return 0;
}
/**
* fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
*
* @plane: DRM plane containing dirty regions that need to be flushed to the eDP
* remote fb
* @old_plane_state: Old state of @plane
* @new_plane_state: New state of @plane
* @crtc_state: New state of CRTC connected to the @plane
* @flip_addrs: DC flip tracking struct, which also tracks dirty rects
*
* For PSR SU, DC informs the DMUB uController of dirty rectangle regions
* (referred to as "damage clips" in DRM nomenclature) that require updating on
* the eDP remote buffer. The responsibility of specifying the dirty regions is
* amdgpu_dm's.
*
* A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the
* plane with regions that require flushing to the eDP remote buffer. In
* addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
* implicitly provide damage clips without any client support via the plane
* bounds.
*
* Today, amdgpu_dm only supports the MPO and cursor use cases.
*
* TODO: Also enable for FB_DAMAGE_CLIPS
*/
static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *old_plane_state,
struct drm_plane_state *new_plane_state,
struct drm_crtc_state *crtc_state,
struct dc_flip_addrs *flip_addrs)
{
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
struct rect *dirty_rects = flip_addrs->dirty_rects;
uint32_t num_clips;
bool bb_changed;
bool fb_changed;
uint32_t i = 0;
flip_addrs->dirty_rect_count = 0;
/*
* Cursor plane has its own dirty rect update interface. See
* dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
*/
if (plane->type == DRM_PLANE_TYPE_CURSOR)
return;
/*
* Today, we only consider the MPO use case for PSR SU. If MPO is not
* requested and there is a plane update, do a full-frame update (FFU).
*/
if (!dm_crtc_state->mpo_requested) {
dirty_rects[0].x = 0;
dirty_rects[0].y = 0;
dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay;
dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay;
flip_addrs->dirty_rect_count = 1;
DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
new_plane_state->plane->base.id,
dm_crtc_state->base.mode.crtc_hdisplay,
dm_crtc_state->base.mode.crtc_vdisplay);
return;
}
/*
* MPO is requested. Add the entire plane bounding box to the dirty rects
* if the plane is flipped to or damaged.
*
* If the plane is moved or resized, also add the old bounding box to the
* dirty rects.
*/
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
fb_changed = old_plane_state->fb->base.id !=
new_plane_state->fb->base.id;
bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
old_plane_state->crtc_y != new_plane_state->crtc_y ||
old_plane_state->crtc_w != new_plane_state->crtc_w ||
old_plane_state->crtc_h != new_plane_state->crtc_h);
DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
new_plane_state->plane->base.id,
bb_changed, fb_changed, num_clips);
if (num_clips || fb_changed || bb_changed) {
dirty_rects[i].x = new_plane_state->crtc_x;
dirty_rects[i].y = new_plane_state->crtc_y;
dirty_rects[i].width = new_plane_state->crtc_w;
dirty_rects[i].height = new_plane_state->crtc_h;
DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
new_plane_state->plane->base.id,
dirty_rects[i].x, dirty_rects[i].y,
dirty_rects[i].width, dirty_rects[i].height);
i += 1;
}
/* Add old plane bounding-box if plane is moved or resized */
if (bb_changed) {
dirty_rects[i].x = old_plane_state->crtc_x;
dirty_rects[i].y = old_plane_state->crtc_y;
dirty_rects[i].width = old_plane_state->crtc_w;
dirty_rects[i].height = old_plane_state->crtc_h;
DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
old_plane_state->plane->base.id,
dirty_rects[i].x, dirty_rects[i].y,
dirty_rects[i].width, dirty_rects[i].height);
i += 1;
}
flip_addrs->dirty_rect_count = i;
}
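To make the client side of the kernel-doc above concrete: a damage-aware userspace client hands dirty regions to the kernel through the standard FB_DAMAGE_CLIPS plane property (which, per the TODO, amdgpu_dm does not consume yet). Below is a minimal hypothetical libdrm sketch; `fd`, `plane_id` and `prop_damage_clips_id` are assumed to have been discovered via drmModeObjectGetProperties(), and a real commit would also carry the FB_ID/CRTC_ID properties for the flip.

#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Hypothetical sketch: attach one damage clip to an atomic commit. */
static int commit_one_damage_clip(int fd, uint32_t plane_id,
				  uint32_t prop_damage_clips_id)
{
	struct drm_mode_rect clip = { .x1 = 0, .y1 = 0, .x2 = 256, .y2 = 256 };
	drmModeAtomicReqPtr req;
	uint32_t blob_id;
	int ret;

	/* Damage clips travel as a blob of drm_mode_rect entries. */
	ret = drmModeCreatePropertyBlob(fd, &clip, sizeof(clip), &blob_id);
	if (ret)
		return ret;

	req = drmModeAtomicAlloc();
	if (!req) {
		drmModeDestroyPropertyBlob(fd, blob_id);
		return -ENOMEM;
	}

	drmModeAtomicAddProperty(req, plane_id, prop_damage_clips_id, blob_id);
	/* NONBLOCK just keeps the example short; a real client picks its flags. */
	ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_NONBLOCK, NULL);

	drmModeAtomicFree(req);
	drmModeDestroyPropertyBlob(fd, blob_id);
	return ret;
}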
static void update_stream_scaling_settings(const struct drm_display_mode *mode,
const struct dm_connector_state *dm_state,
struct dc_stream_state *stream)
@ -6587,7 +6761,7 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
state->freesync_config = cur->freesync_config;
state->cm_has_degamma = cur->cm_has_degamma;
state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
state->force_dpms_off = cur->force_dpms_off;
state->mpo_requested = cur->mpo_requested;
/* TODO: duplicate dc_stream once the stream object is flattened */
return &state->base;
@ -6679,7 +6853,7 @@ static void dm_disable_vblank(struct drm_crtc *crtc)
dm_set_vblank(crtc, false);
}
/* Implemented only the options currently availible for the driver */
/* Implemented only the options currently available for the driver */
static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
.reset = dm_crtc_reset_state,
.destroy = amdgpu_dm_crtc_destroy,
@ -6846,15 +7020,12 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
if (aconnector->mst_mgr.dev)
drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr);
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
for (i = 0; i < dm->num_of_edps; i++) {
if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) {
backlight_device_unregister(dm->backlight_dev[i]);
dm->backlight_dev[i] = NULL;
}
}
#endif
if (aconnector->dc_em_sink)
dc_sink_release(aconnector->dc_em_sink);
@ -7042,7 +7213,11 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
break;
}
dc_result = dc_validate_stream(adev->dm.dc, stream);
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
if (dc_result == DC_OK)
dc_result = dc_validate_stream(adev->dm.dc, stream);
if (dc_result != DC_OK) {
DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n",
@ -7342,7 +7517,7 @@ static void dm_encoder_helper_disable(struct drm_encoder *encoder)
}
static int convert_dc_color_depth_into_bpc (enum dc_color_depth display_color_depth)
int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth)
{
switch (display_color_depth) {
case COLOR_DEPTH_666:
@ -9224,6 +9399,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].plane_info =
&bundle->plane_infos[planes_count];
fill_dc_dirty_rects(plane, old_plane_state, new_plane_state,
new_crtc_state,
&bundle->flip_addrs[planes_count]);
/*
* Only allow immediate flips for fast updates that don't
* change FB pitch, DCC state, rotation or mirroring.
@ -9310,8 +9489,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* and rely on sending it from software.
*/
if (acrtc_attach->base.state->event &&
acrtc_state->active_planes > 0 &&
!acrtc_state->force_dpms_off) {
acrtc_state->active_planes > 0) {
drm_crtc_vblank_get(pcrtc);
spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
@ -9419,6 +9597,18 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
/* Allow PSR when skip count is 0. */
acrtc_attach->dm_irq_params.allow_psr_entry = !aconn->psr_skip_count;
/*
* If the sink supports PSR SU, there is no need to rely on
* a vblank event disable request to enable PSR. PSR SU
* can be enabled immediately once the OS demonstrates an
* adequate number of fast atomic commits to notify the KMD
* of update events. See `vblank_control_worker()`.
*/
if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
acrtc_attach->dm_irq_params.allow_psr_entry &&
!acrtc_state->stream->link->psr_settings.psr_allow_active)
amdgpu_dm_psr_enable(acrtc_state->stream);
} else {
acrtc_attach->dm_irq_params.allow_psr_entry = false;
}
@ -9912,15 +10102,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
/* Update audio instances for each connector. */
amdgpu_dm_commit_audio(dev, state);
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \
defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
/* restore the backlight level */
for (i = 0; i < dm->num_of_edps; i++) {
if (dm->backlight_dev[i] &&
(dm->actual_brightness[i] != dm->brightness[i]))
amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
}
#endif
/*
* send a vblank event for all events not handled in flip and
* mark the consumed event for drm_atomic_helper_commit_hw_done
@ -10368,7 +10556,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* added MST connectors not found in existing crtc_state in the chained mode
* TODO: need to dig out the root cause of that
*/
if (!aconnector || (!aconnector->dc_sink && aconnector->mst_port))
if (!aconnector)
goto skip_modeset;
if (modereset_required(new_crtc_state))
@ -10979,7 +11167,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
}
}
pre_validate_dsc(state, &dm_state, vars);
if (!pre_validate_dsc(state, &dm_state, vars)) {
ret = -EINVAL;
goto fail;
}
}
#endif
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
@ -11225,6 +11416,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
#if defined(CONFIG_DRM_AMD_DC_DCN)
if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) {
DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
ret = -EINVAL;
goto fail;
}

View file

@ -242,6 +242,13 @@ struct hpd_rx_irq_offload_work {
* @force_timing_sync: set via debugfs. When set, indicates that all connected
* displays will be forced to synchronize.
* @dmcub_trace_event_en: enable dmcub trace events
* @dmub_outbox_params: DMUB Outbox parameters
* @num_of_edps: number of backlight eDPs
* @disable_hpd_irq: disables all HPD and HPD RX interrupt handling in the
* driver when true
* @dmub_aux_transfer_done: struct completion used to indicate when DMUB
* transfers are done
* @delayed_hpd_wq: work queue used to delay DMUB HPD work
*/
struct amdgpu_display_manager {
@ -583,7 +590,6 @@ struct amdgpu_dm_connector {
struct drm_dp_mst_port *port;
struct amdgpu_dm_connector *mst_port;
struct drm_dp_aux *dsc_aux;
/* TODO see if we can merge with ddc_bus or make a dm_connector */
struct amdgpu_i2c_adapter *i2c;
@ -639,8 +645,6 @@ struct dm_crtc_state {
bool dsc_force_changed;
bool vrr_supported;
bool force_dpms_off;
struct mod_freesync_config freesync_config;
struct dc_info_packet vrr_infopacket;
@ -749,4 +753,6 @@ int dm_atomic_get_state(struct drm_atomic_state *state,
struct amdgpu_dm_connector *
amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
struct drm_crtc *crtc);
int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth);
#endif /* __AMDGPU_DM_H__ */

View file

@ -540,11 +540,11 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
/* apply phy settings from user */
for (r = 0; r < link_lane_settings.link_settings.lane_count; r++) {
link_lane_settings.lane_settings[r].VOLTAGE_SWING =
link_lane_settings.hw_lane_settings[r].VOLTAGE_SWING =
(enum dc_voltage_swing) (param[0]);
link_lane_settings.lane_settings[r].PRE_EMPHASIS =
link_lane_settings.hw_lane_settings[r].PRE_EMPHASIS =
(enum dc_pre_emphasis) (param[1]);
link_lane_settings.lane_settings[r].POST_CURSOR2 =
link_lane_settings.hw_lane_settings[r].POST_CURSOR2 =
(enum dc_post_cursor2) (param[2]);
}
@ -738,7 +738,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
}
for (i = 0; i < (unsigned int)(link_training_settings.link_settings.lane_count); i++)
link_training_settings.lane_settings[i] = link->cur_lane_setting[i];
link_training_settings.hw_lane_settings[i] = link->cur_lane_setting[i];
dc_link_set_test_pattern(
link,

View file

@ -476,13 +476,16 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
link->ddc_line = aconnector->dc_link->ddc_hw_inst + 1;
display->stream_enc_idx = config->stream_enc_idx;
link->link_enc_idx = config->link_enc_idx;
link->dio_output_id = config->dio_output_idx;
link->phy_idx = config->phy_idx;
if (sink)
link_is_hdcp14 = dc_link_is_hdcp14(aconnector->dc_link, sink->sink_signal);
link->hdcp_supported_informational = link_is_hdcp14;
link->dp.rev = aconnector->dc_link->dpcd_caps.dpcd_rev.raw;
link->dp.assr_enabled = config->assr_enabled;
link->dp.mst_enabled = config->mst_enabled;
link->dp.usb4_enabled = config->usb4_enabled;
display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
link->adjust.auth_delay = 3;
link->adjust.hdcp1.disable = 0;

View file

@ -451,7 +451,6 @@ bool dm_helpers_dp_mst_stop_top_mgr(
struct dc_link *link)
{
struct amdgpu_dm_connector *aconnector = link->priv;
uint8_t i;
if (!aconnector) {
DRM_ERROR("Failed to find connector for link!");
@ -463,22 +462,7 @@ bool dm_helpers_dp_mst_stop_top_mgr(
if (aconnector->mst_mgr.mst_state == true) {
drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, false);
for (i = 0; i < MAX_SINKS_PER_LINK; i++) {
if (link->remote_sinks[i] == NULL)
continue;
if (link->remote_sinks[i]->sink_signal ==
SIGNAL_TYPE_DISPLAY_PORT_MST) {
dc_link_remove_remote_sink(link, link->remote_sinks[i]);
if (aconnector->dc_sink) {
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
aconnector->dc_link->cur_link_settings.lane_count = 0;
}
}
}
link->cur_link_settings.lane_count = 0;
}
return false;

View file

@ -140,11 +140,28 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
static void
amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
struct amdgpu_dm_connector *aconnector =
to_amdgpu_dm_connector(connector);
struct drm_dp_mst_port *port = amdgpu_dm_connector->port;
struct drm_dp_mst_port *port = aconnector->port;
struct amdgpu_dm_connector *root = aconnector->mst_port;
struct dc_link *dc_link = aconnector->dc_link;
struct dc_sink *dc_sink = aconnector->dc_sink;
drm_dp_mst_connector_early_unregister(connector, port);
/*
* Release the dc_sink for a connector whose attached port is
* no longer in the MST topology.
*/
drm_modeset_lock(&root->mst_mgr.base.lock, NULL);
if (dc_sink) {
if (dc_link->sink_count)
dc_link_remove_remote_sink(dc_link, dc_sink);
dc_sink_release(dc_sink);
aconnector->dc_sink = NULL;
}
drm_modeset_unlock(&root->mst_mgr.base.lock);
}
static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
@ -344,12 +361,59 @@ dm_dp_mst_detect(struct drm_connector *connector,
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct amdgpu_dm_connector *master = aconnector->mst_port;
struct drm_dp_mst_port *port = aconnector->port;
int connection_status;
if (drm_connector_is_unregistered(connector))
return connector_status_disconnected;
return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
aconnector->port);
connection_status = drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
aconnector->port);
if (port->pdt != DP_PEER_DEVICE_NONE && !port->dpcd_rev) {
uint8_t dpcd_rev;
int ret;
ret = drm_dp_dpcd_readb(&port->aux, DP_DP13_DPCD_REV, &dpcd_rev);
if (ret == 1) {
port->dpcd_rev = dpcd_rev;
/* Could be a DP 1.2 DP Rx case */
if (!dpcd_rev) {
ret = drm_dp_dpcd_readb(&port->aux, DP_DPCD_REV, &dpcd_rev);
if (ret == 1)
port->dpcd_rev = dpcd_rev;
}
if (!dpcd_rev)
DRM_DEBUG_KMS("Can't decide DPCD revision number!");
}
/*
* Could be a legacy sink, a logical port, etc. on DP 1.2.
* Remote DPCD reads will be NAKed in these cases.
*/
if (ret != 1)
DRM_DEBUG_KMS("Can't access DPCD");
} else if (port->pdt == DP_PEER_DEVICE_NONE) {
port->dpcd_rev = 0;
}
/*
* Release the dc_sink for a connector whose unplug event was notified via a CSN message
*/
if (connection_status == connector_status_disconnected && aconnector->dc_sink) {
if (aconnector->dc_link->sink_count)
dc_link_remove_remote_sink(aconnector->dc_link, aconnector->dc_sink);
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
}
return connection_status;
}
static int dm_dp_mst_atomic_check(struct drm_connector *connector,
@ -634,7 +698,7 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn)
return dsc_config.bits_per_pixel;
}
static void increase_dsc_bpp(struct drm_atomic_state *state,
static bool increase_dsc_bpp(struct drm_atomic_state *state,
struct dc_link *dc_link,
struct dsc_mst_fairness_params *params,
struct dsc_mst_fairness_vars *vars,
@ -694,7 +758,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
params[next_index].port,
vars[next_index].pbn,
pbn_per_timeslot) < 0)
return;
return false;
if (!drm_dp_mst_atomic_check(state)) {
vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn);
} else {
@ -704,7 +768,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
params[next_index].port,
vars[next_index].pbn,
pbn_per_timeslot) < 0)
return;
return false;
}
} else {
vars[next_index].pbn += initial_slack[next_index];
@ -713,7 +777,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
params[next_index].port,
vars[next_index].pbn,
pbn_per_timeslot) < 0)
return;
return false;
if (!drm_dp_mst_atomic_check(state)) {
vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16;
} else {
@ -723,16 +787,17 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
params[next_index].port,
vars[next_index].pbn,
pbn_per_timeslot) < 0)
return;
return false;
}
}
bpp_increased[next_index] = true;
remaining_to_increase--;
}
return true;
}
static void try_disable_dsc(struct drm_atomic_state *state,
static bool try_disable_dsc(struct drm_atomic_state *state,
struct dc_link *dc_link,
struct dsc_mst_fairness_params *params,
struct dsc_mst_fairness_vars *vars,
@ -780,7 +845,7 @@ static void try_disable_dsc(struct drm_atomic_state *state,
params[next_index].port,
vars[next_index].pbn,
dm_mst_get_pbn_divider(dc_link)) < 0)
return;
return false;
if (!drm_dp_mst_atomic_check(state)) {
vars[next_index].dsc_enabled = false;
@ -792,12 +857,13 @@ static void try_disable_dsc(struct drm_atomic_state *state,
params[next_index].port,
vars[next_index].pbn,
dm_mst_get_pbn_divider(dc_link)) < 0)
return;
return false;
}
tried[next_index] = true;
remaining_to_try--;
}
return true;
}
static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
@ -913,9 +979,11 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
return false;
/* Optimize degree of compression */
increase_dsc_bpp(state, dc_link, params, vars, count, k);
if (!increase_dsc_bpp(state, dc_link, params, vars, count, k))
return false;
try_disable_dsc(state, dc_link, params, vars, count, k);
if (!try_disable_dsc(state, dc_link, params, vars, count, k))
return false;
set_dsc_configs_from_fairness_vars(params, vars, count, k);
@ -1187,21 +1255,22 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state)
return ret;
}
void pre_validate_dsc(struct drm_atomic_state *state,
bool pre_validate_dsc(struct drm_atomic_state *state,
struct dm_atomic_state **dm_state_ptr,
struct dsc_mst_fairness_vars *vars)
{
int i;
struct dm_atomic_state *dm_state;
struct dc_state *local_dc_state = NULL;
int ret = 0;
if (!is_dsc_precompute_needed(state)) {
DRM_INFO_ONCE("DSC precompute is not needed.\n");
return;
return true;
}
if (dm_atomic_get_state(state, dm_state_ptr)) {
DRM_INFO_ONCE("dm_atomic_get_state() failed\n");
return;
return false;
}
dm_state = *dm_state_ptr;
@ -1213,7 +1282,7 @@ void pre_validate_dsc(struct drm_atomic_state *state,
local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL);
if (!local_dc_state)
return;
return false;
for (i = 0; i < local_dc_state->stream_count; i++) {
struct dc_stream_state *stream = dm_state->context->streams[i];
@ -1239,11 +1308,19 @@ void pre_validate_dsc(struct drm_atomic_state *state,
&state->crtcs[ind].new_state->mode,
dm_new_conn_state,
dm_old_crtc_state->stream);
if (local_dc_state->streams[i] == NULL) {
ret = -EINVAL;
break;
}
}
}
if (ret != 0)
goto clean_exit;
if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) {
DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n");
ret = -EINVAL;
goto clean_exit;
}
@ -1273,5 +1350,43 @@ void pre_validate_dsc(struct drm_atomic_state *state,
}
kfree(local_dc_state);
return (ret == 0);
}
#endif
enum dc_status dm_dp_mst_is_port_support_mode(
struct amdgpu_dm_connector *aconnector,
struct dc_stream_state *stream)
{
int bpp, pbn, branch_max_throughput_mps = 0;
/* check if the mode could be supported within full_pbn */
bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
if (pbn > aconnector->port->full_pbn)
return DC_FAIL_BANDWIDTH_VALIDATE;
/* check whether the MST DSC output bandwidth fits the branch's overall throughput cap */
switch (stream->timing.pixel_encoding) {
case PIXEL_ENCODING_RGB:
case PIXEL_ENCODING_YCBCR444:
branch_max_throughput_mps =
aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_0_mps;
break;
case PIXEL_ENCODING_YCBCR422:
case PIXEL_ENCODING_YCBCR420:
branch_max_throughput_mps =
aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_1_mps;
break;
default:
break;
}
if (branch_max_throughput_mps != 0 &&
((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000))
return DC_FAIL_BANDWIDTH_VALIDATE;
return DC_OK;
}
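A worked sanity check of the full_pbn gate above (assuming drm_dp_calc_pbn_mode() still computes PBN = ceil(clock_kHz * bpp * 64 * 1006 / (8 * 54 * 10^6)), i.e. a 54/64-MBps unit with a 0.6% margin): a 3840x2160@60 RGB 8bpc stream has pix_clk_100hz / 10 ≈ 533,250 kHz and bpp = 3 * 8 = 24, which yields ≈ 1908 PBN and fits within the ≈ 2560 PBN of a 4-lane HBR2 MST link; at 10bpc (bpp = 30) the demand grows to ≈ 2385 PBN, leaving almost no headroom for other streams on the branch.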

View file

@ -59,8 +59,12 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
bool needs_dsc_aux_workaround(struct dc_link *link);
void pre_validate_dsc(struct drm_atomic_state *state,
bool pre_validate_dsc(struct drm_atomic_state *state,
struct dm_atomic_state **dm_state_ptr,
struct dsc_mst_fairness_vars *vars);
enum dc_status dm_dp_mst_is_port_support_mode(
struct amdgpu_dm_connector *aconnector,
struct dc_stream_state *stream);
#endif

View file

@ -79,10 +79,12 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link)
link->psr_settings.psr_feature_enabled = true;
}
DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d\n",
DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n",
link->psr_settings.psr_feature_enabled,
link->psr_settings.psr_version,
link->dpcd_caps.psr_info.psr_version);
link->dpcd_caps.psr_info.psr_version,
link->dpcd_caps.psr_info.psr_dpcd_caps.raw,
link->dpcd_caps.psr_info.psr2_su_y_granularity_cap);
}
@ -97,19 +99,24 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
struct dc_link *link = NULL;
struct psr_config psr_config = {0};
struct psr_context psr_context = {0};
struct dc *dc = NULL;
bool ret = false;
if (stream == NULL)
return false;
link = stream->link;
dc = link->ctx->dc;
if (link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) {
psr_config.psr_version = link->psr_settings.psr_version;
psr_config.psr_frame_capture_indication_req = 0;
psr_config.psr_rfb_setup_time = 0x37;
psr_config.psr_sdp_transmit_line_num_deadline = 0x20;
psr_config.allow_smu_optimizations = 0x0;
mod_power_calc_psr_configs(&psr_config, link, stream);
/* Linux DM-specific updates to the PSR config fields */
psr_config.allow_smu_optimizations =
(amdgpu_dc_feature_mask & DC_PSR_ALLOW_SMU_OPT) &&
mod_power_only_edp(dc->current_state, stream);
psr_config.allow_multi_disp_optimizations =
(amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT);
ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context);

View file

@ -38,6 +38,8 @@ DC_LIBS += dcn303
DC_LIBS += dcn31
DC_LIBS += dcn315
DC_LIBS += dcn316
DC_LIBS += dcn32
DC_LIBS += dcn321
endif
DC_LIBS += dce120

File diff suppressed because it is too large

View file

@ -40,6 +40,7 @@ struct object_info_table {
struct atom_data_revision revision;
union {
struct display_object_info_table_v1_4 *v1_4;
struct display_object_info_table_v1_5 *v1_5;
};
};

View file

@ -522,8 +522,8 @@ static enum bp_result transmitter_control_v2(
*/
params.acConfig.ucEncoderSel = 1;
if (CONNECTOR_ID_DISPLAY_PORT == connector_id
|| CONNECTOR_ID_USBC == connector_id)
if (CONNECTOR_ID_DISPLAY_PORT == connector_id ||
CONNECTOR_ID_USBC == connector_id)
/* Bit4: DP connector flag
* =0 connector is a non-DP connector
* =1 connector is a DP connector

View file

@ -77,6 +77,8 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
case DCN_VERSION_3_1:
case DCN_VERSION_3_15:
case DCN_VERSION_3_16:
case DCN_VERSION_3_2:
case DCN_VERSION_3_21:
*h = dal_cmd_tbl_helper_dce112_get_table2();
return true;

View file

@ -172,4 +172,38 @@ AMD_DAL_CLK_MGR_DCN316 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn316/,$(CLK_MGR_
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN316)
###############################################################################
# DCN32
###############################################################################
CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o
AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32))
ifdef CONFIG_X86
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse
endif
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec
endif
ifdef CONFIG_CC_IS_GCC
ifeq ($(call cc-ifversion, -lt, 0701, y), y)
IS_OLD_GCC = 1
endif
endif
ifdef CONFIG_X86
ifdef IS_OLD_GCC
# Stack alignment mismatch, proceed with caution.
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
# (8B stack alignment).
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4
else
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2
endif
endif
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
endif

View file

@ -45,6 +45,7 @@
#include "dcn31/dcn31_clk_mgr.h"
#include "dcn315/dcn315_clk_mgr.h"
#include "dcn316/dcn316_clk_mgr.h"
#include "dcn32/dcn32_clk_mgr.h"
int clk_mgr_helper_get_active_display_cnt(
@ -316,8 +317,19 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
return &clk_mgr->base.base;
}
break;
#endif
case AMDGPU_FAMILY_GC_11_0_0: {
struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
if (clk_mgr == NULL) {
BREAK_TO_DEBUGGER();
return NULL;
}
dcn32_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base;
break;
}
#endif
default:
ASSERT(0); /* Unknown Asic */
break;
@ -360,6 +372,9 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
dcn316_clk_mgr_destroy(clk_mgr);
break;
case AMDGPU_FAMILY_GC_11_0_0:
dcn32_clk_mgr_destroy(clk_mgr);
break;
default:
break;
}

View file

@ -126,16 +126,24 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct dc_state *context)
{
int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
int dpp_divider = 0;
int disp_divider = 0;
uint32_t dppclk_wdivider = 0;
uint32_t dispclk_wdivider = 0;
uint32_t current_dispclk_wdivider;
uint32_t i;
if (clk_mgr->base.clks.dppclk_khz == 0 || clk_mgr->base.clks.dispclk_khz == 0)
return;
dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz;
disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
REG_GET(DENTIST_DISPCLK_CNTL,
DENTIST_DISPCLK_WDIVIDER, &current_dispclk_wdivider);
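For reference, the divider math this guard protects (assuming the usual DENTIST_DIVIDER_RANGE_SCALE_FACTOR of 4): a 3,600,000 kHz dentist VCO with a 600,000 kHz dispclk gives disp_divider = 4 * 3,600,000 / 600,000 = 24, which dentist_get_did_from_divider() then maps to a DID register code. Before the clocks are first programmed, dppclk_khz or dispclk_khz can still be zero, which would make these divisions fault, hence the early return added at the top.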
@ -436,7 +444,6 @@ void dcn2_read_clocks_from_hw_dentist(struct clk_mgr *clk_mgr_base)
clk_mgr_base->clks.dppclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz) / dpp_divider;
}
}
void dcn2_get_clock(struct clk_mgr *clk_mgr,

View file

@ -41,6 +41,12 @@
#define FN(reg_name, field) \
FD(reg_name##__##field)
#include "logger_types.h"
#undef DC_LOGGER
#define DC_LOGGER \
CTX->logger
#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
#define VBIOSSMC_MSG_TestMessage 0x1
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_PowerUpGfx 0x3
@ -97,6 +103,12 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK);
smu_print("SMU response after wait: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
}
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
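Pieced together, the hardened transaction now looks roughly like the sketch below (condensed from the hunks above; the argument and message registers MP1_SMN_C2PMSG_83/67 are assumptions based on the Renoir mailbox layout and are not shown in this diff):

/* Condensed sketch of the hardened VBIOS SMU mailbox transaction. */
static int rn_vbios_smu_send_msg_with_param_sketch(struct clk_mgr_internal *clk_mgr,
						   unsigned int msg_id, unsigned int param)
{
	uint32_t result;

	/* Wait for any in-flight transaction; BUSY after the timeout means the SMU hung. */
	result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
	smu_print("SMU response after wait: %d\n", result);
	if (result == VBIOSSMC_Status_BUSY)
		return -1;	/* bail out instead of clobbering the mailbox */

	REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);	/* clear the response register */
	REG_WRITE(MP1_SMN_C2PMSG_83, param);			/* assumed argument register */
	REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);			/* assumed message register */

	return rn_smu_wait_for_response(clk_mgr, 10, 200000);	/* collect the new response */
}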

View file

@ -129,7 +129,7 @@ static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dummy_pstate_latency_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
@ -137,6 +137,14 @@ static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
/* Set D - MALL - SR enter and exit times adjusted for MALL */
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
@ -517,6 +525,8 @@ static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct d
if (!clk_mgr->smu_present)
return;
/* TODO - DP2.0 HW: calculate the 128b/132b link rate in the clock manager with the new formula */
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
for (i = 0; i < MAX_PIPES * 2; i++) {
@ -620,7 +630,8 @@ void dcn3_clk_mgr_construct(
void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
{
kfree(clk_mgr->base.bw_params);
if (clk_mgr->base.bw_params)
kfree(clk_mgr->base.bw_params);
if (clk_mgr->wm_range_table)
dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART,

View file

@ -26,6 +26,66 @@
#ifndef __DCN30_CLK_MGR_H__
#define __DCN30_CLK_MGR_H__
//CLK1_CLK_PLL_REQ
#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT
#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
//CLK1_CLK0_DFS_CNTL
#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0
#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL
/*DPREF clock related*/
#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
//CLK3_0_CLK3_CLK_PLL_REQ
#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
#define mmCLK0_CLK2_DFS_CNTL 0x16C55
#define mmCLK00_CLK0_CLK2_DFS_CNTL 0x16C55
#define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E55
#define mmCLK02_CLK0_CLK2_DFS_CNTL 0x17055
#define mmCLK0_CLK3_DFS_CNTL 0x16C60
#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60
#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60
#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060
#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260
#define mmCLK0_CLK_PLL_REQ 0x16C10
#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10
#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10
#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010
#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210
#define mmCLK1_CLK_PLL_REQ 0x1B00D
#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D
#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D
#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D
#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D
#define mmCLK2_CLK_PLL_REQ 0x17E0D
/*AMCLK*/
#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F
#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D
#endif
void dcn3_init_clocks(struct clk_mgr *clk_mgr_base);
void dcn3_clk_mgr_construct(struct dc_context *ctx,

View file

@ -28,6 +28,8 @@
#include "clk_mgr_internal.h"
#include "reg_helper.h"
#include "dm_helpers.h"
#include "dalsmc.h"
#include "dcn30_smu11_driver_if.h"
@ -74,6 +76,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
{
uint32_t result;
/* Wait for response register to be ready */
dcn30_smu_wait_for_response(clk_mgr, 10, 200000);
@ -86,8 +89,14 @@ static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint
/* Trigger the message transaction by writing the message ID */
REG_WRITE(DAL_MSG_REG, msg_id);
result = dcn30_smu_wait_for_response(clk_mgr, 10, 200000);
if (IS_SMU_TIMEOUT(result)) {
dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000);
}
/* Wait for response */
if (dcn30_smu_wait_for_response(clk_mgr, 10, 200000) == DALSMC_Result_OK) {
if (result == DALSMC_Result_OK) {
if (param_out)
*param_out = REG_READ(DAL_ARG_REG);

View file

@ -41,6 +41,12 @@
#define FN(reg_name, field) \
FD(reg_name##__##field)
#include "logger_types.h"
#undef DC_LOGGER
#define DC_LOGGER \
CTX->logger
#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_SetDispclkFreq 0x4
#define VBIOSSMC_MSG_SetDprefclkFreq 0x5
@ -96,6 +102,12 @@ static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000);
smu_print("SMU response after wait: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
}
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);

View file

@ -40,6 +40,12 @@
#define FN(reg_name, field) \
FD(reg_name##__##field)
#include "logger_types.h"
#undef DC_LOGGER
#define DC_LOGGER \
CTX->logger
#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
#define VBIOSSMC_MSG_TestMessage 0x1
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_PowerUpGfx 0x3
@ -104,6 +110,8 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK);
smu_print("SMU response after wait: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
}

View file

@ -550,7 +550,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
if (!bw_params->clk_table.entries[i].dtbclk_mhz)
bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
}
ASSERT(bw_params->clk_table.entries[i].dcfclk_mhz);
ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
bw_params->vram_type = bios_info->memory_type;
bw_params->num_channels = bios_info->ma_channel_number;
if (!bw_params->num_channels)

View file

@ -70,6 +70,12 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D
#define REG_NBIO(reg_name) \
(NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name)
#include "logger_types.h"
#undef DC_LOGGER
#define DC_LOGGER \
CTX->logger
#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
#define mmMP1_C2PMSG_3 0x3B1050C
#define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
@ -132,6 +138,8 @@ static int dcn315_smu_send_msg_with_param(
result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK);
smu_print("SMU response after wait: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
}

Some files were not shown because too many files have changed in this diff