Merge tag 'amd-drm-next-5.19-2022-05-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.19-2022-05-18:

amdgpu:
- Misc code cleanups
- Additional SMU 13.x enablement
- Smartshift fixes
- GFX11 fixes
- Support for SMU 13.0.4
- SMU mutex fix
- Suspend/resume fix

amdkfd:
- static checker fix
- Doorbell/MMIO resource handling fix

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220518205621.5741-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2022-05-19 14:09:46 +10:00
commit 00df0514ab
251 changed files with 384461 additions and 2229 deletions

View file

@ -58,7 +58,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@ -74,7 +74,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o
# add DF block
amdgpu-y += \
@ -87,7 +88,7 @@ amdgpu-y += \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
mmhub_v1_7.o
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o
# add UMC block
amdgpu-y += \
@ -102,7 +103,8 @@ amdgpu-y += \
cz_ih.o \
vega10_ih.o \
vega20_ih.o \
navi10_ih.o
navi10_ih.o \
ih_v6_0.o
# add PSP block
amdgpu-y += \
@ -128,7 +130,9 @@ amdgpu-y += \
gfx_v9_0.o \
gfx_v9_4.o \
gfx_v9_4_2.o \
gfx_v10_0.o
gfx_v10_0.o \
imu_v11_0.o \
gfx_v11_0.o
# add async DMA block
amdgpu-y += \
@ -138,11 +142,14 @@ amdgpu-y += \
sdma_v4_0.o \
sdma_v4_4.o \
sdma_v5_0.o \
sdma_v5_2.o
sdma_v5_2.o \
sdma_v6_0.o
# add MES block
amdgpu-y += \
mes_v10_1.o
amdgpu_mes.o \
mes_v10_1.o \
mes_v11_0.o
# add UVD block
amdgpu-y += \
@ -160,28 +167,33 @@ amdgpu-y += \
# add VCN and JPEG block
amdgpu-y += \
amdgpu_vcn.o \
vcn_sw_ring.o \
vcn_v1_0.o \
vcn_v2_0.o \
vcn_v2_5.o \
vcn_v3_0.o \
vcn_v4_0.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
jpeg_v3_0.o
jpeg_v3_0.o \
jpeg_v4_0.o
# add ATHUB block
amdgpu-y += \
athub_v1_0.o \
athub_v2_0.o \
athub_v2_1.o
athub_v2_1.o \
athub_v3_0.o
# add SMUIO block
amdgpu-y += \
smuio_v9_0.o \
smuio_v11_0.o \
smuio_v11_0_6.o \
smuio_v13_0.o
smuio_v13_0.o \
smuio_v13_0_6.o
# add reset block
amdgpu-y += \
@ -207,7 +219,8 @@ amdgpu-y += \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_aldebaran.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o

View file

@ -86,11 +86,13 @@
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
#include "amdgpu_lsdma.h"
#include "amdgpu_nbio.h"
#include "amdgpu_hdp.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
#include "amdgpu_mes_ctx.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
@ -207,6 +209,7 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
extern int amdgpu_mes_kiq;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
@ -641,6 +644,7 @@ enum amd_hw_ip_block_type {
SDMA5_HWIP,
SDMA6_HWIP,
SDMA7_HWIP,
LSDMA_HWIP,
MMHUB_HWIP,
ATHUB_HWIP,
NBIO_HWIP,
@ -720,6 +724,26 @@ struct ip_discovery_top;
(rid == 0x01) || \
(rid == 0x10))))
struct amdgpu_mqd_prop {
uint64_t mqd_gpu_addr;
uint64_t hqd_base_gpu_addr;
uint64_t rptr_gpu_addr;
uint64_t wptr_gpu_addr;
uint32_t queue_size;
bool use_doorbell;
uint32_t doorbell_index;
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
uint32_t hqd_queue_priority;
bool hqd_active;
};
struct amdgpu_mqd {
unsigned mqd_size;
int (*init_mqd)(struct amdgpu_device *adev, void *mqd,
struct amdgpu_mqd_prop *p);
};
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
#define AMDGPU_PRODUCT_NAME_LEN 64
@ -887,6 +911,9 @@ struct amdgpu_device {
/* sdma */
struct amdgpu_sdma sdma;
/* lsdma */
struct amdgpu_lsdma lsdma;
/* uvd */
struct amdgpu_uvd uvd;
@ -919,7 +946,9 @@ struct amdgpu_device {
/* mes */
bool enable_mes;
bool enable_mes_kiq;
struct amdgpu_mes mes;
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
/* df */
struct amdgpu_df df;
@ -981,10 +1010,10 @@ struct amdgpu_device {
bool runpm;
bool in_runpm;
bool has_pr3;
bool is_fw_fb;
bool pm_sysfs_en;
bool ucode_sysfs_en;
bool psp_sysfs_en;
/* Chip product information */
char product_number[16];
@ -1016,6 +1045,9 @@ struct amdgpu_device {
/* reset dump register */
uint32_t *reset_dump_reg_list;
int num_regs;
bool scpm_enabled;
uint32_t scpm_status;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@ -1188,7 +1220,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_flush_hdp(adev, r) \
((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
#define amdgpu_asic_invalidate_hdp(adev, r) \
((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : (adev)->hdp.funcs->invalidate_hdp((adev), (r)))
((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \
((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : 0))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
@ -1345,9 +1378,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
#else
static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif

View file

@ -1045,6 +1045,20 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
(pm_suspend_target_state == PM_SUSPEND_MEM);
}
/**
* amdgpu_acpi_should_gpu_reset
*
* @adev: amdgpu_device_pointer
*
* returns true if should reset GPU, false if not
*/
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
}
/**
* amdgpu_acpi_is_s0ix_active
*

View file

@ -100,7 +100,18 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
* The first num_doorbells are used by amdgpu.
* amdkfd takes whatever's left in the aperture.
*/
if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
if (adev->enable_mes) {
/*
* With MES enabled, we only need to initialize
* the base address. The size and offset are
* not initialized as AMDGPU manages the whole
* doorbell space.
*/
*aperture_base = adev->doorbell.base;
*aperture_size = 0;
*start_offset = 0;
} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
sizeof(u32)) {
*aperture_base = adev->doorbell.base;
*aperture_size = adev->doorbell.size;
*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
@ -128,7 +139,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev_to_drm(adev)->render->index,
.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
.enable_mes = adev->enable_mes,
};
/* this is going to have a few of the MSBs set that we need to

View file

@ -0,0 +1,625 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "oss/osssys_6_0_0_offset.h"
#include "oss/osssys_6_0_0_sh_mask.h"
#include "soc15_common.h"
#include "soc15d.h"
#include "v11_structs.h"
#include "soc21.h"
enum hqd_dequeue_request_type {
NO_ACTION = 0,
DRAIN_PIPE,
RESET_WAVES,
SAVE_WAVES
};
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, mec, pipe, queue, vmid);
}
static void unlock_srbm(struct amdgpu_device *adev)
{
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(adev, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
queue_id;
return 1ull << bit;
}
static void release_queue(struct amdgpu_device *adev)
{
unlock_srbm(adev);
}
static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
{
lock_srbm(adev, 0, 0, 0, vmid);
WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);
unlock_srbm(adev);
}
static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
/* Mapping vmid to pasid also for IH block */
pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
vmid, pasid);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);
return 0;
}
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
{
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(adev, mec, pipe, 0, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
unlock_srbm(adev);
return 0;
}
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
uint32_t sdma_engine_reg_base = 0;
uint32_t sdma_rlc_reg_offset;
switch (engine_id) {
case 0:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
break;
case 1:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
break;
default:
BUG();
}
sdma_rlc_reg_offset = sdma_engine_reg_base
+ queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
queue_id, sdma_rlc_reg_offset);
return sdma_rlc_reg_offset;
}
static inline struct v11_compute_mqd *get_mqd(void *mqd)
{
return (struct v11_compute_mqd *)mqd;
}
static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
{
return (struct v11_sdma_mqd *)mqd;
}
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm)
{
struct v11_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
acquire_queue(adev, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0*/
if (m->cp_hqd_vmid == 0) {
uint32_t value, mec, pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
WREG32(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
* context may not be accessible (if this function
* runs in a work queue). Instead trigger a one-shot
* polling read from memory in the CP. This assumes
* that wptr is GPU-accessible in the queue's VMID via
* ATC or SVM. WPTR==RPTR before starting the poll so
* the CP starts fetching new commands from the right
* place.
*
* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
* tricky. Assume that the queue didn't overflow. The
* number of valid bits in the 32-bit RPTR depends on
* the queue size. The remaining bits are taken from
* the saved 64-bit WPTR. If the WPTR wrapped, add the
* queue size.
*/
uint32_t queue_size =
2 << REG_GET_FIELD(m->cp_hqd_pq_control,
CP_HQD_PQ_CONTROL, QUEUE_SIZE);
uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
guessed_wptr += queue_size;
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uint64_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uint64_t)wptr));
pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
release_queue(adev);
return 0;
}
static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v11_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
acquire_queue(adev, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq.ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
goto out_unlock;
}
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
amdgpu_ring_write(kiq_ring,
PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
PACKET3_MAP_QUEUES_QUEUE(queue_id) |
PACKET3_MAP_QUEUES_PIPE(pipe) |
PACKET3_MAP_QUEUES_ME((mec - 1)) |
PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
amdgpu_ring_write(kiq_ring,
PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
amdgpu_ring_commit(kiq_ring);
out_unlock:
spin_unlock(&adev->gfx.kiq.ring_lock);
release_queue(adev);
return r;
}
static int hqd_dump_v11(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
(*dump)[i++][1] = RREG32(addr); \
} while (0)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
struct v11_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
}
usleep_range(500, 1000);
}
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
ENABLE, 1);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
lower_32_bits(data64));
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
RB_ENABLE, 1);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);
return 0;
}
static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (7+11+1+12+12)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = regSDMA0_QUEUE0_RB_CNTL;
reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id)
{
uint32_t act;
bool retval = false;
uint32_t low, high;
acquire_queue(adev, pipe_id, queue_id);
act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
retval = true;
}
release_queue(adev);
return retval;
}
static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
{
struct v11_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
return true;
return false;
}
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
struct v11_compute_mqd *m = get_mqd(mqd);
acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
type = DRAIN_PIPE;
break;
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
default:
type = DRAIN_PIPE;
break;
}
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue pipe %d queue %d preemption failed\n",
pipe_id, queue_id);
release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
release_queue(adev);
return 0;
}
static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
struct v11_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);
while (true) {
temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
}
usleep_range(500, 1000);
}
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);
return 0;
}
static int wave_control_execute_v11(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
{
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SA_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base)
{
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
return;
}
/* SDMA is on gfxhub as well for gfx11 adapters */
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v11,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
.init_interrupts = init_interrupts_v11,
.hqd_load = hqd_load_v11,
.hiq_mqd_load = hiq_mqd_load_v11,
.hqd_sdma_load = hqd_sdma_load_v11,
.hqd_dump = hqd_dump_v11,
.hqd_sdma_dump = hqd_sdma_dump_v11,
.hqd_is_occupied = hqd_is_occupied_v11,
.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
.hqd_destroy = hqd_destroy_v11,
.hqd_sdma_destroy = hqd_sdma_destroy_v11,
.wave_control_execute = wave_control_execute_v11,
.get_atc_vmid_pasid_mapping_info = NULL,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
};

View file

@ -1483,26 +1483,26 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
} else {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
if (!offset || !*offset)
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
if (!offset || !*offset)
return -EINVAL;
user_addr = untagged_addr(*offset);
} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
bo_type = ttm_bo_type_sg;
if (size > UINT_MAX)
return -EINVAL;
sg = create_doorbell_sg(*offset, size);
if (!sg)
return -ENOMEM;
} else {
return -EINVAL;
user_addr = untagged_addr(*offset);
} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
alloc_flags = 0;
if (size > UINT_MAX)
return -EINVAL;
sg = create_doorbell_sg(*offset, size);
if (!sg)
return -ENOMEM;
} else {
return -EINVAL;
}
}
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);

View file

@ -556,6 +556,7 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
union smu_info {
struct atom_smu_info_v3_1 v31;
struct atom_smu_info_v4_0 v40;
};
union gfx_info {
@ -602,7 +603,10 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
data_offset);
/* system clock */
spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
if (frev == 3)
spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
else if (frev == 4)
spll->reference_freq = le32_to_cpu(smu_info->v40.core_refclk_10khz);
spll->reference_div = 0;
spll->min_post_div = 1;

View file

@ -545,7 +545,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
DRM_ERROR("kvmalloc_array failure\n");
return -ENOMEM;
r = -ENOMEM;
goto out_free_user_pages;
}
r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);

View file

@ -296,6 +296,7 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
{
struct amdgpu_device *adev = ctx->adev;
enum amd_dpm_forced_level level;
u32 current_stable_pstate;
int r;
mutex_lock(&adev->pm.stable_pstate_ctx_lock);
@ -304,6 +305,10 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
goto done;
}
r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
if (r || (stable_pstate == current_stable_pstate))
goto done;
switch (stable_pstate) {
case AMDGPU_CTX_STABLE_PSTATE_NONE:
level = AMD_DPM_FORCED_LEVEL_AUTO;

View file

@ -1044,19 +1044,25 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
adev->doorbell.base = pci_resource_start(adev->pdev, 2);
adev->doorbell.size = pci_resource_len(adev->pdev, 2);
adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
adev->doorbell_index.max_assignment+1);
if (adev->doorbell.num_doorbells == 0)
return -EINVAL;
if (adev->enable_mes) {
adev->doorbell.num_doorbells =
adev->doorbell.size / sizeof(u32);
} else {
adev->doorbell.num_doorbells =
min_t(u32, adev->doorbell.size / sizeof(u32),
adev->doorbell_index.max_assignment+1);
if (adev->doorbell.num_doorbells == 0)
return -EINVAL;
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
* paging queue doorbell use the second page. The
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
* doorbells are in the first page. So with paging queue enabled,
* the max num_doorbells should + 1 page (0x400 in dword)
*/
if (adev->asic_type >= CHIP_VEGA10)
adev->doorbell.num_doorbells += 0x400;
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
* paging queue doorbell use the second page. The
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
* doorbells are in the first page. So with paging queue enabled,
* the max num_doorbells should + 1 page (0x400 in dword)
*/
if (adev->asic_type >= CHIP_VEGA10)
adev->doorbell.num_doorbells += 0x400;
}
adev->doorbell.ptr = ioremap(adev->doorbell.base,
adev->doorbell.num_doorbells *
@ -3664,8 +3670,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_mcbp)
DRM_INFO("MCBP is enabled\n");
if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
adev->enable_mes = true;
if (adev->asic_type >= CHIP_NAVI10) {
if (amdgpu_mes || amdgpu_mes_kiq)
adev->enable_mes = true;
if (amdgpu_mes_kiq)
adev->enable_mes_kiq = true;
}
/*
* Reset domain needs to be present early, before XGMI hive discovered
@ -3858,6 +3869,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
} else
adev->ucode_sysfs_en = true;
r = amdgpu_psp_sysfs_init(adev);
if (r) {
adev->psp_sysfs_en = false;
if (!amdgpu_sriov_vf(adev))
DRM_ERROR("Creating psp sysfs failed\n");
} else
adev->psp_sysfs_en = true;
/*
* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
* Otherwise the mgpu fan boost feature will be skipped due to the
@ -3961,10 +3980,6 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
if (adev->mman.initialized) {
flush_delayed_work(&adev->mman.bdev.wq);
ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
}
adev->shutdown = true;
/* make sure IB test finished before entering exclusive mode
@ -3985,10 +4000,17 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
}
amdgpu_fence_driver_hw_fini(adev);
if (adev->mman.initialized) {
flush_delayed_work(&adev->mman.bdev.wq);
ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
}
if (adev->pm_sysfs_en)
amdgpu_pm_sysfs_fini(adev);
if (adev->ucode_sysfs_en)
amdgpu_ucode_sysfs_fini(adev);
if (adev->psp_sysfs_en)
amdgpu_psp_sysfs_fini(adev);
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
/* disable ras feature must before hw fini */
@ -4487,6 +4509,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
amdgpu_amdkfd_post_reset(adev);
}

View file

@ -47,25 +47,39 @@
#include "jpeg_v2_5.h"
#include "smuio_v9_0.h"
#include "gmc_v10_0.h"
#include "gmc_v11_0.h"
#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "nbio_v2_3.h"
#include "nbio_v4_3.h"
#include "nbio_v7_2.h"
#include "nbio_v7_7.h"
#include "hdp_v5_0.h"
#include "hdp_v5_2.h"
#include "hdp_v6_0.h"
#include "nv.h"
#include "soc21.h"
#include "navi10_ih.h"
#include "ih_v6_0.h"
#include "gfx_v10_0.h"
#include "gfx_v11_0.h"
#include "sdma_v5_0.h"
#include "sdma_v5_2.h"
#include "sdma_v6_0.h"
#include "lsdma_v6_0.h"
#include "vcn_v2_0.h"
#include "jpeg_v2_0.h"
#include "vcn_v3_0.h"
#include "jpeg_v3_0.h"
#include "vcn_v4_0.h"
#include "jpeg_v4_0.h"
#include "amdgpu_vkms.h"
#include "mes_v10_1.h"
#include "mes_v11_0.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#include "smuio_v13_0.h"
#include "smuio_v13_0_6.h"
#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
@ -111,6 +125,7 @@ static const char *hw_id_names[HW_ID_MAX] = {
[SDMA1_HWID] = "SDMA1",
[SDMA2_HWID] = "SDMA2",
[SDMA3_HWID] = "SDMA3",
[LSDMA_HWID] = "LSDMA",
[ISP_HWID] = "ISP",
[DBGU_IO_HWID] = "DBGU_IO",
[DF_HWID] = "DF",
@ -160,6 +175,7 @@ static int hw_id_map[MAX_HWIP] = {
[SDMA1_HWIP] = SDMA1_HWID,
[SDMA2_HWIP] = SDMA2_HWID,
[SDMA3_HWIP] = SDMA3_HWID,
[LSDMA_HWIP] = LSDMA_HWID,
[MMHUB_HWIP] = MMHUB_HWID,
[ATHUB_HWIP] = ATHUB_HWID,
[NBIO_HWIP] = NBIF_HWID,
@ -1467,6 +1483,11 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 7):
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add common ip block(GC_HWIP:0x%x)\n",
@ -1505,6 +1526,11 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 7):
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add gmc ip block(GC_HWIP:0x%x)\n",
@ -1537,6 +1563,11 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 2, 1):
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
break;
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
@ -1575,10 +1606,13 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
break;
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
@ -1618,10 +1652,13 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
break;
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
@ -1715,6 +1752,11 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 7):
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add gfx ip block(GC_HWIP:0x%x)\n",
@ -1753,6 +1795,11 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 2, 7):
amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
break;
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
@ -1827,6 +1874,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 33):
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
break;
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 4):
amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@ -1852,7 +1904,19 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
case IP_VERSION(10, 3, 6):
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
if (amdgpu_mes) {
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
adev->enable_mes = true;
if (amdgpu_mes_kiq)
adev->enable_mes_kiq = true;
}
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
break;
default:
break;
@ -2073,6 +2137,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 7):
adev->family = AMDGPU_FAMILY_GC_10_3_7;
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
adev->family = AMDGPU_FAMILY_GC_11_0_0;
break;
case IP_VERSION(11, 0, 1):
adev->family = AMDGPU_FAMILY_GC_11_0_1;
break;
default:
return -EINVAL;
}
@ -2087,6 +2158,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 7):
case IP_VERSION(11, 0, 1):
adev->flags |= AMD_IS_APU;
break;
default:
@ -2140,6 +2212,15 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.funcs = &nbio_v2_3_funcs;
adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc;
break;
case IP_VERSION(4, 3, 0):
case IP_VERSION(4, 3, 1):
adev->nbio.funcs = &nbio_v4_3_funcs;
adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
break;
case IP_VERSION(7, 7, 0):
adev->nbio.funcs = &nbio_v7_7_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
break;
default:
break;
}
@ -2163,6 +2244,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 2, 0):
adev->hdp.funcs = &hdp_v5_0_funcs;
break;
case IP_VERSION(5, 2, 1):
adev->hdp.funcs = &hdp_v5_2_funcs;
break;
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
adev->hdp.funcs = &hdp_v6_0_funcs;
break;
default:
break;
}
@ -2212,6 +2300,19 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 2):
adev->smuio.funcs = &smuio_v13_0_funcs;
break;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 8):
adev->smuio.funcs = &smuio_v13_0_6_funcs;
break;
default:
break;
}
switch (adev->ip_versions[LSDMA_HWIP][0]) {
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 2):
adev->lsdma.funcs = &lsdma_v6_0_funcs;
break;
default:
break;
}
@ -2262,8 +2363,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
!amdgpu_sriov_vf(adev)) {
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
!amdgpu_sriov_vf(adev)) ||
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
r = amdgpu_discovery_set_smu_ip_blocks(adev);
if (r)
return r;
@ -2273,11 +2375,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
if (adev->enable_mes) {
r = amdgpu_discovery_set_mes_ip_blocks(adev);
if (r)
return r;
}
r = amdgpu_discovery_set_mes_ip_blocks(adev);
if (r)
return r;
return 0;
}

View file

@ -52,8 +52,11 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t gfx_ring1;
uint32_t gfx_userqueue_start;
uint32_t gfx_userqueue_end;
uint32_t sdma_engine[8];
uint32_t mes_ring;
uint32_t mes_ring0;
uint32_t mes_ring1;
uint32_t ih;
union {
struct {
@ -174,11 +177,15 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B,
AMDGPU_NAVI10_DOORBELL_MES_RING0 = 0x00B,
AMDGPU_NAVI10_DOORBELL_MES_RING1 = 0x00C,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00D,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
AMDGPU_NAVI10_DOORBELL_MES_RING = 0x090,
AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START = 0x08D,
AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END = 0x0FF,
/* SDMA:256~335*/
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,

View file

@ -38,7 +38,6 @@
#include <linux/mmu_notifier.h>
#include <linux/suspend.h>
#include <linux/cc_platform.h>
#include <linux/fb.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
@ -171,6 +170,7 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp;
int amdgpu_discovery = -1;
int amdgpu_mes;
int amdgpu_mes_kiq;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = -1; /* auto */
@ -306,7 +306,7 @@ module_param_named(dpm, amdgpu_dpm, int, 0444);
* to -1 to select the default loading mode for the ASIC, as defined
* by the driver. The default is -1 (auto).
*/
MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = force direct if supported, -1 = auto)");
MODULE_PARM_DESC(fw_load_type, "firmware loading type (3 = rlc backdoor autoload if supported, 2 = smu load if supported, 1 = psp load, 0 = force direct if supported, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
/**
@ -636,6 +636,15 @@ MODULE_PARM_DESC(mes,
"Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
module_param_named(mes, amdgpu_mes, int, 0444);
/**
* DOC: mes_kiq (int)
* Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
* (0 = disabled (default), 1 = enabled)
*/
MODULE_PARM_DESC(mes_kiq,
"Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
/**
* DOC: noretry (int)
* Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
@ -1950,26 +1959,6 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
static const struct drm_driver amdgpu_kms_driver;
static bool amdgpu_is_fw_framebuffer(resource_size_t base,
resource_size_t size)
{
bool found = false;
#if IS_REACHABLE(CONFIG_FB)
struct apertures_struct *a;
a = alloc_apertures(1);
if (!a)
return false;
a->ranges[0].base = base;
a->ranges[0].size = size;
found = is_firmware_framebuffer(a);
kfree(a);
#endif
return found;
}
static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
{
struct pci_dev *p = NULL;
@ -2000,8 +1989,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
unsigned long flags = ent->driver_data;
int ret, retry = 0, i;
bool supports_atomic = false;
bool is_fw_fb;
resource_size_t base, size;
/* skip devices which are owned by radeon */
for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
@ -2068,10 +2055,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
#endif
base = pci_resource_start(pdev, 0);
size = pci_resource_len(pdev, 0);
is_fw_fb = amdgpu_is_fw_framebuffer(base, size);
/* Get rid of things like offb */
ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
if (ret)
@ -2084,7 +2067,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
adev->dev = &pdev->dev;
adev->pdev = pdev;
ddev = adev_to_drm(adev);
adev->is_fw_fb = is_fw_fb;
if (!supports_atomic)
ddev->driver_features &= ~DRIVER_ATOMIC;
@ -2336,7 +2318,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
if (!adev->in_s0ix)
if (amdgpu_acpi_should_gpu_reset(adev))
return amdgpu_asic_reset(adev);
return 0;

View file

@ -422,8 +422,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
uint64_t index;
if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);

View file

@ -98,42 +98,6 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
adev->gfx.me.queue_bitmap);
}
/**
* amdgpu_gfx_scratch_get - Allocate a scratch register
*
* @adev: amdgpu_device pointer
* @reg: scratch register mmio offset
*
* Allocate a CP scratch register for use by the driver (all asics).
* Returns 0 on success or -EINVAL on failure.
*/
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
{
int i;
i = ffs(adev->gfx.scratch.free_mask);
if (i != 0 && i <= adev->gfx.scratch.num_reg) {
i--;
adev->gfx.scratch.free_mask &= ~(1u << i);
*reg = adev->gfx.scratch.reg_base + i;
return 0;
}
return -EINVAL;
}
/**
* amdgpu_gfx_scratch_free - Free a scratch register
*
* @adev: amdgpu_device pointer
* @reg: scratch register mmio offset
*
* Free a CP scratch register allocated for use by the driver (all asics)
*/
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
{
adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
}
/**
* amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
*
@ -367,7 +331,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
/* create MQD for KIQ */
ring = &adev->gfx.kiq.ring;
if (!ring->mqd_obj) {
if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
* deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
@ -464,7 +428,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
int i, r;
int i, r = 0;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@ -479,7 +443,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
RESET_QUEUES, 0, 0);
r = amdgpu_ring_test_helper(kiq_ring);
if (adev->gfx.kiq.ring.sched.ready)
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
return r;
@ -535,6 +501,9 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
return r;
}
if (adev->enable_mes)
queue_mask = ~0ULL;
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

View file

@ -30,6 +30,7 @@
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
#include "amdgpu_rlc.h"
#include "amdgpu_imu.h"
#include "soc15.h"
#include "amdgpu_ras.h"
@ -56,6 +57,9 @@ struct amdgpu_mec {
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
struct amdgpu_bo *mec_fw_data_obj;
u64 mec_fw_data_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
@ -106,15 +110,6 @@ struct amdgpu_kiq {
const struct kiq_pm4_funcs *pmf;
};
/*
* GPU scratch registers structures, functions & helpers
*/
struct amdgpu_scratch {
unsigned num_reg;
uint32_t reg_base;
uint32_t free_mask;
};
/*
* GFX configurations
*/
@ -244,6 +239,10 @@ struct amdgpu_pfp {
struct amdgpu_bo *pfp_fw_obj;
uint64_t pfp_fw_gpu_addr;
uint32_t *pfp_fw_ptr;
struct amdgpu_bo *pfp_fw_data_obj;
uint64_t pfp_fw_data_gpu_addr;
uint32_t *pfp_fw_data_ptr;
};
struct amdgpu_ce {
@ -256,6 +255,11 @@ struct amdgpu_me {
struct amdgpu_bo *me_fw_obj;
uint64_t me_fw_gpu_addr;
uint32_t *me_fw_ptr;
struct amdgpu_bo *me_fw_data_obj;
uint64_t me_fw_data_gpu_addr;
uint32_t *me_fw_data_ptr;
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
@ -274,7 +278,8 @@ struct amdgpu_gfx {
struct amdgpu_me me;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
struct amdgpu_imu imu;
bool rs64_enable; /* firmware format */
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
@ -287,6 +292,8 @@ struct amdgpu_gfx {
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
const struct firmware *imu_fw; /* IMU firmware */
uint32_t imu_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
@ -335,8 +342,10 @@ struct amdgpu_gfx {
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/*ras */
struct ras_common_if *ras_if;
struct amdgpu_gfx_ras *ras;
struct ras_common_if *ras_if;
struct amdgpu_gfx_ras *ras;
bool is_poweron;
};
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
@ -357,9 +366,6 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
return (u32)((1ULL << bit_width) - 1);
}
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);

View file

@ -100,7 +100,9 @@ struct amdgpu_vmhub {
uint32_t eng_distance;
uint32_t eng_addr_distance; /* include LO32/HI32 */
uint32_t vm_cntx_cntl;
uint32_t vm_cntx_cntl_vm_fault;
uint32_t vm_l2_bank_select_reserved_cid2;
const struct amdgpu_vmhub_funcs *vmhub_funcs;
};

View file

@ -155,12 +155,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_ctx = 0;
}
if (!ring->sched.ready) {
if (!ring->sched.ready && !ring->is_mes_queue) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
return -EINVAL;
}
if (vm && !job->vmid) {
if (vm && !job->vmid && !ring->is_mes_queue) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
@ -390,6 +390,10 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
if (!ring->sched.ready || !ring->funcs->test_ib)
continue;
if (adev->enable_mes &&
ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
continue;
/* MM engine need more time */
if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||

View file

@ -0,0 +1,51 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_IMU_H__
#define __AMDGPU_IMU_H__
struct amdgpu_imu_funcs {
int (*init_microcode)(struct amdgpu_device *adev);
int (*load_microcode)(struct amdgpu_device *adev);
void (*setup_imu)(struct amdgpu_device *adev);
int (*start_imu)(struct amdgpu_device *adev);
void (*program_rlc_ram)(struct amdgpu_device *adev);
};
struct imu_rlc_ram_golden {
u32 hwip;
u32 instance;
u32 segment;
u32 reg;
u32 data;
u32 addr_mask;
};
#define IMU_RLC_RAM_GOLDEN_VALUE(ip, inst, reg, data, addr_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, data, addr_mask }
struct amdgpu_imu {
const struct amdgpu_imu_funcs *funcs;
};
#endif

View file

@ -216,3 +216,21 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
return r;
}
int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
struct ras_common_if *ras_if = adev->jpeg.ras_if;
struct ras_dispatch_if ih_data = {
.entry = entry,
};
if (!ras_if)
return 0;
ih_data.head = *ras_if;
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}

View file

@ -69,4 +69,8 @@ void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
#endif /*__AMDGPU_JPEG_H__*/

View file

@ -185,12 +185,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
adev->runpm = true;
break;
}
/* XXX: disable runtime pm if we are the primary adapter
* to avoid displays being re-enabled after DPMS.
* This needs to be sorted out and fixed properly.
*/
if (adev->is_fw_fb)
adev->runpm = false;
amdgpu_runtime_pm_quirk(adev);

View file

@ -0,0 +1,91 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_lsdma.h"
#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL
int amdgpu_lsdma_wait_for(struct amdgpu_device *adev,
uint32_t reg_index, uint32_t reg_val,
uint32_t mask)
{
uint32_t val;
int i;
for (i = 0; i < adev->usec_timeout; i++) {
val = RREG32(reg_index);
if ((val & mask) == reg_val)
return 0;
udelay(1);
}
return -ETIME;
}
int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev,
uint64_t src_addr,
uint64_t dst_addr,
uint64_t mem_size)
{
int ret;
if (mem_size == 0)
return -EINVAL;
while (mem_size > 0) {
uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size);
if (ret)
return ret;
src_addr += current_copy_size;
dst_addr += current_copy_size;
mem_size -= current_copy_size;
}
return 0;
}
int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev,
uint64_t dst_addr,
uint32_t data,
uint64_t mem_size)
{
int ret;
if (mem_size == 0)
return -EINVAL;
while (mem_size > 0) {
uint64_t current_fill_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
ret = adev->lsdma.funcs->fill_mem(adev, dst_addr, data, current_fill_size);
if (ret)
return ret;
dst_addr += current_fill_size;
mem_size -= current_fill_size;
}
return 0;
}

View file

@ -0,0 +1,46 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_LSDMA_H__
#define __AMDGPU_LSDMA_H__
struct amdgpu_lsdma {
const struct amdgpu_lsdma_funcs *funcs;
};
struct amdgpu_lsdma_funcs {
int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr,
uint64_t dst_addr, uint64_t size);
int (*fill_mem)(struct amdgpu_device *adev, uint64_t dst_addr,
uint32_t data, uint64_t size);
void (*update_memory_power_gating)(struct amdgpu_device *adev, bool enable);
};
int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
uint64_t dst_addr, uint64_t mem_size);
int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr,
uint32_t data, uint64_t mem_size);
int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, uint32_t reg_index,
uint32_t reg_val, uint32_t mask);
#endif

File diff suppressed because it is too large Load diff

View file

@ -24,6 +24,11 @@
#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__
#include "amdgpu_irq.h"
#include "kgd_kfd_interface.h"
#include "amdgpu_gfx.h"
#include <linux/sched/mm.h>
#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2
@ -37,11 +42,29 @@ enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_NUM_LEVELS
};
#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
struct amdgpu_mes_funcs;
enum admgpu_mes_pipe {
AMDGPU_MES_SCHED_PIPE = 0,
AMDGPU_MES_KIQ_PIPE,
AMDGPU_MAX_MES_PIPES = 2,
};
struct amdgpu_mes {
struct amdgpu_device *adev;
struct mutex mutex_hidden;
struct idr pasid_idr;
struct idr gang_id_idr;
struct idr queue_id_idr;
struct ida doorbell_ida;
spinlock_t queue_id_lock;
uint32_t total_max_queue;
uint32_t doorbell_id_offset;
uint32_t max_doorbell_slices;
@ -51,27 +74,28 @@ struct amdgpu_mes {
struct amdgpu_ring ring;
const struct firmware *fw;
const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
/* mes ucode */
struct amdgpu_bo *ucode_fw_obj;
uint64_t ucode_fw_gpu_addr;
uint32_t *ucode_fw_ptr;
uint32_t ucode_fw_version;
uint64_t uc_start_addr;
struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
uint32_t ucode_fw_version[AMDGPU_MAX_MES_PIPES];
uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
/* mes ucode data */
struct amdgpu_bo *data_fw_obj;
uint64_t data_fw_gpu_addr;
uint32_t *data_fw_ptr;
uint32_t data_fw_version;
uint64_t data_start_addr;
struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
uint32_t data_fw_version[AMDGPU_MAX_MES_PIPES];
uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
/* eop gpu obj */
struct amdgpu_bo *eop_gpu_obj;
uint64_t eop_gpu_addr;
struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
void *mqd_backup;
void *mqd_backup[AMDGPU_MAX_MES_PIPES];
struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
@ -85,11 +109,81 @@ struct amdgpu_mes {
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
uint32_t saved_flags;
/* initialize kiq pipe */
int (*kiq_hw_init)(struct amdgpu_device *adev);
int (*kiq_hw_fini)(struct amdgpu_device *adev);
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
};
struct amdgpu_mes_process {
int pasid;
struct amdgpu_vm *vm;
uint64_t pd_gpu_addr;
struct amdgpu_bo *proc_ctx_bo;
uint64_t proc_ctx_gpu_addr;
void *proc_ctx_cpu_ptr;
uint64_t process_quantum;
struct list_head gang_list;
uint32_t doorbell_index;
unsigned long *doorbell_bitmap;
struct mutex doorbell_lock;
};
struct amdgpu_mes_gang {
int gang_id;
int priority;
int inprocess_gang_priority;
int global_priority_level;
struct list_head list;
struct amdgpu_mes_process *process;
struct amdgpu_bo *gang_ctx_bo;
uint64_t gang_ctx_gpu_addr;
void *gang_ctx_cpu_ptr;
uint64_t gang_quantum;
struct list_head queue_list;
};
struct amdgpu_mes_queue {
struct list_head list;
struct amdgpu_mes_gang *gang;
int queue_id;
uint64_t doorbell_off;
struct amdgpu_bo *mqd_obj;
void *mqd_cpu_ptr;
uint64_t mqd_gpu_addr;
uint64_t wptr_gpu_addr;
int queue_type;
int paging;
struct amdgpu_ring *ring;
};
struct amdgpu_mes_queue_properties {
int queue_type;
uint64_t hqd_base_gpu_addr;
uint64_t rptr_gpu_addr;
uint64_t wptr_gpu_addr;
uint32_t queue_size;
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
uint32_t hqd_queue_priority;
bool paging;
struct amdgpu_ring *ring;
/* out */
uint64_t doorbell_off;
};
struct amdgpu_mes_gang_properties {
uint32_t priority;
uint32_t gang_quantum;
uint32_t inprocess_gang_priority;
uint32_t priority_level;
int global_priority_level;
};
struct mes_add_queue_input {
uint32_t process_id;
uint64_t page_table_base_addr;
@ -106,6 +200,10 @@ struct mes_add_queue_input {
uint64_t wptr_addr;
uint32_t queue_type;
uint32_t paging;
uint32_t gws_base;
uint32_t gws_size;
uint64_t tba_addr;
uint64_t tma_addr;
};
struct mes_remove_queue_input {
@ -113,6 +211,16 @@ struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
struct mes_unmap_legacy_queue_input {
enum amdgpu_unmap_queues_action action;
uint32_t queue_type;
uint32_t doorbell_offset;
uint32_t pipe_id;
uint32_t queue_id;
uint64_t trail_fence_addr;
uint64_t trail_fence_data;
};
struct mes_suspend_gang_input {
bool suspend_all_gangs;
uint64_t gang_context_addr;
@ -132,6 +240,9 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input);
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
@ -139,4 +250,117 @@ struct amdgpu_mes_funcs {
struct mes_resume_gang_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
struct amdgpu_vm *vm);
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_gang_properties *gprops,
int *gang_id);
int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
struct amdgpu_mes_queue_properties *qprops,
int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq);
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,
struct amdgpu_ring **out);
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data);
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_self_test(struct amdgpu_device *adev);
int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
unsigned int *doorbell_index);
void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
unsigned int doorbell_index);
unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
struct amdgpu_device *adev,
uint32_t doorbell_index,
unsigned int doorbell_id);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
/*
* MES lock can be taken in MMU notifiers.
*
* A bit more detail about why to set no-FS reclaim with MES lock:
*
* The purpose of the MMU notifier is to stop GPU access to memory so
* that the Linux VM subsystem can move pages around safely. This is
* done by preempting user mode queues for the affected process. When
* MES is used, MES lock needs to be taken to preempt the queues.
*
* The MMU notifier callback entry point in the driver is
* amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
* there is:
* amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
* kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
*
* The last part of the chain is a function pointer where we take the
* MES lock.
*
* The problem with taking locks in the MMU notifier is, that MMU
* notifiers can be called in reclaim-FS context. That's where the
* kernel frees up pages to make room for new page allocations under
* memory pressure. While we are running in reclaim-FS context, we must
* not trigger another memory reclaim operation because that would
* recursively reenter the reclaim code and cause a deadlock. The
* memalloc_nofs_save/restore calls guarantee that.
*
* In addition we also need to avoid lock dependencies on other locks taken
* under the MES lock, for example reservation locks. Here is a possible
* scenario of a deadlock:
* Thread A: takes and holds reservation lock | triggers reclaim-FS |
* MMU notifier | blocks trying to take MES lock
* Thread B: takes and holds MES lock | blocks trying to take reservation lock
*
* In this scenario Thread B gets involved in a deadlock even without
* triggering a reclaim-FS operation itself.
* To fix this and break the lock dependency chain you'd need to either:
* 1. protect reservation locks with memalloc_nofs_save/restore, or
* 2. avoid taking reservation locks under the MES lock.
*
* Reservation locks are taken all over the kernel in different subsystems, we
* have no control over them and their lock dependencies.So the only workable
* solution is to avoid taking other locks under the MES lock.
* As a result, make sure no reclaim-FS happens while holding this lock anywhere
* to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
*/
static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
{
mutex_lock(&mes->mutex_hidden);
mes->saved_flags = memalloc_noreclaim_save();
}
static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
{
memalloc_noreclaim_restore(mes->saved_flags);
mutex_unlock(&mes->mutex_hidden);
}
#endif /* __AMDGPU_MES_H__ */

View file

@ -0,0 +1,121 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_MES_CTX_H__
#define __AMDGPU_MES_CTX_H__
#include "v10_structs.h"
enum {
AMDGPU_MES_CTX_RPTR_OFFS = 0,
AMDGPU_MES_CTX_WPTR_OFFS,
AMDGPU_MES_CTX_FENCE_OFFS,
AMDGPU_MES_CTX_COND_EXE_OFFS,
AMDGPU_MES_CTX_TRAIL_FENCE_OFFS,
AMDGPU_MES_CTX_MAX_OFFS,
};
enum {
AMDGPU_MES_CTX_RING_OFFS = AMDGPU_MES_CTX_MAX_OFFS,
AMDGPU_MES_CTX_IB_OFFS,
AMDGPU_MES_CTX_PADDING_OFFS,
};
#define AMDGPU_MES_CTX_MAX_GFX_RINGS 1
#define AMDGPU_MES_CTX_MAX_COMPUTE_RINGS 4
#define AMDGPU_MES_CTX_MAX_SDMA_RINGS 2
#define AMDGPU_MES_CTX_MAX_RINGS \
(AMDGPU_MES_CTX_MAX_GFX_RINGS + \
AMDGPU_MES_CTX_MAX_COMPUTE_RINGS + \
AMDGPU_MES_CTX_MAX_SDMA_RINGS)
#define AMDGPU_CSA_SDMA_SIZE 64
#define GFX10_MEC_HPD_SIZE 2048
struct amdgpu_wb_slot {
uint32_t data[8];
};
struct amdgpu_mes_ctx_meta_data {
struct {
uint8_t ring[PAGE_SIZE * 4];
/* gfx csa */
struct v10_gfx_meta_data gfx_meta_data;
uint8_t gds_backup[64 * 1024];
struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
/* only for ib test */
uint32_t ib[256] __aligned(256);
uint32_t padding[64];
} __aligned(PAGE_SIZE) gfx[AMDGPU_MES_CTX_MAX_GFX_RINGS];
struct {
uint8_t ring[PAGE_SIZE * 4];
uint8_t mec_hpd[GFX10_MEC_HPD_SIZE];
struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
/* only for ib test */
uint32_t ib[256] __aligned(256);
uint32_t padding[64];
} __aligned(PAGE_SIZE) compute[AMDGPU_MES_CTX_MAX_COMPUTE_RINGS];
struct {
uint8_t ring[PAGE_SIZE * 4];
/* sdma csa for mcbp */
uint8_t sdma_meta_data[AMDGPU_CSA_SDMA_SIZE];
struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
/* only for ib test */
uint32_t ib[256] __aligned(256);
uint32_t padding[64];
} __aligned(PAGE_SIZE) sdma[AMDGPU_MES_CTX_MAX_SDMA_RINGS];
};
struct amdgpu_mes_ctx_data {
struct amdgpu_bo *meta_data_obj;
uint64_t meta_data_gpu_addr;
struct amdgpu_bo_va *meta_data_va;
void *meta_data_ptr;
uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
};
#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
#endif

View file

@ -27,6 +27,7 @@ struct amdgpu_mmhub_ras {
struct amdgpu_mmhub_funcs {
u64 (*get_fb_location)(struct amdgpu_device *adev);
u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
void (*init)(struct amdgpu_device *adev);
int (*gart_enable)(struct amdgpu_device *adev);
void (*set_fault_enable_default)(struct amdgpu_device *adev,

View file

@ -70,6 +70,7 @@ struct amdgpu_nbio_funcs {
bool use_doorbell, int doorbell_index, int doorbell_size);
void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance);
void (*gc_doorbell_init)(struct amdgpu_device *adev);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,

File diff suppressed because it is too large Load diff

View file

@ -129,6 +129,8 @@ struct psp_funcs
void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*vbflash_stat)(struct psp_context *psp);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
@ -244,6 +246,7 @@ enum psp_runtime_entry_type {
PSP_RUNTIME_ENTRY_TYPE_MGPU_WAFL = 0x3, /* WAFL runtime data */
PSP_RUNTIME_ENTRY_TYPE_MGPU_XGMI = 0x4, /* XGMI runtime data */
PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG = 0x5, /* Boot Config runtime data */
PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS = 0x6, /* SCPM validation data */
};
/* PSP runtime DB header */
@ -278,12 +281,24 @@ enum psp_runtime_boot_cfg_feature {
BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING = 0x2,
};
/* PSP run time DB SCPM authentication defines */
enum psp_runtime_scpm_authentication {
SCPM_DISABLE = 0x0,
SCPM_ENABLE = 0x1,
SCPM_ENABLE_WITH_SCPM_ERR = 0x2,
};
/* PSP runtime DB boot config entry */
struct psp_runtime_boot_cfg_entry {
uint32_t boot_cfg_bitmask;
uint32_t reserved;
};
/* PSP runtime DB SCPM entry */
struct psp_runtime_scpm_entry {
enum psp_runtime_scpm_authentication scpm_status;
};
struct psp_context
{
struct amdgpu_device *adev;
@ -358,6 +373,10 @@ struct psp_context
struct psp_memory_training_context mem_train_ctx;
uint32_t boot_cfg_bitmask;
char *vbflash_tmp_buf;
size_t vbflash_image_size;
bool vbflash_done;
};
struct amdgpu_psp_funcs {
@ -404,6 +423,14 @@ struct amdgpu_psp_funcs {
((psp)->funcs->read_usbc_pd_fw ? \
(psp)->funcs->read_usbc_pd_fw((psp), fw_ver) : -EINVAL)
#define psp_update_spirom(psp, fw_pri_mc_addr) \
((psp)->funcs->update_spirom ? \
(psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
#define psp_vbflash_status(psp) \
((psp)->funcs->vbflash_stat ? \
(psp)->funcs->vbflash_stat((psp)) : -EINVAL)
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@ -483,4 +510,7 @@ int psp_load_fw_list(struct psp_context *psp,
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
int is_psp_fw_valid(struct psp_bin_desc bin);
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev);
void amdgpu_psp_sysfs_fini(struct amdgpu_device *adev);
#endif

View file

@ -1538,33 +1538,42 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
struct amdgpu_iv_entry *entry)
{
bool poison_stat = true, need_reset = true;
bool poison_stat = false;
struct amdgpu_device *adev = obj->adev;
struct ras_err_data err_data = {0, 0, 0, NULL};
struct amdgpu_ras_block_object *block_obj =
amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
if (!adev->gmc.xgmi.connected_to_cpu)
amdgpu_umc_poison_handler(adev, &err_data, false);
if (!block_obj || !block_obj->hw_ops)
return;
/* both query_poison_status and handle_poison_consumption are optional */
if (block_obj && block_obj->hw_ops) {
if (block_obj->hw_ops->query_poison_status) {
poison_stat = block_obj->hw_ops->query_poison_status(adev);
if (!poison_stat)
dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
block_obj->ras_comm.name);
}
/* both query_poison_status and handle_poison_consumption are optional,
* but at least one of them should be implemented if we need poison
* consumption handler
*/
if (block_obj->hw_ops->query_poison_status) {
poison_stat = block_obj->hw_ops->query_poison_status(adev);
if (!poison_stat) {
/* Not poison consumption interrupt, no need to handle it */
dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
block_obj->ras_comm.name);
if (poison_stat && block_obj->hw_ops->handle_poison_consumption) {
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
need_reset = poison_stat;
return;
}
}
/* gpu reset is fallback for all failed cases */
if (need_reset)
if (!adev->gmc.xgmi.connected_to_cpu)
amdgpu_umc_poison_handler(adev, &err_data, false);
if (block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
/* gpu reset is fallback for failed and default cases */
if (poison_stat) {
dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
block_obj->ras_comm.name);
amdgpu_ras_reset_gpu(adev);
}
}
static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
@ -2516,7 +2525,9 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
return 0;
ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
if (ras_obj->ras_cb) {
if (ras_obj->ras_cb || (ras_obj->hw_ops &&
(ras_obj->hw_ops->query_poison_status ||
ras_obj->hw_ops->handle_poison_consumption))) {
r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
if (r)
goto cleanup;

View file

@ -149,6 +149,16 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->funcs->end_use(ring);
}
#define amdgpu_ring_get_gpu_addr(ring, offset) \
(ring->is_mes_queue ? \
(ring->mes_ctx->meta_data_gpu_addr + offset) : \
(ring->adev->wb.gpu_addr + offset * 4))
#define amdgpu_ring_get_cpu_addr(ring, offset) \
(ring->is_mes_queue ? \
(void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
(&ring->adev->wb.wb[offset]))
/**
* amdgpu_ring_init - init driver ring struct.
*
@ -189,51 +199,88 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return -EINVAL;
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
ring->num_hw_submission = sched_hw_submission;
ring->sched_score = sched_score;
ring->vmid_wait = dma_fence_get_stub();
if (!ring->is_mes_queue) {
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
}
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
}
r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
return r;
if (ring->is_mes_queue) {
ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_RPTR_OFFS);
ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_WPTR_OFFS);
ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_FENCE_OFFS);
ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_COND_EXE_OFFS);
} else {
r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_device_wb_get(adev, &ring->fence_offs);
if (r) {
dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
if (r) {
dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
return r;
}
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
return r;
}
}
r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
return r;
}
ring->fence_gpu_addr =
amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
ring->fence_cpu_addr =
amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
r = amdgpu_device_wb_get(adev, &ring->fence_offs);
if (r) {
dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
return r;
}
ring->rptr_gpu_addr =
amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
ring->rptr_cpu_addr =
amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);
ring->wptr_gpu_addr =
amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
ring->wptr_cpu_addr =
amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);
r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
if (r) {
dev_err(adev->dev,
"(%d) ring trail_fence_offs wb alloc failed\n", r);
return r;
}
ring->trail_fence_gpu_addr =
adev->wb.gpu_addr + (ring->trail_fence_offs * 4);
ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs];
amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
ring->trail_fence_cpu_addr =
amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
ring->cond_exe_gpu_addr =
amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
ring->cond_exe_cpu_addr =
amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
return r;
}
ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
@ -248,8 +295,20 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
if (ring->is_mes_queue) {
int offset = 0;
BUG_ON(ring->ring_size > PAGE_SIZE*4);
offset = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_RING_OFFS);
ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
amdgpu_ring_clear_ring(ring);
} else if (ring->ring_obj == NULL) {
r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
@ -286,26 +345,30 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
/* Not to finish a ring which is not initialized */
if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
if (!(ring->adev) ||
(!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
return;
ring->sched.ready = false;
amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
if (!ring->is_mes_queue) {
amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
amdgpu_device_wb_free(ring->adev, ring->fence_offs);
amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
amdgpu_device_wb_free(ring->adev, ring->fence_offs);
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
}
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
ring->adev->rings[ring->idx] = NULL;
if (!ring->is_mes_queue)
ring->adev->rings[ring->idx] = NULL;
}
/**
@ -458,3 +521,51 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->sched.ready = !r;
return r;
}
static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
struct amdgpu_mqd_prop *prop)
{
struct amdgpu_device *adev = ring->adev;
memset(prop, 0, sizeof(*prop));
prop->mqd_gpu_addr = ring->mqd_gpu_addr;
prop->hqd_base_gpu_addr = ring->gpu_addr;
prop->rptr_gpu_addr = ring->rptr_gpu_addr;
prop->wptr_gpu_addr = ring->wptr_gpu_addr;
prop->queue_size = ring->ring_size;
prop->eop_gpu_addr = ring->eop_gpu_addr;
prop->use_doorbell = ring->use_doorbell;
prop->doorbell_index = ring->doorbell_index;
/* map_queues packet doesn't need activate the queue,
* so only kiq need set this field.
*/
prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
prop->hqd_queue_priority =
AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
}
}
}
int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_mqd *mqd_mgr;
struct amdgpu_mqd_prop prop;
amdgpu_ring_to_mqd_prop(ring, &prop);
ring->wptr = 0;
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
else
mqd_mgr = &adev->mqds[ring->funcs->type];
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
}

View file

@ -230,6 +230,8 @@ struct amdgpu_ring {
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
unsigned rptr_offs;
u64 rptr_gpu_addr;
volatile u32 *rptr_cpu_addr;
u64 wptr;
u64 wptr_old;
unsigned ring_size;
@ -250,7 +252,11 @@ struct amdgpu_ring {
bool use_doorbell;
bool use_pollmem;
unsigned wptr_offs;
u64 wptr_gpu_addr;
volatile u32 *wptr_cpu_addr;
unsigned fence_offs;
u64 fence_gpu_addr;
volatile u32 *fence_cpu_addr;
uint64_t current_ctx;
char name[16];
u32 trail_seq;
@ -267,6 +273,11 @@ struct amdgpu_ring {
int hw_prio;
unsigned num_hw_submission;
atomic_t *sched_score;
/* used for mes */
bool is_mes_queue;
uint32_t hw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
};
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
@ -364,11 +375,22 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \
(ring->is_mes_queue && ring->mes_ctx ? \
(ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \
(ring->is_mes_queue && ring->mes_ctx ? \
(void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
NULL)
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);
static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
{
return ib->ptr[idx];

View file

@ -69,6 +69,47 @@ typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_MAX = 38,
} FIRMWARE_ID;
typedef enum _SOC21_FIRMWARE_ID_ {
SOC21_FIRMWARE_ID_INVALID = 0,
SOC21_FIRMWARE_ID_RLC_G_UCODE = 1,
SOC21_FIRMWARE_ID_RLC_TOC = 2,
SOC21_FIRMWARE_ID_RLCG_SCRATCH = 3,
SOC21_FIRMWARE_ID_RLC_SRM_ARAM = 4,
SOC21_FIRMWARE_ID_RLC_P_UCODE = 5,
SOC21_FIRMWARE_ID_RLC_V_UCODE = 6,
SOC21_FIRMWARE_ID_RLX6_UCODE = 7,
SOC21_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
SOC21_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
SOC21_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
SOC21_FIRMWARE_ID_CP_PFP = 13,
SOC21_FIRMWARE_ID_CP_ME = 14,
SOC21_FIRMWARE_ID_CP_MEC = 15,
SOC21_FIRMWARE_ID_RS64_MES_P0 = 16,
SOC21_FIRMWARE_ID_RS64_MES_P1 = 17,
SOC21_FIRMWARE_ID_RS64_PFP = 18,
SOC21_FIRMWARE_ID_RS64_ME = 19,
SOC21_FIRMWARE_ID_RS64_MEC = 20,
SOC21_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
SOC21_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
SOC21_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
SOC21_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
SOC21_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
SOC21_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
SOC21_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
SOC21_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
SOC21_FIRMWARE_ID_RLX6_DRAM_SR = 35,
SOC21_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
SOC21_FIRMWARE_ID_MAX = 37
} SOC21_FIRMWARE_ID;
typedef struct _RLC_TABLE_OF_CONTENT {
union {
unsigned int DW0;
@ -179,6 +220,8 @@ struct amdgpu_rlc {
u32 save_restore_list_srm_size_bytes;
u32 rlc_iram_ucode_size_bytes;
u32 rlc_dram_ucode_size_bytes;
u32 rlcp_ucode_size_bytes;
u32 rlcv_ucode_size_bytes;
u32 *register_list_format;
u32 *register_restore;
@ -187,6 +230,8 @@ struct amdgpu_rlc {
u8 *save_restore_list_srm;
u8 *rlc_iram_ucode;
u8 *rlc_dram_ucode;
u8 *rlcp_ucode;
u8 *rlcv_ucode;
bool is_rlc_v2_1;

View file

@ -74,14 +74,22 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp)
return 0;
r = amdgpu_sdma_get_index_from_ring(ring, &index);
if (ring->is_mes_queue) {
uint32_t offset = 0;
if (r || index > 31)
csa_mc_addr = 0;
else
csa_mc_addr = amdgpu_csa_vaddr(adev) +
AMDGPU_CSA_SDMA_OFFSET +
index * AMDGPU_CSA_SDMA_SIZE;
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
sdma[ring->idx].sdma_meta_data);
csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
} else {
r = amdgpu_sdma_get_index_from_ring(ring, &index);
if (r || index > 31)
csa_mc_addr = 0;
else
csa_mc_addr = amdgpu_csa_vaddr(adev) +
AMDGPU_CSA_SDMA_OFFSET +
index * AMDGPU_CSA_SDMA_SIZE;
}
return csa_mc_addr;
}

View file

@ -115,11 +115,30 @@ void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr)
le32_to_cpu(gfx_hdr->ucode_feature_version));
DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(gfx_hdr->jt_offset));
DRM_DEBUG("jt_size: %u\n", le32_to_cpu(gfx_hdr->jt_size));
} else if (version_major == 2) {
const struct gfx_firmware_header_v2_0 *gfx_hdr =
container_of(hdr, struct gfx_firmware_header_v2_0, header);
DRM_DEBUG("ucode_feature_version: %u\n",
le32_to_cpu(gfx_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown GFX ucode version: %u.%u\n", version_major, version_minor);
}
}
void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
DRM_DEBUG("IMU\n");
amdgpu_ucode_print_common_hdr(hdr);
if (version_major != 1) {
DRM_ERROR("Unknown GFX ucode version: %u.%u\n", version_major, version_minor);
}
}
void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
@ -238,6 +257,17 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
container_of(sdma_hdr, struct sdma_firmware_header_v1_1, v1_0);
DRM_DEBUG("digest_size: %u\n", le32_to_cpu(sdma_v1_1_hdr->digest_size));
}
} else if (version_major == 2) {
const struct sdma_firmware_header_v2_0 *sdma_hdr =
container_of(hdr, struct sdma_firmware_header_v2_0, header);
DRM_DEBUG("ucode_feature_version: %u\n",
le32_to_cpu(sdma_hdr->ucode_feature_version));
DRM_DEBUG("ctx_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_offset));
DRM_DEBUG("ctx_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_size));
DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
} else {
DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
version_major, version_minor);
@ -248,6 +278,8 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
uint32_t fw_index;
const struct psp_fw_bin_desc *desc;
DRM_DEBUG("PSP\n");
amdgpu_ucode_print_common_hdr(hdr);
@ -312,6 +344,71 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("spl_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->spl.size_bytes));
}
} else if (version_major == 2) {
const struct psp_firmware_header_v2_0 *psp_hdr_v2_0 =
container_of(hdr, struct psp_firmware_header_v2_0, header);
for (fw_index = 0; fw_index < le32_to_cpu(psp_hdr_v2_0->psp_fw_bin_count); fw_index++) {
desc = &(psp_hdr_v2_0->psp_fw_bin[fw_index]);
switch (desc->fw_type) {
case PSP_FW_TYPE_PSP_SOS:
DRM_DEBUG("psp_sos_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_sos_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_SYS_DRV:
DRM_DEBUG("psp_sys_drv_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_sys_drv_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_KDB:
DRM_DEBUG("psp_kdb_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_kdb_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_TOC:
DRM_DEBUG("psp_toc_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_toc_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_SPL:
DRM_DEBUG("psp_spl_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_spl_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_RL:
DRM_DEBUG("psp_rl_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_rl_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_SOC_DRV:
DRM_DEBUG("psp_soc_drv_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_soc_drv_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_INTF_DRV:
DRM_DEBUG("psp_intf_drv_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_intf_drv_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
case PSP_FW_TYPE_PSP_DBG_DRV:
DRM_DEBUG("psp_dbg_drv_version: %u\n",
le32_to_cpu(desc->fw_version));
DRM_DEBUG("psp_dbg_drv_size_bytes: %u\n",
le32_to_cpu(desc->size_bytes));
break;
default:
DRM_DEBUG("Unsupported PSP fw type: %d\n", desc->fw_type);
break;
}
}
} else {
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
version_major, version_minor);
@ -355,8 +452,8 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
{
if ((hdr->common.header_version_major == hdr_major) &&
(hdr->common.header_version_minor == hdr_minor))
return false;
return true;
return true;
return false;
}
enum amdgpu_firmware_load_type
@ -442,6 +539,10 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "SDMA6";
case AMDGPU_UCODE_ID_SDMA7:
return "SDMA7";
case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
return "SDMA_CTX";
case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
return "SDMA_CTL";
case AMDGPU_UCODE_ID_CP_CE:
return "CP_CE";
case AMDGPU_UCODE_ID_CP_PFP:
@ -460,6 +561,10 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "CP_MES";
case AMDGPU_UCODE_ID_CP_MES_DATA:
return "CP_MES_DATA";
case AMDGPU_UCODE_ID_CP_MES1:
return "CP_MES_KIQ";
case AMDGPU_UCODE_ID_CP_MES1_DATA:
return "CP_MES_KIQ_DATA";
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
return "RLC_RESTORE_LIST_CNTL";
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
@ -472,10 +577,20 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "RLC_DRAM";
case AMDGPU_UCODE_ID_RLC_G:
return "RLC_G";
case AMDGPU_UCODE_ID_RLC_P:
return "RLC_P";
case AMDGPU_UCODE_ID_RLC_V:
return "RLC_V";
case AMDGPU_UCODE_ID_IMU_I:
return "IMU_I";
case AMDGPU_UCODE_ID_IMU_D:
return "IMU_D";
case AMDGPU_UCODE_ID_STORAGE:
return "STORAGE";
case AMDGPU_UCODE_ID_SMC:
return "SMC";
case AMDGPU_UCODE_ID_PPTABLE:
return "PPTABLE";
case AMDGPU_UCODE_ID_UVD:
return "UVD";
case AMDGPU_UCODE_ID_UVD1:
@ -570,9 +685,12 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
{
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct gfx_firmware_header_v2_0 *cpv2_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
u8 *ucode_addr;
if (NULL == ucode->fw)
@ -586,12 +704,25 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
switch (ucode->ucode_id) {
case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
ucode->ucode_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
ucode->ucode_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(sdma_hdr->ctl_ucode_offset);
break;
case AMDGPU_UCODE_ID_CP_MEC1:
case AMDGPU_UCODE_ID_CP_MEC2:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
@ -626,6 +757,14 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlc_dram_ucode;
break;
case AMDGPU_UCODE_ID_RLC_P:
ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlcp_ucode;
break;
case AMDGPU_UCODE_ID_RLC_V:
ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlcv_ucode;
break;
case AMDGPU_UCODE_ID_CP_MES:
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
@ -636,6 +775,16 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_MES1:
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_MES1_DATA:
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
break;
case AMDGPU_UCODE_ID_DMCU_ERAM:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
le32_to_cpu(dmcu_hdr->intv_size_bytes);
@ -653,6 +802,76 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_PPTABLE:
ucode->ucode_size = ucode->fw->size;
ucode_addr = (u8 *)ucode->fw->data;
break;
case AMDGPU_UCODE_ID_IMU_I:
ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_IMU_D:
ucode->ucode_size = le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes) +
le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_PFP:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_ME:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_MEC:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
default:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
@ -714,8 +933,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
{
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
}

View file

@ -170,6 +170,18 @@ struct gfx_firmware_header_v1_0 {
uint32_t jt_size; /* size of jt */
};
/* version_major=2, version_minor=0 */
struct gfx_firmware_header_v2_0 {
struct common_firmware_header header;
uint32_t ucode_feature_version;
uint32_t ucode_size_bytes;
uint32_t ucode_offset_bytes;
uint32_t data_size_bytes;
uint32_t data_offset_bytes;
uint32_t ucode_start_addr_lo;
uint32_t ucode_start_addr_hi;
};
/* version_major=1, version_minor=0 */
struct mes_firmware_header_v1_0 {
struct common_firmware_header header;
@ -236,7 +248,7 @@ struct rlc_firmware_header_v2_1 {
uint32_t save_restore_list_srm_offset_bytes;
};
/* version_major=2, version_minor=1 */
/* version_major=2, version_minor=2 */
struct rlc_firmware_header_v2_2 {
struct rlc_firmware_header_v2_1 v2_1;
uint32_t rlc_iram_ucode_size_bytes;
@ -245,6 +257,15 @@ struct rlc_firmware_header_v2_2 {
uint32_t rlc_dram_ucode_offset_bytes;
};
/* version_major=2, version_minor=3 */
struct rlc_firmware_header_v2_3 {
struct rlc_firmware_header_v2_2 v2_2;
uint32_t rlcp_ucode_size_bytes;
uint32_t rlcp_ucode_offset_bytes;
uint32_t rlcv_ucode_size_bytes;
uint32_t rlcv_ucode_offset_bytes;
};
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@ -260,6 +281,19 @@ struct sdma_firmware_header_v1_1 {
uint32_t digest_size;
};
/* version_major=2, version_minor=0 */
struct sdma_firmware_header_v2_0 {
struct common_firmware_header header;
uint32_t ucode_feature_version;
uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
uint32_t ctx_jt_offset; /* context thread jt location */
uint32_t ctx_jt_size; /* context thread size of jt */
uint32_t ctl_ucode_offset;
uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
uint32_t ctl_jt_offset; /* control thread jt location */
uint32_t ctl_jt_size; /* control thread size of jt */
};
/* gpu info payload */
struct gpu_info_firmware_v1_0 {
uint32_t gc_num_se;
@ -313,6 +347,15 @@ struct dmcub_firmware_header_v1_0 {
uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
};
/* version_major=1, version_minor=0 */
struct imu_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t imu_iram_ucode_size_bytes;
uint32_t imu_iram_ucode_offset_bytes;
uint32_t imu_dram_ucode_size_bytes;
uint32_t imu_dram_ucode_offset_bytes;
};
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@ -326,14 +369,19 @@ union amdgpu_firmware_header {
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
struct gfx_firmware_header_v2_0 gfx_v2_0;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
struct rlc_firmware_header_v2_1 rlc_v2_1;
struct rlc_firmware_header_v2_2 rlc_v2_2;
struct rlc_firmware_header_v2_3 rlc_v2_3;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct sdma_firmware_header_v2_0 sdma_v2_0;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
struct dmcub_firmware_header_v1_0 dmcub;
struct imu_firmware_header_v1_0 imu;
uint8_t raw[0x100];
};
@ -352,23 +400,43 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA5,
AMDGPU_UCODE_ID_SDMA6,
AMDGPU_UCODE_ID_SDMA7,
AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
AMDGPU_UCODE_ID_CP_CE,
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
AMDGPU_UCODE_ID_CP_RS64_PFP,
AMDGPU_UCODE_ID_CP_RS64_ME,
AMDGPU_UCODE_ID_CP_RS64_MEC,
AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK,
AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK,
AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK,
AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK,
AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK,
AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK,
AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK,
AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK,
AMDGPU_UCODE_ID_CP_MEC1,
AMDGPU_UCODE_ID_CP_MEC1_JT,
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_CP_MES,
AMDGPU_UCODE_ID_CP_MES_DATA,
AMDGPU_UCODE_ID_CP_MES1,
AMDGPU_UCODE_ID_CP_MES1_DATA,
AMDGPU_UCODE_ID_IMU_I,
AMDGPU_UCODE_ID_IMU_D,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_RLC_IRAM,
AMDGPU_UCODE_ID_RLC_DRAM,
AMDGPU_UCODE_ID_RLC_P,
AMDGPU_UCODE_ID_RLC_V,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_PPTABLE,
AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE,
@ -391,8 +459,8 @@ enum AMDGPU_UCODE_STATUS {
enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};

View file

@ -53,6 +53,8 @@
#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@ -71,6 +73,8 @@ MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@ -175,6 +179,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
case IP_VERSION(4, 0, 0):
fw_name = FIRMWARE_VCN4_0_0;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
case IP_VERSION(4, 0, 4):
fw_name = FIRMWARE_VCN4_0_4;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
default:
return -EINVAL;
}
@ -228,8 +244,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)){
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
} else {
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
}
bo_size += fw_shared_size;
if (amdgpu_vcnfw_log)
@ -1103,3 +1126,21 @@ void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
log_buf->wrapped = 0;
#endif
}
int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
struct ras_common_if *ras_if = adev->vcn.ras_if;
struct ras_dispatch_if ih_data = {
.entry = entry,
};
if (!ras_if)
return 0;
ih_data.head = *ras_if;
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}

View file

@ -65,8 +65,6 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
@ -157,6 +155,7 @@
} \
} while (0)
#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
@ -288,6 +287,13 @@ struct amdgpu_fw_shared_sw_ring {
uint8_t padding[3];
};
struct amdgpu_fw_shared_unified_queue_struct {
uint8_t is_enabled;
uint8_t queue_mode;
uint8_t queue_status;
uint8_t padding[5];
};
struct amdgpu_fw_shared_fw_logging {
uint8_t is_enabled;
uint32_t addr_lo;
@ -311,6 +317,14 @@ struct amdgpu_fw_shared {
struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
};
struct amdgpu_vcn4_fw_shared {
uint32_t present_flag_0;
uint8_t pad[12];
struct amdgpu_fw_shared_unified_queue_struct sq;
uint8_t pad1[8];
struct amdgpu_fw_shared_fw_logging fw_log;
};
struct amdgpu_vcn_fwlog {
uint32_t rptr;
uint32_t wptr;
@ -361,4 +375,9 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
uint8_t i, struct amdgpu_vcn_inst *vcn);
int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
#endif

View file

@ -581,7 +581,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
}
dma_fence_put(fence);
if (ring->funcs->emit_gds_switch && gds_switch_needed) {
if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
gds_switch_needed) {
id->gds_base = job->gds_base;
id->gds_size = job->gds_size;
id->gws_base = job->gws_base;
@ -705,6 +706,9 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (r)
goto error;
/* vm_flush_needed after updating PDEs */
atomic64_inc(&vm->tlb_seq);
while (!list_empty(&vm->relocated)) {
entry = list_first_entry(&vm->relocated,
struct amdgpu_vm_bo_base,

View file

@ -0,0 +1,98 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "athub_v3_0.h"
#include "athub/athub_3_0_0_offset.h"
#include "athub/athub_3_0_0_sh_mask.h"
#include "navi10_enum.h"
#include "soc15_common.h"
static void
athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
if (def != data)
WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
}
static void
athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
if (def != data)
WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
}
int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
if (amdgpu_sriov_vf(adev))
return 0;
switch (adev->ip_versions[ATHUB_HWIP][0]) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 2):
athub_v3_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
athub_v3_0_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE);
break;
default:
break;
}
return 0;
}
void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
/* AMD_CG_SUPPORT_ATHUB_MGCG */
data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
/* AMD_CG_SUPPORT_ATHUB_LS */
if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_LS;
}

View file

@ -0,0 +1,30 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __ATHUB_V3_0_H__
#define __ATHUB_V3_0_H__
int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif

View file

@ -164,7 +164,7 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
{
u32 rptr;
rptr = ring->adev->wb.wb[ring->rptr_offs];
rptr = *ring->rptr_cpu_addr;
return (rptr & 0x3fffc) >> 2;
}
@ -436,12 +436,10 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
u32 wb_offset;
int i, j, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@ -477,9 +475,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
((adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
((ring->rptr_gpu_addr) & 0xFFFFFFFC));
rb_cntl |= SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;

View file

@ -0,0 +1,988 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __CLEARSTATE_GFX11_H_
#define __CLEARSTATE_GFX11_H_
static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
{
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
0x00000000, // DB_RENDER_OVERRIDE
0x00000000, // DB_RENDER_OVERRIDE2
0x00000000, // DB_HTILE_DATA_BASE
0, // HOLE
0x00000000, // DB_DEPTH_SIZE_XY
0x00000000, // DB_DEPTH_BOUNDS_MIN
0x00000000, // DB_DEPTH_BOUNDS_MAX
0x00000000, // DB_STENCIL_CLEAR
0x00000000, // DB_DEPTH_CLEAR
0x00000000, // PA_SC_SCREEN_SCISSOR_TL
0x40004000, // PA_SC_SCREEN_SCISSOR_BR
0x00000000, // DB_DFSM_CONTROL
0x00000000, // DB_RESERVED_REG_2
0x00000000, // DB_Z_INFO
0x00000000, // DB_STENCIL_INFO
0x00000000, // DB_Z_READ_BASE
0x00000000, // DB_STENCIL_READ_BASE
0x00000000, // DB_Z_WRITE_BASE
0x00000000, // DB_STENCIL_WRITE_BASE
0x00000000, // DB_RESERVED_REG_1
0x00000000, // DB_RESERVED_REG_3
0x00000000, // DB_SPI_VRS_CENTER_LOCATION
0x00000000, // DB_VRS_OVERRIDE_CNTL
0x00000000, // DB_Z_READ_BASE_HI
0x00000000, // DB_STENCIL_READ_BASE_HI
0x00000000, // DB_Z_WRITE_BASE_HI
0x00000000, // DB_STENCIL_WRITE_BASE_HI
0x00000000, // DB_HTILE_DATA_BASE_HI
0x00150055, // DB_RMI_L2_CACHE_CONTROL
0x00000000, // TA_BC_BASE_ADDR
0x00000000, // TA_BC_BASE_ADDR_HI
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // COHER_DEST_BASE_HI_0
0x00000000, // COHER_DEST_BASE_HI_1
0x00000000, // COHER_DEST_BASE_HI_2
0x00000000, // COHER_DEST_BASE_HI_3
0x00000000, // COHER_DEST_BASE_2
0x00000000, // COHER_DEST_BASE_3
0x00000000, // PA_SC_WINDOW_OFFSET
0x80000000, // PA_SC_WINDOW_SCISSOR_TL
0x40004000, // PA_SC_WINDOW_SCISSOR_BR
0x0000ffff, // PA_SC_CLIPRECT_RULE
0x00000000, // PA_SC_CLIPRECT_0_TL
0x40004000, // PA_SC_CLIPRECT_0_BR
0x00000000, // PA_SC_CLIPRECT_1_TL
0x40004000, // PA_SC_CLIPRECT_1_BR
0x00000000, // PA_SC_CLIPRECT_2_TL
0x40004000, // PA_SC_CLIPRECT_2_BR
0x00000000, // PA_SC_CLIPRECT_3_TL
0x40004000, // PA_SC_CLIPRECT_3_BR
0xaa99aaaa, // PA_SC_EDGERULE
0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
0xffffffff, // CB_TARGET_MASK
0xffffffff, // CB_SHADER_MASK
0x80000000, // PA_SC_GENERIC_SCISSOR_TL
0x40004000, // PA_SC_GENERIC_SCISSOR_BR
0x00000000, // COHER_DEST_BASE_0
0x00000000, // COHER_DEST_BASE_1
0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
0x00000000, // PA_SC_VPORT_ZMIN_0
0x3f800000, // PA_SC_VPORT_ZMAX_0
0x00000000, // PA_SC_VPORT_ZMIN_1
0x3f800000, // PA_SC_VPORT_ZMAX_1
0x00000000, // PA_SC_VPORT_ZMIN_2
0x3f800000, // PA_SC_VPORT_ZMAX_2
0x00000000, // PA_SC_VPORT_ZMIN_3
0x3f800000, // PA_SC_VPORT_ZMAX_3
0x00000000, // PA_SC_VPORT_ZMIN_4
0x3f800000, // PA_SC_VPORT_ZMAX_4
0x00000000, // PA_SC_VPORT_ZMIN_5
0x3f800000, // PA_SC_VPORT_ZMAX_5
0x00000000, // PA_SC_VPORT_ZMIN_6
0x3f800000, // PA_SC_VPORT_ZMAX_6
0x00000000, // PA_SC_VPORT_ZMIN_7
0x3f800000, // PA_SC_VPORT_ZMAX_7
0x00000000, // PA_SC_VPORT_ZMIN_8
0x3f800000, // PA_SC_VPORT_ZMAX_8
0x00000000, // PA_SC_VPORT_ZMIN_9
0x3f800000, // PA_SC_VPORT_ZMAX_9
0x00000000, // PA_SC_VPORT_ZMIN_10
0x3f800000, // PA_SC_VPORT_ZMAX_10
0x00000000, // PA_SC_VPORT_ZMIN_11
0x3f800000, // PA_SC_VPORT_ZMAX_11
0x00000000, // PA_SC_VPORT_ZMIN_12
0x3f800000, // PA_SC_VPORT_ZMAX_12
0x00000000, // PA_SC_VPORT_ZMIN_13
0x3f800000, // PA_SC_VPORT_ZMAX_13
0x00000000, // PA_SC_VPORT_ZMIN_14
0x3f800000, // PA_SC_VPORT_ZMAX_14
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
0x00000000, // PA_SC_RASTER_CONFIG
0x00000000, // PA_SC_RASTER_CONFIG_1
0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
};
static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
{
0x00000000, // CP_PERFMON_CNTX_CNTL
0x00000000, // CP_PIPEID
0x00000000, // CP_VMID
0x00000000, // CONTEXT_RESERVED_REG0
0x00000000, // CONTEXT_RESERVED_REG1
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // PA_SC_FSR_EN
0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_VIEW
0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // PA_SC_VRS_RATE_BASE
0x00000000, // PA_SC_VRS_RATE_BASE_EXT
0x00000000, // PA_SC_VRS_RATE_SIZE_XY
0x00000000, // PA_SC_VRS_RATE_VIEW
0xffffffff, // VGT_MAX_VTX_INDX
0x00000000, // VGT_MIN_VTX_INDX
0x00000000, // VGT_INDX_OFFSET
0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
0x00550055, // CB_RMI_GL2_CACHE_CONTROL
0x00000000, // CB_BLEND_RED
0x00000000, // CB_BLEND_GREEN
0x00000000, // CB_BLEND_BLUE
0x00000000, // CB_BLEND_ALPHA
0x00000000, // CB_DCC_CONTROL
0x00000000, // CB_COVERAGE_OUT_CONTROL
0x00000000, // DB_STENCIL_CONTROL
0x01000000, // DB_STENCILREFMASK
0x01000000, // DB_STENCILREFMASK_BF
0, // HOLE
0x00000000, // PA_CL_VPORT_XSCALE
0x00000000, // PA_CL_VPORT_XOFFSET
0x00000000, // PA_CL_VPORT_YSCALE
0x00000000, // PA_CL_VPORT_YOFFSET
0x00000000, // PA_CL_VPORT_ZSCALE
0x00000000, // PA_CL_VPORT_ZOFFSET
0x00000000, // PA_CL_VPORT_XSCALE_1
0x00000000, // PA_CL_VPORT_XOFFSET_1
0x00000000, // PA_CL_VPORT_YSCALE_1
0x00000000, // PA_CL_VPORT_YOFFSET_1
0x00000000, // PA_CL_VPORT_ZSCALE_1
0x00000000, // PA_CL_VPORT_ZOFFSET_1
0x00000000, // PA_CL_VPORT_XSCALE_2
0x00000000, // PA_CL_VPORT_XOFFSET_2
0x00000000, // PA_CL_VPORT_YSCALE_2
0x00000000, // PA_CL_VPORT_YOFFSET_2
0x00000000, // PA_CL_VPORT_ZSCALE_2
0x00000000, // PA_CL_VPORT_ZOFFSET_2
0x00000000, // PA_CL_VPORT_XSCALE_3
0x00000000, // PA_CL_VPORT_XOFFSET_3
0x00000000, // PA_CL_VPORT_YSCALE_3
0x00000000, // PA_CL_VPORT_YOFFSET_3
0x00000000, // PA_CL_VPORT_ZSCALE_3
0x00000000, // PA_CL_VPORT_ZOFFSET_3
0x00000000, // PA_CL_VPORT_XSCALE_4
0x00000000, // PA_CL_VPORT_XOFFSET_4
0x00000000, // PA_CL_VPORT_YSCALE_4
0x00000000, // PA_CL_VPORT_YOFFSET_4
0x00000000, // PA_CL_VPORT_ZSCALE_4
0x00000000, // PA_CL_VPORT_ZOFFSET_4
0x00000000, // PA_CL_VPORT_XSCALE_5
0x00000000, // PA_CL_VPORT_XOFFSET_5
0x00000000, // PA_CL_VPORT_YSCALE_5
0x00000000, // PA_CL_VPORT_YOFFSET_5
0x00000000, // PA_CL_VPORT_ZSCALE_5
0x00000000, // PA_CL_VPORT_ZOFFSET_5
0x00000000, // PA_CL_VPORT_XSCALE_6
0x00000000, // PA_CL_VPORT_XOFFSET_6
0x00000000, // PA_CL_VPORT_YSCALE_6
0x00000000, // PA_CL_VPORT_YOFFSET_6
0x00000000, // PA_CL_VPORT_ZSCALE_6
0x00000000, // PA_CL_VPORT_ZOFFSET_6
0x00000000, // PA_CL_VPORT_XSCALE_7
0x00000000, // PA_CL_VPORT_XOFFSET_7
0x00000000, // PA_CL_VPORT_YSCALE_7
0x00000000, // PA_CL_VPORT_YOFFSET_7
0x00000000, // PA_CL_VPORT_ZSCALE_7
0x00000000, // PA_CL_VPORT_ZOFFSET_7
0x00000000, // PA_CL_VPORT_XSCALE_8
0x00000000, // PA_CL_VPORT_XOFFSET_8
0x00000000, // PA_CL_VPORT_YSCALE_8
0x00000000, // PA_CL_VPORT_YOFFSET_8
0x00000000, // PA_CL_VPORT_ZSCALE_8
0x00000000, // PA_CL_VPORT_ZOFFSET_8
0x00000000, // PA_CL_VPORT_XSCALE_9
0x00000000, // PA_CL_VPORT_XOFFSET_9
0x00000000, // PA_CL_VPORT_YSCALE_9
0x00000000, // PA_CL_VPORT_YOFFSET_9
0x00000000, // PA_CL_VPORT_ZSCALE_9
0x00000000, // PA_CL_VPORT_ZOFFSET_9
0x00000000, // PA_CL_VPORT_XSCALE_10
0x00000000, // PA_CL_VPORT_XOFFSET_10
0x00000000, // PA_CL_VPORT_YSCALE_10
0x00000000, // PA_CL_VPORT_YOFFSET_10
0x00000000, // PA_CL_VPORT_ZSCALE_10
0x00000000, // PA_CL_VPORT_ZOFFSET_10
0x00000000, // PA_CL_VPORT_XSCALE_11
0x00000000, // PA_CL_VPORT_XOFFSET_11
0x00000000, // PA_CL_VPORT_YSCALE_11
0x00000000, // PA_CL_VPORT_YOFFSET_11
0x00000000, // PA_CL_VPORT_ZSCALE_11
0x00000000, // PA_CL_VPORT_ZOFFSET_11
0x00000000, // PA_CL_VPORT_XSCALE_12
0x00000000, // PA_CL_VPORT_XOFFSET_12
0x00000000, // PA_CL_VPORT_YSCALE_12
0x00000000, // PA_CL_VPORT_YOFFSET_12
0x00000000, // PA_CL_VPORT_ZSCALE_12
0x00000000, // PA_CL_VPORT_ZOFFSET_12
0x00000000, // PA_CL_VPORT_XSCALE_13
0x00000000, // PA_CL_VPORT_XOFFSET_13
0x00000000, // PA_CL_VPORT_YSCALE_13
0x00000000, // PA_CL_VPORT_YOFFSET_13
0x00000000, // PA_CL_VPORT_ZSCALE_13
0x00000000, // PA_CL_VPORT_ZOFFSET_13
0x00000000, // PA_CL_VPORT_XSCALE_14
0x00000000, // PA_CL_VPORT_XOFFSET_14
0x00000000, // PA_CL_VPORT_YSCALE_14
0x00000000, // PA_CL_VPORT_YOFFSET_14
0x00000000, // PA_CL_VPORT_ZSCALE_14
0x00000000, // PA_CL_VPORT_ZOFFSET_14
0x00000000, // PA_CL_VPORT_XSCALE_15
0x00000000, // PA_CL_VPORT_XOFFSET_15
0x00000000, // PA_CL_VPORT_YSCALE_15
0x00000000, // PA_CL_VPORT_YOFFSET_15
0x00000000, // PA_CL_VPORT_ZSCALE_15
0x00000000, // PA_CL_VPORT_ZOFFSET_15
0x00000000, // PA_CL_UCP_0_X
0x00000000, // PA_CL_UCP_0_Y
0x00000000, // PA_CL_UCP_0_Z
0x00000000, // PA_CL_UCP_0_W
0x00000000, // PA_CL_UCP_1_X
0x00000000, // PA_CL_UCP_1_Y
0x00000000, // PA_CL_UCP_1_Z
0x00000000, // PA_CL_UCP_1_W
0x00000000, // PA_CL_UCP_2_X
0x00000000, // PA_CL_UCP_2_Y
0x00000000, // PA_CL_UCP_2_Z
0x00000000, // PA_CL_UCP_2_W
0x00000000, // PA_CL_UCP_3_X
0x00000000, // PA_CL_UCP_3_Y
0x00000000, // PA_CL_UCP_3_Z
0x00000000, // PA_CL_UCP_3_W
0x00000000, // PA_CL_UCP_4_X
0x00000000, // PA_CL_UCP_4_Y
0x00000000, // PA_CL_UCP_4_Z
0x00000000, // PA_CL_UCP_4_W
0x00000000, // PA_CL_UCP_5_X
0x00000000, // PA_CL_UCP_5_Y
0x00000000, // PA_CL_UCP_5_Z
0x00000000, // PA_CL_UCP_5_W
0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
0x00000000, // PA_RATE_CNTL
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // SPI_PS_INPUT_CNTL_0
0x00000000, // SPI_PS_INPUT_CNTL_1
0x00000000, // SPI_PS_INPUT_CNTL_2
0x00000000, // SPI_PS_INPUT_CNTL_3
0x00000000, // SPI_PS_INPUT_CNTL_4
0x00000000, // SPI_PS_INPUT_CNTL_5
0x00000000, // SPI_PS_INPUT_CNTL_6
0x00000000, // SPI_PS_INPUT_CNTL_7
0x00000000, // SPI_PS_INPUT_CNTL_8
0x00000000, // SPI_PS_INPUT_CNTL_9
0x00000000, // SPI_PS_INPUT_CNTL_10
0x00000000, // SPI_PS_INPUT_CNTL_11
0x00000000, // SPI_PS_INPUT_CNTL_12
0x00000000, // SPI_PS_INPUT_CNTL_13
0x00000000, // SPI_PS_INPUT_CNTL_14
0x00000000, // SPI_PS_INPUT_CNTL_15
0x00000000, // SPI_PS_INPUT_CNTL_16
0x00000000, // SPI_PS_INPUT_CNTL_17
0x00000000, // SPI_PS_INPUT_CNTL_18
0x00000000, // SPI_PS_INPUT_CNTL_19
0x00000000, // SPI_PS_INPUT_CNTL_20
0x00000000, // SPI_PS_INPUT_CNTL_21
0x00000000, // SPI_PS_INPUT_CNTL_22
0x00000000, // SPI_PS_INPUT_CNTL_23
0x00000000, // SPI_PS_INPUT_CNTL_24
0x00000000, // SPI_PS_INPUT_CNTL_25
0x00000000, // SPI_PS_INPUT_CNTL_26
0x00000000, // SPI_PS_INPUT_CNTL_27
0x00000000, // SPI_PS_INPUT_CNTL_28
0x00000000, // SPI_PS_INPUT_CNTL_29
0x00000000, // SPI_PS_INPUT_CNTL_30
0x00000000, // SPI_PS_INPUT_CNTL_31
0x00000000, // SPI_VS_OUT_CONFIG
0, // HOLE
0x00000000, // SPI_PS_INPUT_ENA
0x00000000, // SPI_PS_INPUT_ADDR
0x00000000, // SPI_INTERP_CONTROL_0
0x00000002, // SPI_PS_IN_CONTROL
0x00000000, // SPI_BARYC_SSAA_CNTL
0x00000000, // SPI_BARYC_CNTL
0, // HOLE
0x00000000, // SPI_TMPRING_SIZE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // SPI_SHADER_IDX_FORMAT
0x00000000, // SPI_SHADER_POS_FORMAT
0x00000000, // SPI_SHADER_Z_FORMAT
0x00000000, // SPI_SHADER_COL_FORMAT
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // SX_PS_DOWNCONVERT_CONTROL
0x00000000, // SX_PS_DOWNCONVERT
0x00000000, // SX_BLEND_OPT_EPSILON
0x00000000, // SX_BLEND_OPT_CONTROL
0x00000000, // SX_MRT0_BLEND_OPT
0x00000000, // SX_MRT1_BLEND_OPT
0x00000000, // SX_MRT2_BLEND_OPT
0x00000000, // SX_MRT3_BLEND_OPT
0x00000000, // SX_MRT4_BLEND_OPT
0x00000000, // SX_MRT5_BLEND_OPT
0x00000000, // SX_MRT6_BLEND_OPT
0x00000000, // SX_MRT7_BLEND_OPT
0x00000000, // CB_BLEND0_CONTROL
0x00000000, // CB_BLEND1_CONTROL
0x00000000, // CB_BLEND2_CONTROL
0x00000000, // CB_BLEND3_CONTROL
0x00000000, // CB_BLEND4_CONTROL
0x00000000, // CB_BLEND5_CONTROL
0x00000000, // CB_BLEND6_CONTROL
0x00000000, // CB_BLEND7_CONTROL
};
static const unsigned int gfx11_SECT_CONTEXT_def_3[] =
{
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
0x00000000, // PA_CL_POINT_CULL_RAD
};
static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
{
0x00000000, // GE_MAX_OUTPUT_PER_SUBGROUP
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
0x00000000, // DB_SHADER_CONTROL
0x00090000, // PA_CL_CLIP_CNTL
0x00000004, // PA_SU_SC_MODE_CNTL
0x00000000, // PA_CL_VTE_CNTL
0x00000000, // PA_CL_VS_OUT_CNTL
0x00000000, // PA_CL_NANINF_CNTL
0x00000000, // PA_SU_LINE_STIPPLE_CNTL
0x00000000, // PA_SU_LINE_STIPPLE_SCALE
0x00000000, // PA_SU_PRIM_FILTER_CNTL
0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
0, // HOLE
0x00000000, // PA_CL_NGG_CNTL
0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
0x00000000, // PA_STEREO_CNTL
0x00000000, // PA_STATE_STEREO_X
0x00000000, // PA_CL_VRS_CNTL
0x00000000, // PA_SIDEBAND_REQUEST_DELAYS
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // PA_SU_POINT_SIZE
0x00000000, // PA_SU_POINT_MINMAX
0x00000000, // PA_SU_LINE_CNTL
0x00000000, // PA_SC_LINE_STIPPLE
0x00000000, // VGT_OUTPUT_PATH_CNTL
0x00000000, // VGT_HOS_CNTL
0x00000000, // VGT_HOS_MAX_TESS_LEVEL
0x00000000, // VGT_HOS_MIN_TESS_LEVEL
0x00000000, // VGT_HOS_REUSE_DEPTH
0x00000000, // VGT_GROUP_PRIM_TYPE
0x00000000, // VGT_GROUP_FIRST_DECR
0x00000000, // VGT_GROUP_DECR
0x00000000, // VGT_GROUP_VECT_0_CNTL
0x00000000, // VGT_GROUP_VECT_1_CNTL
0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
0x00000000, // VGT_GS_MODE
0x00000000, // VGT_GS_ONCHIP_CNTL
0x00000000, // PA_SC_MODE_CNTL_0
0x00000000, // PA_SC_MODE_CNTL_1
0x00000000, // VGT_ENHANCE
0x00000100, // VGT_GS_PER_ES
0x00000080, // VGT_ES_PER_GS
0x00000002, // VGT_GS_PER_VS
0x00000000, // VGT_GSVS_RING_OFFSET_1
0x00000000, // VGT_GSVS_RING_OFFSET_2
0x00000000, // VGT_GSVS_RING_OFFSET_3
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
{
0x00000000, // WD_ENHANCE
0x00000000, // VGT_PRIMITIVEID_EN
};
static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
{
0x00000000, // VGT_PRIMITIVEID_RESET
};
static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
{
0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
0, // HOLE
0x00000000, // VGT_INSTANCE_STEP_RATE_0
0x00000000, // VGT_INSTANCE_STEP_RATE_1
0x000000ff, // IA_MULTI_VGT_PARAM
0x00000000, // VGT_ESGS_RING_ITEMSIZE
0x00000000, // VGT_GSVS_RING_ITEMSIZE
0x00000000, // VGT_REUSE_OFF
0x00000000, // VGT_VTX_CNT_EN
0x00000000, // DB_HTILE_SURFACE
0x00000000, // DB_SRESULTS_COMPARE_STATE0
0x00000000, // DB_SRESULTS_COMPARE_STATE1
0x00000000, // DB_PRELOAD_CONTROL
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
0, // HOLE
0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
0, // HOLE
0x00000000, // VGT_GS_MAX_VERT_OUT
0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
0x00000000, // GE_NGG_SUBGRP_CNTL
0x00000000, // VGT_TESS_DISTRIBUTION
0x00000000, // VGT_SHADER_STAGES_EN
0x00000000, // VGT_LS_HS_CONFIG
0x00000000, // VGT_GS_VERT_ITEMSIZE
0x00000000, // VGT_GS_VERT_ITEMSIZE_1
0x00000000, // VGT_GS_VERT_ITEMSIZE_2
0x00000000, // VGT_GS_VERT_ITEMSIZE_3
0x00000000, // VGT_TF_PARAM
0x00000000, // DB_ALPHA_TO_MASK
0, // HOLE
0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
0x00000000, // PA_SU_POLY_OFFSET_CLAMP
0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
0x00000000, // VGT_GS_INSTANCE_CNT
0x00000000, // VGT_STRMOUT_CONFIG
0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
};
static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
{
0x00000000, // PA_SC_CENTROID_PRIORITY_0
0x00000000, // PA_SC_CENTROID_PRIORITY_1
0x00001000, // PA_SC_LINE_CNTL
0x00000000, // PA_SC_AA_CONFIG
0x00000005, // PA_SU_VTX_CNTL
0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
0x00000000, // PA_SC_SHADER_CONTROL
0x00000003, // PA_SC_BINNER_CNTL_0
0x00000000, // PA_SC_BINNER_CNTL_1
0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
0x00000000, // PA_SC_NGG_MODE_CNTL
0x00000000, // PA_SC_BINNER_CNTL_2
0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
0x00000020, // VGT_OUT_DEALLOC_CNTL
0x00000000, // CB_COLOR0_BASE
0x00000000, // CB_COLOR0_PITCH
0x00000000, // CB_COLOR0_SLICE
0x00000000, // CB_COLOR0_VIEW
0x00000000, // CB_COLOR0_INFO
0x00000000, // CB_COLOR0_ATTRIB
0x00000000, // CB_COLOR0_DCC_CONTROL
0x00000000, // CB_COLOR0_CMASK
0x00000000, // CB_COLOR0_CMASK_SLICE
0x00000000, // CB_COLOR0_FMASK
0x00000000, // CB_COLOR0_FMASK_SLICE
0x00000000, // CB_COLOR0_CLEAR_WORD0
0x00000000, // CB_COLOR0_CLEAR_WORD1
0x00000000, // CB_COLOR0_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR1_BASE
0x00000000, // CB_COLOR1_PITCH
0x00000000, // CB_COLOR1_SLICE
0x00000000, // CB_COLOR1_VIEW
0x00000000, // CB_COLOR1_INFO
0x00000000, // CB_COLOR1_ATTRIB
0x00000000, // CB_COLOR1_DCC_CONTROL
0x00000000, // CB_COLOR1_CMASK
0x00000000, // CB_COLOR1_CMASK_SLICE
0x00000000, // CB_COLOR1_FMASK
0x00000000, // CB_COLOR1_FMASK_SLICE
0x00000000, // CB_COLOR1_CLEAR_WORD0
0x00000000, // CB_COLOR1_CLEAR_WORD1
0x00000000, // CB_COLOR1_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR2_BASE
0x00000000, // CB_COLOR2_PITCH
0x00000000, // CB_COLOR2_SLICE
0x00000000, // CB_COLOR2_VIEW
0x00000000, // CB_COLOR2_INFO
0x00000000, // CB_COLOR2_ATTRIB
0x00000000, // CB_COLOR2_DCC_CONTROL
0x00000000, // CB_COLOR2_CMASK
0x00000000, // CB_COLOR2_CMASK_SLICE
0x00000000, // CB_COLOR2_FMASK
0x00000000, // CB_COLOR2_FMASK_SLICE
0x00000000, // CB_COLOR2_CLEAR_WORD0
0x00000000, // CB_COLOR2_CLEAR_WORD1
0x00000000, // CB_COLOR2_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR3_BASE
0x00000000, // CB_COLOR3_PITCH
0x00000000, // CB_COLOR3_SLICE
0x00000000, // CB_COLOR3_VIEW
0x00000000, // CB_COLOR3_INFO
0x00000000, // CB_COLOR3_ATTRIB
0x00000000, // CB_COLOR3_DCC_CONTROL
0x00000000, // CB_COLOR3_CMASK
0x00000000, // CB_COLOR3_CMASK_SLICE
0x00000000, // CB_COLOR3_FMASK
0x00000000, // CB_COLOR3_FMASK_SLICE
0x00000000, // CB_COLOR3_CLEAR_WORD0
0x00000000, // CB_COLOR3_CLEAR_WORD1
0x00000000, // CB_COLOR3_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR4_BASE
0x00000000, // CB_COLOR4_PITCH
0x00000000, // CB_COLOR4_SLICE
0x00000000, // CB_COLOR4_VIEW
0x00000000, // CB_COLOR4_INFO
0x00000000, // CB_COLOR4_ATTRIB
0x00000000, // CB_COLOR4_DCC_CONTROL
0x00000000, // CB_COLOR4_CMASK
0x00000000, // CB_COLOR4_CMASK_SLICE
0x00000000, // CB_COLOR4_FMASK
0x00000000, // CB_COLOR4_FMASK_SLICE
0x00000000, // CB_COLOR4_CLEAR_WORD0
0x00000000, // CB_COLOR4_CLEAR_WORD1
0x00000000, // CB_COLOR4_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR5_BASE
0x00000000, // CB_COLOR5_PITCH
0x00000000, // CB_COLOR5_SLICE
0x00000000, // CB_COLOR5_VIEW
0x00000000, // CB_COLOR5_INFO
0x00000000, // CB_COLOR5_ATTRIB
0x00000000, // CB_COLOR5_DCC_CONTROL
0x00000000, // CB_COLOR5_CMASK
0x00000000, // CB_COLOR5_CMASK_SLICE
0x00000000, // CB_COLOR5_FMASK
0x00000000, // CB_COLOR5_FMASK_SLICE
0x00000000, // CB_COLOR5_CLEAR_WORD0
0x00000000, // CB_COLOR5_CLEAR_WORD1
0x00000000, // CB_COLOR5_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR6_BASE
0x00000000, // CB_COLOR6_PITCH
0x00000000, // CB_COLOR6_SLICE
0x00000000, // CB_COLOR6_VIEW
0x00000000, // CB_COLOR6_INFO
0x00000000, // CB_COLOR6_ATTRIB
0x00000000, // CB_COLOR6_DCC_CONTROL
0x00000000, // CB_COLOR6_CMASK
0x00000000, // CB_COLOR6_CMASK_SLICE
0x00000000, // CB_COLOR6_FMASK
0x00000000, // CB_COLOR6_FMASK_SLICE
0x00000000, // CB_COLOR6_CLEAR_WORD0
0x00000000, // CB_COLOR6_CLEAR_WORD1
0x00000000, // CB_COLOR6_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR7_BASE
0x00000000, // CB_COLOR7_PITCH
0x00000000, // CB_COLOR7_SLICE
0x00000000, // CB_COLOR7_VIEW
0x00000000, // CB_COLOR7_INFO
0x00000000, // CB_COLOR7_ATTRIB
0x00000000, // CB_COLOR7_DCC_CONTROL
0x00000000, // CB_COLOR7_CMASK
0x00000000, // CB_COLOR7_CMASK_SLICE
0x00000000, // CB_COLOR7_FMASK
0x00000000, // CB_COLOR7_FMASK_SLICE
0x00000000, // CB_COLOR7_CLEAR_WORD0
0x00000000, // CB_COLOR7_CLEAR_WORD1
0x00000000, // CB_COLOR7_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR0_BASE_EXT
0x00000000, // CB_COLOR1_BASE_EXT
0x00000000, // CB_COLOR2_BASE_EXT
0x00000000, // CB_COLOR3_BASE_EXT
0x00000000, // CB_COLOR4_BASE_EXT
0x00000000, // CB_COLOR5_BASE_EXT
0x00000000, // CB_COLOR6_BASE_EXT
0x00000000, // CB_COLOR7_BASE_EXT
0x00000000, // CB_COLOR0_CMASK_BASE_EXT
0x00000000, // CB_COLOR1_CMASK_BASE_EXT
0x00000000, // CB_COLOR2_CMASK_BASE_EXT
0x00000000, // CB_COLOR3_CMASK_BASE_EXT
0x00000000, // CB_COLOR4_CMASK_BASE_EXT
0x00000000, // CB_COLOR5_CMASK_BASE_EXT
0x00000000, // CB_COLOR6_CMASK_BASE_EXT
0x00000000, // CB_COLOR7_CMASK_BASE_EXT
0x00000000, // CB_COLOR0_FMASK_BASE_EXT
0x00000000, // CB_COLOR1_FMASK_BASE_EXT
0x00000000, // CB_COLOR2_FMASK_BASE_EXT
0x00000000, // CB_COLOR3_FMASK_BASE_EXT
0x00000000, // CB_COLOR4_FMASK_BASE_EXT
0x00000000, // CB_COLOR5_FMASK_BASE_EXT
0x00000000, // CB_COLOR6_FMASK_BASE_EXT
0x00000000, // CB_COLOR7_FMASK_BASE_EXT
0x00000000, // CB_COLOR0_DCC_BASE_EXT
0x00000000, // CB_COLOR1_DCC_BASE_EXT
0x00000000, // CB_COLOR2_DCC_BASE_EXT
0x00000000, // CB_COLOR3_DCC_BASE_EXT
0x00000000, // CB_COLOR4_DCC_BASE_EXT
0x00000000, // CB_COLOR5_DCC_BASE_EXT
0x00000000, // CB_COLOR6_DCC_BASE_EXT
0x00000000, // CB_COLOR7_DCC_BASE_EXT
0x00000000, // CB_COLOR0_ATTRIB2
0x00000000, // CB_COLOR1_ATTRIB2
0x00000000, // CB_COLOR2_ATTRIB2
0x00000000, // CB_COLOR3_ATTRIB2
0x00000000, // CB_COLOR4_ATTRIB2
0x00000000, // CB_COLOR5_ATTRIB2
0x00000000, // CB_COLOR6_ATTRIB2
0x00000000, // CB_COLOR7_ATTRIB2
0x00000000, // CB_COLOR0_ATTRIB3
0x00000000, // CB_COLOR1_ATTRIB3
0x00000000, // CB_COLOR2_ATTRIB3
0x00000000, // CB_COLOR3_ATTRIB3
0x00000000, // CB_COLOR4_ATTRIB3
0x00000000, // CB_COLOR5_ATTRIB3
0x00000000, // CB_COLOR6_ATTRIB3
0x00000000, // CB_COLOR7_ATTRIB3
};
static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
{
{gfx11_SECT_CONTEXT_def_1, 0x0000a000, 215 },
{gfx11_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
{gfx11_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
{gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
{gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
{gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
{gfx11_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
{gfx11_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
{ 0, 0, 0 }
};
static const struct cs_section_def gfx11_cs_data[] = {
{ gfx11_SECT_CONTEXT_defs, SECT_CONTEXT },
{ 0, SECT_NONE }
};
#endif /* __CLEARSTATE_GFX11_H_ */

View file

@ -3485,6 +3485,7 @@ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
@ -3502,6 +3503,9 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@ -3519,10 +3523,23 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
uint64_t wptr_addr = ring->wptr_gpu_addr;
uint32_t eng_sel = 0;
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_COMPUTE:
eng_sel = 0;
break;
case AMDGPU_RING_TYPE_GFX:
eng_sel = 4;
break;
case AMDGPU_RING_TYPE_MES:
eng_sel = 5;
break;
default:
WARN_ON(1);
}
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
@ -3548,8 +3565,14 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq)
{
struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
return;
}
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(action) |
@ -3595,12 +3618,7 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub)
{
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
amdgpu_ring_write(kiq_ring,
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
@ -3726,13 +3744,6 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
gfx_v10_0_init_spm_golden_registers(adev);
}
static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
{
adev->gfx.scratch.num_reg = 8;
adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@ -3769,34 +3780,26 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
return r;
}
WREG32(scratch, 0xCAFEDEAD);
WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@ -3808,8 +3811,6 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -3820,19 +3821,39 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned index;
uint64_t gpu_addr;
uint32_t tmp;
volatile uint32_t *cpu_ptr;
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
if (ring->is_mes_queue) {
uint32_t padding, offset;
offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
padding = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
cpu_ptr = &adev->wb.wb[index];
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
@ -3853,16 +3874,17 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err2;
}
tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF)
if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
amdgpu_ib_free(adev, &ib, NULL);
if (!ring->is_mes_queue)
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
return r;
}
@ -4687,7 +4709,6 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
@ -4707,17 +4728,13 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
return 0;
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
int r;
unsigned irq_type;
struct amdgpu_ring *ring;
unsigned int hw_prio;
@ -4742,12 +4759,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
if (r)
return r;
return 0;
}
static int gfx_v10_0_sw_init(void *handle)
@ -4822,8 +4835,6 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v10_0_scratch_init(adev);
r = gfx_v10_0_me_init(adev);
if (r)
return r;
@ -4879,16 +4890,18 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
kiq = &adev->gfx.kiq;
r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
if (r)
return r;
kiq = &adev->gfx.kiq;
r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
if (r)
return r;
}
r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
if (r)
@ -4940,8 +4953,11 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
if (!adev->enable_mes_kiq) {
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
}
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@ -6344,12 +6360,12 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@ -6382,11 +6398,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
/* Set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@ -6565,10 +6581,10 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
}
}
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
struct v10_gfx_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr;
uint32_t tmp;
uint32_t rb_bufsz;
@ -6578,8 +6594,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_wptr_hi = 0;
/* set the pointer to the MQD */
mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
@ -6605,23 +6621,23 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_quantum = tmp;
/* set up gfx hqd base. this is similar as CP_RB_BASE */
hqd_gpu_addr = ring->gpu_addr >> 8;
hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_gfx_hqd_base = hqd_gpu_addr;
mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
mqd->cp_gfx_hqd_rptr_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up rb_wptr_poll addr */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
@ -6632,9 +6648,9 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
/* set up cp_doorbell_control */
tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
if (ring->use_doorbell) {
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_EN, 1);
} else
@ -6642,13 +6658,7 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
DOORBELL_EN, 0);
mqd->cp_rb_doorbell_control = tmp;
/*if there are 2 gfx rings, set the lower doorbell range of the first ring,
*otherwise the range of the second ring will override the first ring */
if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
/* active the queue */
@ -6716,7 +6726,16 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v10_0_gfx_mqd_init(ring);
amdgpu_ring_init_mqd(ring);
/*
* if there are 2 gfx rings, set the lower doorbell
* range of the first ring, otherwise the range of
* the second ring will override the first ring
*/
if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
#ifdef BRING_UP_DEBUG
gfx_v10_0_gfx_queue_init_register(ring);
#endif
@ -6730,7 +6749,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
adev->wb.wb[ring->wptr_offs] = 0;
*ring->wptr_cpu_addr = 0;
amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
mutex_lock(&adev->srbm_mutex);
@ -6809,23 +6828,10 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
return r;
}
static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
struct amdgpu_device *adev = ring->adev;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
mqd->cp_hqd_queue_priority =
AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
}
}
}
static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
struct v10_compute_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
@ -6837,7 +6843,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
eop_base_addr = ring->eop_gpu_addr >> 8;
eop_base_addr = prop->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@ -6851,9 +6857,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
/* enable doorbell? */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
if (ring->use_doorbell) {
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@ -6868,15 +6874,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_doorbell_control = tmp;
/* disable the queue if it's active */
ring->wptr = 0;
mqd->cp_hqd_dequeue_request = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
/* set the pointer to the MQD */
mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
@ -6884,14 +6889,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
@ -6904,22 +6909,22 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
tmp = 0;
/* enable the doorbell if requested */
if (ring->use_doorbell) {
if (prop->use_doorbell) {
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
@ -6932,7 +6937,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
@ -6948,13 +6952,10 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_ib_control = tmp;
/* set static priority for a compute queue/ring */
gfx_v10_0_compute_mqd_set_priority(ring, mqd);
mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
/* map_queues packet doesn't need activate the queue,
* so only kiq need set this field.
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
mqd->cp_hqd_active = 1;
mqd->cp_hqd_active = prop->hqd_active;
return 0;
}
@ -7095,7 +7096,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v10_0_compute_mqd_init(ring);
amdgpu_ring_init_mqd(ring);
gfx_v10_0_kiq_init_register(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@ -7117,7 +7118,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v10_0_compute_mqd_init(ring);
amdgpu_ring_init_mqd(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@ -7130,7 +7131,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
@ -7210,7 +7211,10 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
return r;
}
r = gfx_v10_0_kiq_resume(adev);
if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
r = amdgpu_mes_kiq_hw_init(adev);
else
r = gfx_v10_0_kiq_resume(adev);
if (r)
return r;
@ -7800,6 +7804,7 @@ static int gfx_v10_0_early_init(void *handle)
gfx_v10_0_set_irq_funcs(adev);
gfx_v10_0_set_gds_init(adev);
gfx_v10_0_set_rlc_funcs(adev);
gfx_v10_0_set_mqd_funcs(adev);
/* init rlcg reg access ctrl */
gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
@ -8496,7 +8501,8 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
/* gfx10 is 32bit rptr*/
return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@ -8506,7 +8512,7 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@ -8521,7 +8527,7 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@ -8531,7 +8537,8 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
/* gfx10 hardware is 32bit rptr */
return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@ -8540,7 +8547,7 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@ -8552,7 +8559,7 @@ static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
@ -8614,6 +8621,10 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
if (ring->is_mes_queue)
/* inherit vmid from mqd */
control |= 0x400000;
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@ -8633,6 +8644,10 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
if (ring->is_mes_queue)
/* inherit vmid from mqd */
control |= 0x40000000;
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@ -8690,7 +8705,8 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, ring->is_mes_queue ?
(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
}
static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@ -8703,10 +8719,25 @@ static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
upper_32_bits(addr), seq, 0xffffffff, 4);
}
static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
amdgpu_ring_write(ring,
PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
if (ring->is_mes_queue)
gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
else
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@ -8861,26 +8892,36 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_ce_ib_state ce_payload = {0};
uint64_t csa_addr;
uint64_t offset, ce_payload_gpu_addr;
void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
csa_addr = amdgpu_csa_vaddr(ring->adev);
if (ring->is_mes_queue) {
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gfx_meta_data) +
offsetof(struct v10_gfx_meta_data, ce_payload);
ce_payload_gpu_addr =
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ce_payload_cpu_addr =
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
} else {
offset = offsetof(struct v10_gfx_meta_data, ce_payload);
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
amdgpu_ring_write(ring, lower_32_bits(csa_addr +
offsetof(struct v10_gfx_meta_data, ce_payload)));
amdgpu_ring_write(ring, upper_32_bits(csa_addr +
offsetof(struct v10_gfx_meta_data, ce_payload)));
amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
if (resume)
amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
offsetof(struct v10_gfx_meta_data,
ce_payload),
amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
sizeof(ce_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
@ -8891,12 +8932,33 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_de_ib_state de_payload = {0};
uint64_t csa_addr, gds_addr;
uint64_t offset, gds_addr, de_payload_gpu_addr;
void *de_payload_cpu_addr;
int cnt;
csa_addr = amdgpu_csa_vaddr(ring->adev);
gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
if (ring->is_mes_queue) {
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gfx_meta_data) +
offsetof(struct v10_gfx_meta_data, de_payload);
de_payload_gpu_addr =
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
de_payload_cpu_addr =
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gds_backup) +
offsetof(struct v10_gfx_meta_data, de_payload);
gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
} else {
offset = offsetof(struct v10_gfx_meta_data, de_payload);
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
}
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@ -8906,15 +8968,11 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
amdgpu_ring_write(ring, lower_32_bits(csa_addr +
offsetof(struct v10_gfx_meta_data, de_payload)));
amdgpu_ring_write(ring, upper_32_bits(csa_addr +
offsetof(struct v10_gfx_meta_data, de_payload)));
amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
if (resume)
amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
offsetof(struct v10_gfx_meta_data,
de_payload),
amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
sizeof(de_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&de_payload,
@ -9151,31 +9209,51 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
int i;
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: CP EOP\n");
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
if (pipe_id == 0)
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
else
amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
* The interrupt can only be enabled/disabled per pipe instead of per queue.
*/
if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
amdgpu_fence_process(ring);
if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
struct amdgpu_mes_queue *queue;
mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
spin_lock(&adev->mes.queue_id_lock);
queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
if (queue) {
DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
amdgpu_fence_process(queue->ring);
}
spin_unlock(&adev->mes.queue_id_lock);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
if (pipe_id == 0)
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
else
amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
* The interrupt can only be enabled/disabled per pipe instead
* of per queue.
*/
if ((ring->me == me_id) &&
(ring->pipe == pipe_id) &&
(ring->queue == queue_id))
amdgpu_fence_process(ring);
}
break;
}
break;
}
return 0;
}
@ -9580,6 +9658,20 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.oa_size = 16;
}
static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
{
/* set gfx eng mqd */
adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
sizeof(struct v10_gfx_mqd);
adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
gfx_v10_0_gfx_mqd_init;
/* set compute eng mqd */
adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
sizeof(struct v10_compute_mqd);
adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
gfx_v10_0_compute_mqd_init;
}
static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
u32 bitmap)
{

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,29 @@
/*
* Copyright 2021 dvanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GFX_V11_0_H__
#define __GFX_V11_0_H__
extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block;
#endif

View file

@ -1778,39 +1778,26 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
static void gfx_v6_0_scratch_init(struct amdgpu_device *adev)
{
adev->gfx.scratch.num_reg = 8;
adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r)
return r;
WREG32(scratch, 0xCAFEDEAD);
WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
goto error_free_scratch;
return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@ -1818,9 +1805,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -1903,50 +1887,42 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
uint32_t scratch;
struct amdgpu_ib ib;
uint32_t tmp = 0;
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
return r;
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256,
AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err2;
goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
goto err2;
goto error;
} else if (r < 0) {
goto err2;
goto error;
}
tmp = RREG32(scratch);
tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
error:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -2117,7 +2093,7 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_WPTR, ring->wptr);
/* set the wb address whether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@ -2139,7 +2115,7 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
return *ring->rptr_cpu_addr;
}
static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
@ -2203,7 +2179,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB1_WPTR, ring->wptr);
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@ -2222,7 +2198,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB2_CNTL, tmp | CP_RB2_CNTL__RB_RPTR_WR_ENA_MASK);
ring->wptr = 0;
WREG32(mmCP_RB2_WPTR, ring->wptr);
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@ -3094,8 +3070,6 @@ static int gfx_v6_0_sw_init(void *handle)
if (r)
return r;
gfx_v6_0_scratch_init(adev);
r = gfx_v6_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");

View file

@ -2049,26 +2049,6 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
/*
* GPU scratch registers helpers function.
*/
/**
* gfx_v7_0_scratch_init - setup driver info for CP scratch regs
*
* @adev: amdgpu_device pointer
*
* Set up the number and offset of the CP scratch registers.
* NOTE: use of CP scratch registers is a legacy interface and
* is not used by default on newer asics (r6xx+). On newer asics,
* memory buffers are used for fences rather than scratch regs.
*/
static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
{
adev->gfx.scratch.num_reg = 8;
adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
/**
* gfx_v7_0_ring_test_ring - basic gfx ring test
*
@ -2082,36 +2062,28 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -2355,48 +2327,40 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
uint32_t scratch;
uint32_t tmp = 0;
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
return r;
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256,
AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err2;
goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
goto err2;
goto error;
} else if (r < 0) {
goto err2;
goto error;
}
tmp = RREG32(scratch);
tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
error:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -2630,8 +2594,8 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address whether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
/* set the wb address wether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@ -2656,7 +2620,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
return *ring->rptr_cpu_addr;
}
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@ -2677,7 +2641,7 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
return ring->adev->wb.wb[ring->wptr_offs];
return *ring->wptr_cpu_addr;
}
static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@ -2685,7 +2649,7 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
@ -2981,12 +2945,12 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set the wb address whether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
/* set the wb address wether it's enabled or not */
wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
@ -4489,8 +4453,6 @@ static int gfx_v7_0_sw_init(void *handle)
if (r)
return r;
gfx_v7_0_scratch_init(adev);
r = gfx_v7_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");

View file

@ -835,37 +835,25 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
}
}
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
adev->gfx.scratch.num_reg = 8;
adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@ -874,8 +862,6 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -2000,8 +1986,6 @@ static int gfx_v8_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v8_0_scratch_init(adev);
r = gfx_v8_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@ -4306,11 +4290,11 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
mdelay(1);
@ -4393,7 +4377,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
uint64_t wptr_addr = ring->wptr_gpu_addr;
/* map queues */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
@ -4517,13 +4501,13 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
@ -6051,7 +6035,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
return *ring->rptr_cpu_addr;
}
static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@ -6060,7 +6044,7 @@ static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell)
/* XXX check if swapping is necessary on BE */
return ring->adev->wb.wb[ring->wptr_offs];
return *ring->wptr_cpu_addr;
else
return RREG32(mmCP_RB0_WPTR);
}
@ -6071,7 +6055,7 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@ -6271,7 +6255,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->wptr_offs];
return *ring->wptr_cpu_addr;
}
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@ -6279,7 +6263,7 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

View file

@ -780,9 +780,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
uint64_t wptr_addr = ring->wptr_gpu_addr;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
@ -951,13 +950,6 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
adev->gfx.scratch.num_reg = 8;
adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@ -995,27 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@ -1023,9 +1011,6 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -2339,8 +2324,6 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v9_0_scratch_init(adev);
r = gfx_v9_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@ -3326,11 +3309,11 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
@ -3542,13 +3525,13 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
@ -3830,7 +3813,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
@ -5279,7 +5262,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
}
static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@ -5289,7 +5272,7 @@ static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@ -5304,7 +5287,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@ -5469,7 +5452,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
}
static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@ -5478,7 +5461,7 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@ -5490,7 +5473,7 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else{
BUG(); /* only DOORBELL method supported on gfx9 now */

View file

@ -325,6 +325,8 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)

View file

@ -334,6 +334,8 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)

View file

@ -0,0 +1,511 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "gfxhub_v3_0.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "navi10_enum.h"
#include "soc15_common.h"
#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
static const char *gfxhub_client_ids[] = {
"CB/DB",
"Reserved",
"GE1",
"GE2",
"CPF",
"CPC",
"CPG",
"RLC",
"TCP",
"SQC (inst)",
"SQC (data)",
"SQG",
"Reserved",
"SDMA0",
"SDMA1",
"GCR",
"SDMA2",
"SDMA3",
};
static uint32_t gfxhub_v3_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
static void
gfxhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
u32 cid = REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, CID);
dev_err(adev->dev,
"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, RW));
}
static u64 gfxhub_v3_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
}
static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void gfxhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
gfxhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
/* Disable AGP. */
WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0);
WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, 0x00FFFFFF);
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18);
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
+ adev->vm_manager.vram_base_offset;
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
}
static void gfxhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
tmp = regGCVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
tmp = regGCVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
tmp = regGCVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
}
static void gfxhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
}
static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
{
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
0);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
0);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
}
static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
/*
* GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}
/* GART Enable. */
gfxhub_v3_0_init_gart_aperture_regs(adev);
gfxhub_v3_0_init_system_aperture_regs(adev);
gfxhub_v3_0_init_tlb_regs(adev);
gfxhub_v3_0_init_cache_regs(adev);
gfxhub_v3_0_enable_system_domain(adev);
gfxhub_v3_0_disable_identity_aperture(adev);
gfxhub_v3_0_setup_vmid_config(adev);
gfxhub_v3_0_program_invalidation(adev);
return 0;
}
static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
}
/**
* gfxhub_v3_0_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = {
.print_l2_protection_fault_status = gfxhub_v3_0_print_l2_protection_fault_status,
.get_invalidate_req = gfxhub_v3_0_get_invalidate_req,
};
static void gfxhub_v3_0_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
regGCVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &gfxhub_v3_0_vmhub_funcs;
}
const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs = {
.get_fb_location = gfxhub_v3_0_get_fb_location,
.get_mc_fb_offset = gfxhub_v3_0_get_mc_fb_offset,
.setup_vm_pt_regs = gfxhub_v3_0_setup_vm_pt_regs,
.gart_enable = gfxhub_v3_0_gart_enable,
.gart_disable = gfxhub_v3_0_gart_disable,
.set_fault_enable_default = gfxhub_v3_0_set_fault_enable_default,
.init = gfxhub_v3_0_init,
};

View file

@ -0,0 +1,29 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GFXHUB_V3_0_H__
#define __GFXHUB_V3_0_H__
extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs;
#endif

View file

@ -328,7 +328,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
if (adev->gfx.kiq.ring.sched.ready &&
if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
down_read_trylock(&adev->reset_domain->sem)) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
@ -517,6 +517,10 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
/* MES fw manages IH_VMID_x_LUT updating */
if (ring->is_mes_queue)
return;
if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else

View file

@ -0,0 +1,973 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/firmware.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v11_0.h"
#include "umc_v8_7.h"
#include "athub/athub_3_0_0_sh_mask.h"
#include "athub/athub_3_0_0_offset.h"
#include "oss/osssys_6_0_0_offset.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "navi10_enum.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "nbio_v4_3.h"
#include "gfxhub_v3_0.h"
#include "mmhub_v3_0.h"
#include "mmhub_v3_0_2.h"
#include "athub_v3_0.h"
static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
{
return 0;
}
static int
gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src, unsigned type,
enum amdgpu_interrupt_state state)
{
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
/* MM HUB */
amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false);
/* GFX HUB */
amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
/* MM HUB */
amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true);
/* GFX HUB */
amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true);
break;
default:
break;
}
return 0;
}
static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
uint32_t status = 0;
u64 addr;
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (!amdgpu_sriov_vf(adev)) {
/*
* Issue a dummy read to wait for the status register to
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
if (entry->vmid_src == AMDGPU_GFXHUB_0)
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
}
if (printk_ratelimit()) {
struct amdgpu_task_info task_info;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
dev_err(adev->dev,
"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
"for process %s pid %d thread %s pid %d)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
entry->pasid, task_info.process_name, task_info.tgid,
task_info.task_name, task_info.pid);
dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
addr, entry->client_id);
if (!amdgpu_sriov_vf(adev))
hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
}
return 0;
}
static const struct amdgpu_irq_src_funcs gmc_v11_0_irq_funcs = {
.set = gmc_v11_0_vm_fault_interrupt_state,
.process = gmc_v11_0_process_interrupt,
};
static const struct amdgpu_irq_src_funcs gmc_v11_0_ecc_funcs = {
.set = gmc_v11_0_ecc_interrupt_state,
.process = amdgpu_umc_process_ecc_irq,
};
static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->gmc.vm_fault.num_types = 1;
adev->gmc.vm_fault.funcs = &gmc_v11_0_irq_funcs;
if (!amdgpu_sriov_vf(adev)) {
adev->gmc.ecc_irq.num_types = 1;
adev->gmc.ecc_irq.funcs = &gmc_v11_0_ecc_funcs;
}
}
/**
* gmc_v11_0_use_invalidate_semaphore - judge whether to use semaphore
*
* @adev: amdgpu_device pointer
* @vmhub: vmhub type
*
*/
static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
return ((vmhub == AMDGPU_MMHUB_0) &&
(!amdgpu_sriov_vf(adev)));
}
static bool gmc_v11_0_get_vmid_pasid_mapping_info(
struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
*p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;
return !!(*p_pasid);
}
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
* VMIDs 1-15 are used for userspace clients and are handled
* by the amdgpu vm/hsa code.
*/
static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
u32 tmp;
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
spin_lock(&adev->gmc.invalidate_lock);
/*
* It may lose gpuvm invalidate acknowldege state across power-gating
* off cycle, add semaphore acquire before invalidation and semaphore
* release after invalidation to avoid entering power gated state
* to WA the Issue
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng);
if (tmp & 0x1)
break;
udelay(1);
}
if (i >= adev->usec_timeout)
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng);
tmp &= 1 << vmid;
if (tmp)
break;
udelay(1);
}
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
/* Issue additional private vm invalidation to MMHUB */
if ((vmhub != AMDGPU_GFXHUB_0) &&
(hub->vm_l2_bank_select_reserved_cid2)) {
inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
/* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
inv_req |= (1 << 25);
/* Issue private invalidation */
WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
/* Read back to ensure invalidation is done*/
RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
}
spin_unlock(&adev->gmc.invalidate_lock);
if (i < adev->usec_timeout)
return;
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
/**
* gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
* Flush the TLB for the requested page table.
*/
static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
if ((vmhub == AMDGPU_GFXHUB_0) && !adev->gfx.is_poweron)
return;
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
return;
}
mutex_lock(&adev->mman.gtt_window_lock);
gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
/**
* gmc_v11_0_flush_gpu_tlb_pasid - tlb flush via pasid
*
* @adev: amdgpu_device pointer
* @pasid: pasid to be flush
*
* Flush the TLB for the requested pasid.
*/
static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
bool all_hub)
{
int vmid, i;
signed long r;
uint32_t seq;
uint16_t queried_pasid;
bool ret;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
spin_lock(&adev->gfx.kiq.ring_lock);
/* 2 dwords flush + 8 dwords fence */
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r) {
amdgpu_ring_undo(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
return -ETIME;
}
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
return -ETIME;
}
return 0;
}
for (vmid = 1; vmid < 16; vmid++) {
ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
for (i = 0; i < adev->num_vmhubs; i++)
gmc_v11_0_flush_gpu_tlb(adev, vmid,
i, flush_type);
} else {
gmc_v11_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
break;
}
}
return 0;
}
static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
/*
* It may lose gpuvm invalidate acknowldege state across power-gating
* off cycle, add semaphore acquire before invalidation and semaphore
* release after invalidation to avoid entering power gated state
* to WA the Issue
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0x1, 0x1);
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
(hub->ctx_addr_distance * vmid),
lower_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
(hub->ctx_addr_distance * vmid),
upper_32_bits(pd_addr));
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
hub->eng_distance * eng,
hub->vm_inv_eng0_ack +
hub->eng_distance * eng,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
return pd_addr;
}
static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
unsigned pasid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
/* MES fw manages IH_VMID_x_LUT updating */
if (ring->is_mes_queue)
return;
if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
amdgpu_ring_emit_wreg(ring, reg, pasid);
}
/*
* PTE format:
* 63:59 reserved
* 58:57 reserved
* 56 F
* 55 L
* 54 reserved
* 53:52 SW
* 51 T
* 50:48 mtype
* 47:12 4k physical page base address
* 11:7 fragment
* 6 write
* 5 read
* 4 exe
* 3 Z
* 2 snooped
* 1 system
* 0 valid
*
* PDE format:
* 63:59 block fragment size
* 58:55 reserved
* 54 P
* 53:48 reserved
* 47:6 physical base address of PD or PTE
* 5:3 reserved
* 2 C
* 1 system
* 0 valid
*/
static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
case AMDGPU_VM_MTYPE_CC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
default:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
}
}
static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
*addr = adev->vm_manager.vram_base_offset + *addr -
adev->gmc.vram_start;
BUG_ON(*addr & 0xFFFF00000000003FULL);
if (!adev->gmc.translate_further)
return;
if (level == AMDGPU_VM_PDB1) {
/* Set the block fragment size */
if (!(*flags & AMDGPU_PDE_PTE))
*flags |= AMDGPU_PDE_BFS(0x9);
} else if (level == AMDGPU_VM_PDB0) {
if (*flags & AMDGPU_PDE_PTE)
*flags &= ~AMDGPU_PDE_PTE;
else
*flags |= AMDGPU_PTE_TF;
}
}
static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
if (mapping->flags & AMDGPU_PTE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;
*flags |= AMDGPU_PTE_LOG;
*flags |= AMDGPU_PTE_SYSTEM;
*flags &= ~AMDGPU_PTE_VALID;
}
}
static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
return 0;
}
static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v11_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
.map_mtype = gmc_v11_0_map_mtype,
.get_vm_pde = gmc_v11_0_get_vm_pde,
.get_vm_pte = gmc_v11_0_get_vm_pte,
.get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
};
static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev)
{
adev->gmc.gmc_funcs = &gmc_v11_0_gmc_funcs;
}
static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[UMC_HWIP][0]) {
case IP_VERSION(8, 10, 0):
case IP_VERSION(8, 11, 0):
break;
default:
break;
}
}
static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(3, 0, 2):
adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
break;
default:
adev->mmhub.funcs = &mmhub_v3_0_funcs;
break;
}
}
static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
adev->gfxhub.funcs = &gfxhub_v3_0_funcs;
}
static int gmc_v11_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v11_0_set_gfxhub_funcs(adev);
gmc_v11_0_set_mmhub_funcs(adev);
gmc_v11_0_set_gmc_funcs(adev);
gmc_v11_0_set_irq_funcs(adev);
gmc_v11_0_set_umc_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
return 0;
}
static int gmc_v11_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
if (r)
return r;
r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 base = 0;
base = adev->mmhub.funcs->get_fb_location(adev);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev);
}
/**
* gmc_v11_0_mc_init - initialize the memory controller driver params
*
* @adev: amdgpu_device pointer
*
* Look up the amount of vram, vram width, and decide how to place
* vram and gart within the GPU's physical address space.
* Returns 0 for success.
*/
static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
{
int r;
/* size in MB on si */
adev->gmc.mc_vram_size =
adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
}
adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
/* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
adev->gmc.gart_size = 512ULL << 20;
} else
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
return 0;
}
static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
{
int r;
if (adev->gart.bo) {
WARN(1, "PCIE GART already initialized\n");
return 0;
}
/* Initialize common gart structure */
r = amdgpu_gart_init(adev);
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
static int gmc_v11_0_sw_init(void *handle)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->mmhub.funcs->init(adev);
spin_lock_init(&adev->gmc.invalidate_lock);
r = amdgpu_atomfirmware_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor);
adev->gmc.vram_width = vram_width;
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size,
* block size 512 (9bit)
*/
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
break;
default:
break;
}
/* This interrupt is VMC page fault.*/
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
if (r)
return r;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
if (r)
return r;
if (!amdgpu_sriov_vf(adev)) {
/* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
if (r)
return r;
}
/*
* Set the internal MC address mask This is the max address of the GPU's
* internal address space.
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
return r;
}
r = gmc_v11_0_mc_init(adev);
if (r)
return r;
amdgpu_gmc_get_vbios_allocations(adev);
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
return r;
r = gmc_v11_0_gart_init(adev);
if (r)
return r;
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.first_kfd_vmid = 8;
amdgpu_vm_manager_init(adev);
return 0;
}
/**
* gmc_v11_0_gart_fini - vm fini callback
*
* @adev: amdgpu_device pointer
*
* Tears down the driver GART/VM setup (CIK).
*/
static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
{
amdgpu_gart_table_vram_free(adev);
}
static int gmc_v11_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_vm_manager_fini(adev);
gmc_v11_0_gart_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
return 0;
}
static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
}
/**
* gmc_v11_0_gart_enable - gart enable
*
* @adev: amdgpu_device pointer
*/
static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
{
int r;
bool value;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
return r;
/* Flush HDP after it is initialized */
adev->hdp.funcs->flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
adev->mmhub.funcs->set_fault_enable_default(adev, value);
gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
return 0;
}
static int gmc_v11_0_hw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* The sequence of these two function calls matters.*/
gmc_v11_0_init_golden_registers(adev);
r = gmc_v11_0_gart_enable(adev);
if (r)
return r;
if (adev->umc.funcs && adev->umc.funcs->init_registers)
adev->umc.funcs->init_registers(adev);
return 0;
}
/**
* gmc_v11_0_gart_disable - gart disable
*
* @adev: amdgpu_device pointer
*
* This disables all VM page table.
*/
static void gmc_v11_0_gart_disable(struct amdgpu_device *adev)
{
adev->mmhub.funcs->gart_disable(adev);
}
static int gmc_v11_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
return 0;
}
amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v11_0_gart_disable(adev);
return 0;
}
static int gmc_v11_0_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v11_0_hw_fini(adev);
return 0;
}
static int gmc_v11_0_resume(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = gmc_v11_0_hw_init(adev);
if (r)
return r;
amdgpu_vmid_reset_all(adev);
return 0;
}
static bool gmc_v11_0_is_idle(void *handle)
{
/* MC is always ready in GMC v11.*/
return true;
}
static int gmc_v11_0_wait_for_idle(void *handle)
{
/* There is no need to wait for MC idle in GMC v11.*/
return 0;
}
static int gmc_v11_0_soft_reset(void *handle)
{
return 0;
}
static int gmc_v11_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
return r;
return athub_v3_0_set_clockgating(adev, state);
}
static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v3_0_get_clockgating(adev, flags);
}
static int gmc_v11_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
return 0;
}
const struct amd_ip_funcs gmc_v11_0_ip_funcs = {
.name = "gmc_v11_0",
.early_init = gmc_v11_0_early_init,
.sw_init = gmc_v11_0_sw_init,
.hw_init = gmc_v11_0_hw_init,
.late_init = gmc_v11_0_late_init,
.sw_fini = gmc_v11_0_sw_fini,
.hw_fini = gmc_v11_0_hw_fini,
.suspend = gmc_v11_0_suspend,
.resume = gmc_v11_0_resume,
.is_idle = gmc_v11_0_is_idle,
.wait_for_idle = gmc_v11_0_wait_for_idle,
.soft_reset = gmc_v11_0_soft_reset,
.set_clockgating_state = gmc_v11_0_set_clockgating_state,
.set_powergating_state = gmc_v11_0_set_powergating_state,
.get_clockgating_state = gmc_v11_0_get_clockgating_state,
};
const struct amdgpu_ip_block_version gmc_v11_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GMC,
.major = 11,
.minor = 0,
.rev = 0,
.funcs = &gmc_v11_0_ip_funcs,
};

View file

@ -0,0 +1,30 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GMC_V11_0_H__
#define __GMC_V11_0_H__
extern const struct amd_ip_funcs gmc_v11_0_ip_funcs;
extern const struct amdgpu_ip_block_version gmc_v11_0_ip_block;
#endif

View file

@ -0,0 +1,45 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "hdp_v5_2.h"
#include "hdp/hdp_5_2_1_offset.h"
#include "hdp/hdp_5_2_1_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
else
amdgpu_ring_emit_wreg(ring,
(adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
}
const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
.flush_hdp = hdp_v5_2_flush_hdp,
};

View file

@ -0,0 +1,31 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __HDP_V5_2_H__
#define __HDP_V5_2_H__
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v5_2_funcs;
#endif

View file

@ -0,0 +1,142 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "hdp_v6_0.h"
#include "hdp/hdp_6_0_0_offset.h"
#include "hdp/hdp_6_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t hdp_clk_cntl, hdp_clk_cntl1;
uint32_t hdp_mem_pwr_cntl;
if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_HDP_DS |
AMD_CG_SUPPORT_HDP_SD)))
return;
hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL);
hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
/* Before doing clock/power mode switch,
* forced on IPH & RC clock */
hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
RC_MEM_CLK_SOFT_OVERRIDE, 1);
WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
/* disable clock and power gating before any changing */
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_CTRL_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_LS_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_DS_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_SD_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_CTRL_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_LS_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_DS_EN, 0);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_SD_EN, 0);
WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
/* Already disabled above. The actions below are for "enabled" only */
if (enable) {
/* only one clock gating mode (LS/DS/SD) can be enabled */
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_SD_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_SD_EN, 1);
} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_LS_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_LS_EN, 1);
} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_DS_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
HDP_MEM_POWER_CTRL,
RC_MEM_POWER_DS_EN, 1);
}
/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
* be set for SRAM LS/DS/SD */
if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
AMD_CG_SUPPORT_HDP_SD)) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
ATOMIC_MEM_POWER_CTRL_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_CTRL_EN, 1);
WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
}
}
/* disable IPH & RC clock override after clock/power mode changing */
hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
RC_MEM_CLK_SOFT_OVERRIDE, 0);
WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
}
static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
u64 *flags)
{
uint32_t tmp;
/* AMD_CG_SUPPORT_HDP_LS/DS/SD */
tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_HDP_LS;
else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
*flags |= AMD_CG_SUPPORT_HDP_DS;
else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
*flags |= AMD_CG_SUPPORT_HDP_SD;
}
const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
.flush_hdp = hdp_v6_0_flush_hdp,
.update_clock_gating = hdp_v6_0_update_clock_gating,
.get_clock_gating_state = hdp_v6_0_get_clockgating_state,
};

View file

@ -0,0 +1,31 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __HDP_V6_0_H__
#define __HDP_V6_0_H__
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v6_0_funcs;
#endif

View file

@ -308,14 +308,9 @@ static int iceland_ih_sw_fini(void *handle)
static int iceland_ih_hw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = iceland_ih_irq_init(adev);
if (r)
return r;
return 0;
return iceland_ih_irq_init(adev);
}
static int iceland_ih_hw_fini(void *handle)

View file

@ -0,0 +1,745 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "oss/osssys_6_0_0_offset.h"
#include "oss/osssys_6_0_0_sh_mask.h"
#include "soc15_common.h"
#include "ih_v6_0.h"
#define MAX_REARM_RETRY 10
static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
/**
* ih_v6_0_init_register_offset - Initialize register offset for ih rings
*
* @adev: amdgpu_device pointer
*
* Initialize register offset ih rings (IH_V6_0).
*/
static void ih_v6_0_init_register_offset(struct amdgpu_device *adev)
{
struct amdgpu_ih_regs *ih_regs;
/* ih ring 2 is removed
* ih ring and ih ring 1 are available */
if (adev->irq.ih.ring_size) {
ih_regs = &adev->irq.ih.ih_regs;
ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
}
if (adev->irq.ih1.ring_size) {
ih_regs = &adev->irq.ih1.ih_regs;
ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
}
}
/**
* force_update_wptr_for_self_int - Force update the wptr for self interrupt
*
* @adev: amdgpu_device pointer
* @threshold: threshold to trigger the wptr reporting
* @timeout: timeout to trigger the wptr reporting
* @enabled: Enable/disable timeout flush mechanism
*
* threshold input range: 0 ~ 15, default 0,
* real_threshold = 2^threshold
* timeout input range: 0 ~ 20, default 8,
* real_timeout = (2^timeout) * 1024 / (socclk_freq)
*
* Force update wptr for self interrupt ( >= SIENNA_CICHLID).
*/
static void
force_update_wptr_for_self_int(struct amdgpu_device *adev,
u32 threshold, u32 timeout, bool enabled)
{
u32 ih_cntl, ih_rb_cntl;
ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_USED_INT_THRESHOLD, threshold);
WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
}
/**
* ih_v6_0_toggle_ring_interrupts - toggle the interrupt ring buffer
*
* @adev: amdgpu_device pointer
* @ih: amdgpu_ih_ring pointet
* @enable: true - enable the interrupts, false - disable the interrupts
*
* Toggle the interrupt ring buffer (IH_V6_0)
*/
static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih,
bool enable)
{
struct amdgpu_ih_regs *ih_regs;
uint32_t tmp;
ih_regs = &ih->ih_regs;
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
WREG32(ih_regs->ih_rb_cntl, tmp);
if (enable) {
ih->enabled = true;
} else {
/* set rptr, wptr to 0 */
WREG32(ih_regs->ih_rb_rptr, 0);
WREG32(ih_regs->ih_rb_wptr, 0);
ih->enabled = false;
ih->rptr = 0;
}
return 0;
}
/**
* ih_v6_0_toggle_interrupts - Toggle all the available interrupt ring buffers
*
* @adev: amdgpu_device pointer
* @enable: enable or disable interrupt ring buffers
*
* Toggle all the available interrupt ring buffers (IH_V6_0).
*/
static int ih_v6_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
{
struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
int i;
int r;
for (i = 0; i < ARRAY_SIZE(ih); i++) {
if (ih[i]->ring_size) {
r = ih_v6_0_toggle_ring_interrupts(adev, ih[i], enable);
if (r)
return r;
}
}
return 0;
}
static uint32_t ih_v6_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
{
int rb_bufsz = order_base_2(ih->ring_size / 4);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
MC_SPACE, ih->use_bus_addr ? 2 : 4);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
WPTR_OVERFLOW_CLEAR, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
WPTR_OVERFLOW_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
/* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
* value is written to memory
*/
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
WPTR_WRITEBACK_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
return ih_rb_cntl;
}
static uint32_t ih_v6_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
{
u32 ih_doorbell_rtpr = 0;
if (ih->use_doorbell) {
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
IH_DOORBELL_RPTR, OFFSET,
ih->doorbell_index);
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
IH_DOORBELL_RPTR,
ENABLE, 1);
} else {
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
IH_DOORBELL_RPTR,
ENABLE, 0);
}
return ih_doorbell_rtpr;
}
/**
* ih_v6_0_enable_ring - enable an ih ring buffer
*
* @adev: amdgpu_device pointer
* @ih: amdgpu_ih_ring pointer
*
* Enable an ih ring buffer (IH_V6_0)
*/
static int ih_v6_0_enable_ring(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
struct amdgpu_ih_regs *ih_regs;
uint32_t tmp;
ih_regs = &ih->ih_regs;
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = ih_v6_0_rb_cntl(ih, tmp);
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
if (ih == &adev->irq.ih1) {
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
}
WREG32(ih_regs->ih_rb_cntl, tmp);
if (ih == &adev->irq.ih) {
/* set the ih ring 0 writeback address whether it's enabled or not */
WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
}
/* set rptr, wptr to 0 */
WREG32(ih_regs->ih_rb_wptr, 0);
WREG32(ih_regs->ih_rb_rptr, 0);
WREG32(ih_regs->ih_doorbell_rptr, ih_v6_0_doorbell_rptr(ih));
return 0;
}
/**
* ih_v6_0_irq_init - init and enable the interrupt ring
*
* @adev: amdgpu_device pointer
*
* Allocate a ring buffer for the interrupt controller,
* enable the RLC, disable interrupts, enable the IH
* ring buffer and enable it.
* Called at device load and reume.
* Returns 0 for success, errors for failure.
*/
static int ih_v6_0_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
u32 ih_chicken;
u32 tmp;
int ret;
int i;
/* disable irqs */
ret = ih_v6_0_toggle_interrupts(adev, false);
if (ret)
return ret;
adev->nbio.funcs->ih_control(adev);
if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
if (ih[0]->use_bus_addr) {
ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
}
}
for (i = 0; i < ARRAY_SIZE(ih); i++) {
if (ih[i]->ring_size) {
ret = ih_v6_0_enable_ring(adev, ih[i]);
if (ret)
return ret;
}
}
/* update doorbell range for ih ring 0 */
adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
ih[0]->doorbell_index);
tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
CLIENT18_IS_STORM_CLIENT, 1);
WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
/* GC/MMHUB UTCL2 page fault interrupts are configured as
* MSI storm capable interrupts by deafult. The delay is
* used to avoid ISR being called too frequently
* when page fault happens on several continuous page
* and thus avoid MSI storm */
tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
DELAY, 3);
WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
pci_set_master(adev->pdev);
/* enable interrupts */
ret = ih_v6_0_toggle_interrupts(adev, true);
if (ret)
return ret;
/* enable wptr force update for self int */
force_update_wptr_for_self_int(adev, 0, 8, true);
if (adev->irq.ih_soft.ring_size)
adev->irq.ih_soft.enabled = true;
return 0;
}
/**
* ih_v6_0_irq_disable - disable interrupts
*
* @adev: amdgpu_device pointer
*
* Disable interrupts on the hw.
*/
static void ih_v6_0_irq_disable(struct amdgpu_device *adev)
{
force_update_wptr_for_self_int(adev, 0, 8, false);
ih_v6_0_toggle_interrupts(adev, false);
/* Wait and acknowledge irq */
mdelay(1);
}
/**
* ih_v6_0_get_wptr - get the IH ring buffer wptr
*
* @adev: amdgpu_device pointer
*
* Get the IH ring buffer wptr from either the register
* or the writeback memory buffer. Also check for
* ring buffer overflow and deal with it.
* Returns the value of the wptr.
*/
static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
wptr = le32_to_cpu(*ih->wptr_cpu);
ih_regs = &ih->ih_regs;
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 32). Hopefully
* this should allow us to catch up.
*/
tmp = (wptr + 32) & ih->ptr_mask;
dev_warn(adev->dev, "IH ring buffer overflow "
"(0x%08X, 0x%08X, 0x%08X)\n",
wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
/**
* ih_v6_0_irq_rearm - rearm IRQ if lost
*
* @adev: amdgpu_device pointer
*
*/
static void ih_v6_0_irq_rearm(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
uint32_t v = 0;
uint32_t i = 0;
struct amdgpu_ih_regs *ih_regs;
ih_regs = &ih->ih_regs;
/* Rearm IRQ / re-write doorbell if doorbell write is lost */
for (i = 0; i < MAX_REARM_RETRY; i++) {
v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
if ((v < ih->ring_size) && (v != ih->rptr))
WDOORBELL32(ih->doorbell_index, ih->rptr);
else
break;
}
}
/**
* ih_v6_0_set_rptr - set the IH ring buffer rptr
*
* @adev: amdgpu_device pointer
*
* Set the IH ring buffer rptr.
*/
static void ih_v6_0_set_rptr(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
struct amdgpu_ih_regs *ih_regs;
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
WDOORBELL32(ih->doorbell_index, ih->rptr);
if (amdgpu_sriov_vf(adev))
ih_v6_0_irq_rearm(adev, ih);
} else {
ih_regs = &ih->ih_regs;
WREG32(ih_regs->ih_rb_rptr, ih->rptr);
}
}
/**
* ih_v6_0_self_irq - dispatch work for ring 1
*
* @adev: amdgpu_device pointer
* @source: irq source
* @entry: IV with WPTR update
*
* Update the WPTR from the IV and schedule work to handle the entries.
*/
static int ih_v6_0_self_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t wptr = cpu_to_le32(entry->src_data[0]);
switch (entry->ring_id) {
case 1:
*adev->irq.ih1.wptr_cpu = wptr;
schedule_work(&adev->irq.ih1_work);
break;
default: break;
}
return 0;
}
static const struct amdgpu_irq_src_funcs ih_v6_0_self_irq_funcs = {
.process = ih_v6_0_self_irq,
};
static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev)
{
adev->irq.self_irq.num_types = 0;
adev->irq.self_irq.funcs = &ih_v6_0_self_irq_funcs;
}
static int ih_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
ih_v6_0_set_interrupt_funcs(adev);
ih_v6_0_set_self_irq_funcs(adev);
return 0;
}
static int ih_v6_0_sw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
&adev->irq.self_irq);
if (r)
return r;
/* use gpu virtual address for ih ring
* until ih_checken is programmed to allow
* use bus address for ih ring by psp bl */
use_bus_addr =
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
if (r)
return r;
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
adev->irq.ih1.ring_size = 0;
adev->irq.ih2.ring_size = 0;
/* initialize ih control register offset */
ih_v6_0_init_register_offset(adev);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
if (r)
return r;
r = amdgpu_irq_init(adev);
return r;
}
static int ih_v6_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini_sw(adev);
return 0;
}
static int ih_v6_0_hw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = ih_v6_0_irq_init(adev);
if (r)
return r;
return 0;
}
static int ih_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
ih_v6_0_irq_disable(adev);
return 0;
}
static int ih_v6_0_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
return ih_v6_0_hw_fini(adev);
}
static int ih_v6_0_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
return ih_v6_0_hw_init(adev);
}
static bool ih_v6_0_is_idle(void *handle)
{
/* todo */
return true;
}
static int ih_v6_0_wait_for_idle(void *handle)
{
/* todo */
return -ETIMEDOUT;
}
static int ih_v6_0_soft_reset(void *handle)
{
/* todo */
return 0;
}
static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
bool enable)
{
uint32_t data, def, field_val;
if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
field_val = enable ? 0 : 1;
data = REG_SET_FIELD(data, IH_CLK_CTRL,
DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
data = REG_SET_FIELD(data, IH_CLK_CTRL,
OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
data = REG_SET_FIELD(data, IH_CLK_CTRL,
LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
data = REG_SET_FIELD(data, IH_CLK_CTRL,
DYN_CLK_SOFT_OVERRIDE, field_val);
data = REG_SET_FIELD(data, IH_CLK_CTRL,
REG_CLK_SOFT_OVERRIDE, field_val);
if (def != data)
WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
}
return;
}
static int ih_v6_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
ih_v6_0_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t ih_mem_pwr_cntl;
/* Disable ih sram power cntl before switch powergating mode */
ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_CTRL_EN, 0);
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
/* It is recommended to set mem powergating mode to DS mode */
if (enable) {
/* mem power mode */
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_LS_EN, 0);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_DS_EN, 1);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_SD_EN, 0);
/* cam mem power mode */
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
/* re-enable power cntl */
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_CTRL_EN, 1);
} else {
/* mem power mode */
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_LS_EN, 0);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_DS_EN, 0);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_SD_EN, 0);
/* cam mem power mode */
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
/* re-enable power cntl*/
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
IH_BUFFER_MEM_POWER_CTRL_EN, 1);
}
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
static int ih_v6_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
ih_v6_0_update_ih_mem_power_gating(adev, enable);
return 0;
}
static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
return;
}
static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
.name = "ih_v6_0",
.early_init = ih_v6_0_early_init,
.late_init = NULL,
.sw_init = ih_v6_0_sw_init,
.sw_fini = ih_v6_0_sw_fini,
.hw_init = ih_v6_0_hw_init,
.hw_fini = ih_v6_0_hw_fini,
.suspend = ih_v6_0_suspend,
.resume = ih_v6_0_resume,
.is_idle = ih_v6_0_is_idle,
.wait_for_idle = ih_v6_0_wait_for_idle,
.soft_reset = ih_v6_0_soft_reset,
.set_clockgating_state = ih_v6_0_set_clockgating_state,
.set_powergating_state = ih_v6_0_set_powergating_state,
.get_clockgating_state = ih_v6_0_get_clockgating_state,
};
static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
.get_wptr = ih_v6_0_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
.set_rptr = ih_v6_0_set_rptr
};
static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev)
{
adev->irq.ih_funcs = &ih_v6_0_funcs;
}
const struct amdgpu_ip_block_version ih_v6_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 6,
.minor = 0,
.rev = 0,
.funcs = &ih_v6_0_ip_funcs,
};

View file

@ -0,0 +1,28 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __IH_V6_0_IH_H__
#define __IH_V6_0_IH_H__
extern const struct amdgpu_ip_block_version ih_v6_0_ip_block;
#endif

View file

@ -0,0 +1,367 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_imu.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[40];
char ucode_prefix[30];
int err;
const struct imu_firmware_header_v1_0 *imu_hdr;
struct amdgpu_firmware_info *info = NULL;
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_imu.bin", ucode_prefix);
err = request_firmware(&adev->gfx.imu_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->gfx.imu_fw);
if (err)
goto out;
imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
//adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
info->fw = adev->gfx.imu_fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
info->fw = adev->gfx.imu_fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
}
out:
if (err) {
dev_err(adev->dev,
"gfx11: Failed to load firmware \"%s\"\n",
fw_name);
release_firmware(adev->gfx.imu_fw);
}
return err;
}
static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
{
const struct imu_firmware_header_v1_0 *hdr;
const __le32 *fw_data;
unsigned i, fw_size;
if (!adev->gfx.imu_fw)
return -EINVAL;
hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
//amdgpu_ucode_print_rlc_hdr(&hdr->header);
fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4;
WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++));
WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
le32_to_cpu(hdr->imu_iram_ucode_size_bytes));
fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++));
WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
return 0;
}
static void imu_v11_0_setup(struct amdgpu_device *adev)
{
int imu_reg_val;
//enable IMU debug mode
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
imu_reg_val |= 0x1;
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
//disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
}
static int imu_v11_0_start(struct amdgpu_device *adev)
{
int imu_reg_val, i;
//Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
imu_reg_val &= 0xfffffffe;
WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
for (i = 0; i < adev->usec_timeout; i++) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
if ((imu_reg_val & 0x1f) == 0x1f)
break;
udelay(1);
}
if (i >= adev->usec_timeout) {
dev_err(adev->dev, "init imu: IMU start timeout\n");
return -ETIMEDOUT;
}
return 0;
}
static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
{
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS , 0x003f3fff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS , 0x003f3fbf, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10201000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000080, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000003f7, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x0fffff01, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000fffff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000545, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13455431, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x13455431, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76027602, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x76207620, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000345, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x0000003e, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
};
static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] =
{
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ef, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x00000f01, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x000000e4, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x000000e4, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x01231023, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000243, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000002, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000001ff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00002825, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
};
void program_imu_rlc_ram(struct amdgpu_device *adev,
const struct imu_rlc_ram_golden *regs,
const u32 array_size)
{
const struct imu_rlc_ram_golden *entry;
u32 reg, data;
int i;
for (i = 0; i < array_size; ++i) {
entry = &regs[i];
reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
reg |= entry->addr_mask;
data = entry->data;
if (entry->reg == regGCMC_VM_AGP_BASE)
data = 0x00ffffff;
else if (entry->reg == regGCMC_VM_AGP_TOP)
data = 0x0;
else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
data = adev->gmc.vram_start >> 24;
else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
data = adev->gmc.vram_end >> 24;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
}
//Indicate the latest entry
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
}
static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev)
{
u32 reg_data;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
program_imu_rlc_ram(adev, imu_rlc_ram_golden_11,
(const u32)ARRAY_SIZE(imu_rlc_ram_golden_11));
break;
case IP_VERSION(11, 0, 2):
program_imu_rlc_ram(adev, imu_rlc_ram_golden_11_0_2,
(const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_2));
break;
default:
BUG();
break;
}
//Indicate the contents of the RAM are valid
reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
}
const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
.init_microcode = imu_v11_0_init_microcode,
.load_microcode = imu_v11_0_load_microcode,
.setup_imu = imu_v11_0_setup,
.start_imu = imu_v11_0_start,
.program_rlc_ram = imu_v11_0_program_rlc_ram,
};

View file

@ -0,0 +1,30 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __IMU_V11_0_H__
#define __IMU_V11_0_H__
extern const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs;
#endif

View file

@ -407,7 +407,7 @@ static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
@ -424,7 +424,7 @@ static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));

View file

@ -99,6 +99,18 @@ static int jpeg_v2_5_sw_init(void *handle)
VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
if (r)
return r;
/* JPEG DJPEG POISON EVENT */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq);
if (r)
return r;
/* JPEG EJPEG POISON EVENT */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq);
if (r)
return r;
}
r = amdgpu_jpeg_sw_init(adev);
@ -402,7 +414,7 @@ static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
}
@ -419,7 +431,7 @@ static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
@ -573,6 +585,10 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
case VCN_2_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
break;
case VCN_2_6__SRCID_DJPEG0_POISON:
case VCN_2_6__SRCID_EJPEG0_POISON:
amdgpu_jpeg_process_poison_irq(adev, source, entry);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);

View file

@ -427,7 +427,7 @@ static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
@ -444,7 +444,7 @@ static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));

View file

@ -0,0 +1,609 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
/**
* jpeg_v4_0_early_init - set function pointers
*
* @handle: amdgpu_device pointer
*
* Set ring and irq function pointers
*/
static int jpeg_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->jpeg.num_jpeg_inst = 1;
jpeg_v4_0_set_dec_ring_funcs(adev);
jpeg_v4_0_set_irq_funcs(adev);
return 0;
}
/**
* jpeg_v4_0_sw_init - sw init for JPEG block
*
* @handle: amdgpu_device pointer
*
* Load firmware and sw initialization
*/
static int jpeg_v4_0_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int r;
/* JPEG TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
if (r)
return r;
r = amdgpu_jpeg_sw_init(adev);
if (r)
return r;
r = amdgpu_jpeg_resume(adev);
if (r)
return r;
ring = &adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
return 0;
}
/**
* jpeg_v4_0_sw_fini - sw fini for JPEG block
*
* @handle: amdgpu_device pointer
*
* JPEG suspend and free up sw allocation
*/
static int jpeg_v4_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
r = amdgpu_jpeg_sw_fini(adev);
return r;
}
/**
* jpeg_v4_0_hw_init - start and test JPEG block
*
* @handle: amdgpu_device pointer
*
*/
static int jpeg_v4_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
int r;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
VCN_JPEG_DB_CTRL__EN_MASK);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
return 0;
}
/**
* jpeg_v4_0_hw_fini - stop the hardware block
*
* @handle: amdgpu_device pointer
*
* Stop the JPEG block, mark ring as not ready any more
*/
static int jpeg_v4_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
cancel_delayed_work_sync(&adev->vcn.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
return 0;
}
/**
* jpeg_v4_0_suspend - suspend JPEG block
*
* @handle: amdgpu_device pointer
*
* HW fini and suspend JPEG block
*/
static int jpeg_v4_0_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = jpeg_v4_0_hw_fini(adev);
if (r)
return r;
r = amdgpu_jpeg_suspend(adev);
return r;
}
/**
* jpeg_v4_0_resume - resume JPEG block
*
* @handle: amdgpu_device pointer
*
* Resume firmware and hw init JPEG block
*/
static int jpeg_v4_0_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = amdgpu_jpeg_resume(adev);
if (r)
return r;
r = jpeg_v4_0_hw_init(adev);
return r;
}
static void jpeg_v4_0_disable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
} else {
data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
}
data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
| JPEG_CGC_GATE__JPEG2_DEC_MASK
| JPEG_CGC_GATE__JMCIF_MASK
| JPEG_CGC_GATE__JRBBM_MASK);
WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
}
static void jpeg_v4_0_enable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
} else {
data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
}
data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
|JPEG_CGC_GATE__JPEG2_DEC_MASK
|JPEG_CGC_GATE__JMCIF_MASK
|JPEG_CGC_GATE__JRBBM_MASK);
WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
}
static int jpeg_v4_0_disable_static_power_gating(struct amdgpu_device *adev)
{
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
uint32_t data = 0;
int r = 0;
data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
r = SOC15_WAIT_ON_RREG(JPEG, 0,
regUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG disable power gating failed\n");
return r;
}
}
/* disable anti hang mechanism */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
/* keep the JPEG in static PG mode */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
return 0;
}
static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev)
{
/* enable anti hang mechanism */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
uint32_t data = 0;
int r = 0;
data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
r = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS,
(2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG enable power gating failed\n");
return r;
}
}
return 0;
}
/**
* jpeg_v4_0_start - start JPEG block
*
* @adev: amdgpu_device pointer
*
* Setup and start the JPEG block
*/
static int jpeg_v4_0_start(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_jpeg(adev, true);
/* disable power gating */
r = jpeg_v4_0_disable_static_power_gating(adev);
if (r)
return r;
/* JPEG disable CGC */
jpeg_v4_0_disable_clock_gating(adev);
/* MJPEG global tiling registers */
WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
/* enable JMI channel */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
~UVD_JMI_CNTL__SOFT_RESET_MASK);
/* enable System Interrupt for JRBC */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
JPEG_SYS_INT_EN__DJRBC_MASK,
~JPEG_SYS_INT_EN__DJRBC_MASK);
WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L);
WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
return 0;
}
/**
* jpeg_v4_0_stop - stop JPEG block
*
* @adev: amdgpu_device pointer
*
* stop the JPEG block
*/
static int jpeg_v4_0_stop(struct amdgpu_device *adev)
{
int r;
/* reset JMI */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
UVD_JMI_CNTL__SOFT_RESET_MASK,
~UVD_JMI_CNTL__SOFT_RESET_MASK);
jpeg_v4_0_enable_clock_gating(adev);
/* enable power gating */
r = jpeg_v4_0_enable_static_power_gating(adev);
if (r)
return r;
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_jpeg(adev, false);
return 0;
}
/**
* jpeg_v4_0_dec_ring_get_rptr - get read pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware read pointer
*/
static uint64_t jpeg_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
}
/**
* jpeg_v4_0_dec_ring_get_wptr - get write pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware write pointer
*/
static uint64_t jpeg_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
return *ring->wptr_cpu_addr;
else
return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
}
/**
* jpeg_v4_0_dec_ring_set_wptr - set write pointer
*
* @ring: amdgpu_ring pointer
*
* Commits the write pointer to the hardware
*/
static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
static bool jpeg_v4_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
return ret;
}
static int jpeg_v4_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
static int jpeg_v4_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
if (enable) {
if (!jpeg_v4_0_is_idle(handle))
return -EBUSY;
jpeg_v4_0_enable_clock_gating(adev);
} else {
jpeg_v4_0_disable_clock_gating(adev);
}
return 0;
}
static int jpeg_v4_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
if (state == adev->jpeg.cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
ret = jpeg_v4_0_stop(adev);
else
ret = jpeg_v4_0_start(adev);
if (!ret)
adev->jpeg.cur_state = state;
return ret;
}
static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
return 0;
}
static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: JPEG TRAP\n");
switch (entry->src_id) {
case VCN_4_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
break;
default:
DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
break;
}
return 0;
}
static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
.name = "jpeg_v4_0",
.early_init = jpeg_v4_0_early_init,
.late_init = NULL,
.sw_init = jpeg_v4_0_sw_init,
.sw_fini = jpeg_v4_0_sw_fini,
.hw_init = jpeg_v4_0_hw_init,
.hw_fini = jpeg_v4_0_hw_fini,
.suspend = jpeg_v4_0_suspend,
.resume = jpeg_v4_0_resume,
.is_idle = jpeg_v4_0_is_idle,
.wait_for_idle = jpeg_v4_0_wait_for_idle,
.check_soft_reset = NULL,
.pre_soft_reset = NULL,
.soft_reset = NULL,
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_set_powergating_state,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
.vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* jpeg_v4_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v4_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
.emit_ib_size = 22, /* jpeg_v4_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
.test_ring = amdgpu_jpeg_dec_ring_test_ring,
.test_ib = amdgpu_jpeg_dec_ring_test_ib,
.insert_nop = jpeg_v2_0_dec_ring_nop,
.insert_start = jpeg_v2_0_dec_ring_insert_start,
.insert_end = jpeg_v2_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_jpeg_ring_begin_use,
.end_use = amdgpu_jpeg_ring_end_use,
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_dec_ring_vm_funcs;
DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
.set = jpeg_v4_0_set_interrupt_state,
.process = jpeg_v4_0_process_interrupt,
};
static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->irq.num_types = 1;
adev->jpeg.inst->irq.funcs = &jpeg_v4_0_irq_funcs;
}
const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 4,
.minor = 0,
.rev = 0,
.funcs = &jpeg_v4_0_ip_funcs,
};

View file

@ -0,0 +1,29 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __JPEG_V4_0_H__
#define __JPEG_V4_0_H__
extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
#endif /* __JPEG_V4_0_H__ */

View file

@ -0,0 +1,121 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/delay.h>
#include "amdgpu.h"
#include "lsdma_v6_0.h"
#include "amdgpu_lsdma.h"
#include "lsdma/lsdma_6_0_0_offset.h"
#include "lsdma/lsdma_6_0_0_sh_mask.h"
static int lsdma_v6_0_wait_pio_status(struct amdgpu_device *adev)
{
return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
}
static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev,
uint64_t src_addr,
uint64_t dst_addr,
uint64_t size)
{
int ret;
uint32_t tmp;
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
ret = lsdma_v6_0_wait_pio_status(adev);
if (ret)
dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
return ret;
}
static int lsdma_v6_0_fill_mem(struct amdgpu_device *adev,
uint64_t dst_addr,
uint32_t data,
uint64_t size)
{
int ret;
uint32_t tmp;
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
ret = lsdma_v6_0_wait_pio_status(adev);
if (ret)
dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
return ret;
}
static void lsdma_v6_0_update_memory_power_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t tmp;
tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
}
const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = {
.copy_mem = lsdma_v6_0_copy_mem,
.fill_mem = lsdma_v6_0_fill_mem,
.update_memory_power_gating = lsdma_v6_0_update_memory_power_gating
};

View file

@ -0,0 +1,31 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __LSDMA_V6_0_H__
#define __LSDMA_V6_0_H__
#include "soc15_common.h"
extern const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs;
#endif /* __LSDMA_V6_0_H__ */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,29 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MES_V11_0_H__
#define __MES_V11_0_H__
extern const struct amdgpu_ip_block_version mes_v11_0_ip_block;
#endif

View file

@ -408,6 +408,8 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)

View file

@ -324,6 +324,8 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev)

View file

@ -0,0 +1,661 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "mmhub_v3_0.h"
#include "mmhub/mmhub_3_0_0_offset.h"
#include "mmhub/mmhub_3_0_0_sh_mask.h"
#include "navi10_enum.h"
#include "soc15_common.h"
#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
static const char *mmhub_client_ids_v3_0_0[][2] = {
[0][0] = "VMC",
[4][0] = "DCEDMC",
[5][0] = "DCEVGA",
[6][0] = "MP0",
[7][0] = "MP1",
[8][0] = "MPIO",
[16][0] = "HDP",
[17][0] = "LSDMA",
[18][0] = "JPEG",
[19][0] = "VCNU0",
[21][0] = "VSCH",
[22][0] = "VCNU1",
[23][0] = "VCN1",
[32+20][0] = "VCN0",
[2][1] = "DBGUNBIO",
[3][1] = "DCEDWB",
[4][1] = "DCEDMC",
[5][1] = "DCEVGA",
[6][1] = "MP0",
[7][1] = "MP1",
[8][1] = "MPIO",
[10][1] = "DBGU0",
[11][1] = "DBGU1",
[12][1] = "DBGU2",
[13][1] = "DBGU3",
[14][1] = "XDP",
[15][1] = "OSSSYS",
[16][1] = "HDP",
[17][1] = "LSDMA",
[18][1] = "JPEG",
[19][1] = "VCNU0",
[20][1] = "VCN0",
[21][1] = "VSCH",
[22][1] = "VCNU1",
[23][1] = "VCN1",
};
static uint32_t mmhub_v3_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
static void
mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
const char *mmhub_cid = NULL;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, RW);
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 1):
mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw];
break;
default:
mmhub_cid = NULL;
break;
}
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%x\n", rw);
}
static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void mmhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
mmhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
uint32_t tmp;
/* Disable AGP. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FFFFFF);
if (!amdgpu_sriov_vf(adev)) {
/*
* the new L1 policy will block SRIOV guest from writing
* these regs, and they will be programed at host.
* so skip programing these regs.
*/
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18);
}
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
static void mmhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
tmp = regMMVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
tmp = regMMVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
}
static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
{
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
0);
}
static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev)
{
/* GART Enable. */
mmhub_v3_0_init_gart_aperture_regs(adev);
mmhub_v3_0_init_system_aperture_regs(adev);
mmhub_v3_0_init_tlb_regs(adev);
mmhub_v3_0_init_cache_regs(adev);
mmhub_v3_0_enable_system_domain(adev);
mmhub_v3_0_disable_identity_aperture(adev);
mmhub_v3_0_setup_vmid_config(adev);
mmhub_v3_0_program_invalidation(adev);
return 0;
}
static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
}
/**
* mmhub_v3_0_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
static void mmhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = {
.print_l2_protection_fault_status = mmhub_v3_0_print_l2_protection_fault_status,
.get_invalidate_req = mmhub_v3_0_get_invalidate_req,
};
static void mmhub_v3_0_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
regMMVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vm_l2_bank_select_reserved_cid2 =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs;
}
static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base;
base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
static u64 mmhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
}
static void mmhub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
#if 0
uint32_t def1, data1, def2 = 0, data2 = 0;
#endif
def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
#if 0
def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
#endif
if (enable) {
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
#if 0
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
#endif
} else {
data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
#if 0
data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
#endif
}
if (def != data)
WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
#if 0
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
if (def2 != data2)
WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
#endif
}
static void mmhub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
if (enable)
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
if (def != data)
WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v3_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
if (amdgpu_sriov_vf(adev))
return 0;
if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
mmhub_v3_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
mmhub_v3_0_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
static void mmhub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
/* AMD_CG_SUPPORT_MC_MGCG */
if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_MGCG;
/* AMD_CG_SUPPORT_MC_LS */
if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_LS;
}
const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs = {
.init = mmhub_v3_0_init,
.get_fb_location = mmhub_v3_0_get_fb_location,
.get_mc_fb_offset = mmhub_v3_0_get_mc_fb_offset,
.gart_enable = mmhub_v3_0_gart_enable,
.set_fault_enable_default = mmhub_v3_0_set_fault_enable_default,
.gart_disable = mmhub_v3_0_gart_disable,
.set_clockgating = mmhub_v3_0_set_clockgating,
.get_clockgating = mmhub_v3_0_get_clockgating,
.setup_vm_pt_regs = mmhub_v3_0_setup_vm_pt_regs,
};

View file

@ -0,0 +1,28 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MMHUB_V3_0_H__
#define __MMHUB_V3_0_H__
extern const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs;
#endif

View file

@ -0,0 +1,571 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "mmhub_v3_0_2.h"
#include "mmhub/mmhub_3_0_2_offset.h"
#include "mmhub/mmhub_3_0_2_sh_mask.h"
#include "navi10_enum.h"
#include "soc15_common.h"
#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
static const char *mmhub_client_ids_v3_0_2[][2] = {
[0][0] = "VMC",
[4][0] = "DCEDMC",
[5][0] = "DCEVGA",
[6][0] = "MP0",
[7][0] = "MP1",
[8][0] = "MPIO",
[16][0] = "HDP",
[17][0] = "LSDMA",
[18][0] = "JPEG",
[19][0] = "VCNU0",
[21][0] = "VSCH",
[22][0] = "VCNU1",
[23][0] = "VCN1",
[32+20][0] = "VCN0",
[2][1] = "DBGUNBIO",
[3][1] = "DCEDWB",
[4][1] = "DCEDMC",
[5][1] = "DCEVGA",
[6][1] = "MP0",
[7][1] = "MP1",
[8][1] = "MPIO",
[10][1] = "DBGU0",
[11][1] = "DBGU1",
[12][1] = "DBGU2",
[13][1] = "DBGU3",
[14][1] = "XDP",
[15][1] = "OSSSYS",
[16][1] = "HDP",
[17][1] = "LSDMA",
[18][1] = "JPEG",
[19][1] = "VCNU0",
[20][1] = "VCN0",
[21][1] = "VSCH",
[22][1] = "VCNU1",
[23][1] = "VCN1",
};
static uint32_t mmhub_v3_0_2_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
static void
mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
const char *mmhub_cid = NULL;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, RW);
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw];
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%x\n", rw);
}
static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void mmhub_v3_0_2_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
mmhub_v3_0_2_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
uint32_t tmp;
/* Disable AGP. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FFFFFF);
if (!amdgpu_sriov_vf(adev)) {
/*
* the new L1 policy will block SRIOV guest from writing
* these regs, and they will be programed at host.
* so skip programing these regs.
*/
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18);
}
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
static void mmhub_v3_0_2_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
tmp = regMMVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
tmp = regMMVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
}
static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev)
{
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
0);
}
static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev)
{
/* GART Enable. */
mmhub_v3_0_2_init_gart_aperture_regs(adev);
mmhub_v3_0_2_init_system_aperture_regs(adev);
mmhub_v3_0_2_init_tlb_regs(adev);
mmhub_v3_0_2_init_cache_regs(adev);
mmhub_v3_0_2_enable_system_domain(adev);
mmhub_v3_0_2_disable_identity_aperture(adev);
mmhub_v3_0_2_setup_vmid_config(adev);
mmhub_v3_0_2_program_invalidation(adev);
return 0;
}
static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
}
/**
* mmhub_v3_0_2_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
static void mmhub_v3_0_2_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = {
.print_l2_protection_fault_status = mmhub_v3_0_2_print_l2_protection_fault_status,
.get_invalidate_req = mmhub_v3_0_2_get_invalidate_req,
};
static void mmhub_v3_0_2_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
regMMVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vm_l2_bank_select_reserved_cid2 =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs;
}
static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev)
{
u64 base;
base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
static u64 mmhub_v3_0_2_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
}
static void mmhub_v3_0_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
//TODO
}
static void mmhub_v3_0_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
//TODO
}
static int mmhub_v3_0_2_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
if (amdgpu_sriov_vf(adev))
return 0;
mmhub_v3_0_2_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v3_0_2_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
static void mmhub_v3_0_2_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
//TODO
}
const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs = {
.init = mmhub_v3_0_2_init,
.get_fb_location = mmhub_v3_0_2_get_fb_location,
.get_mc_fb_offset = mmhub_v3_0_2_get_mc_fb_offset,
.gart_enable = mmhub_v3_0_2_gart_enable,
.set_fault_enable_default = mmhub_v3_0_2_set_fault_enable_default,
.gart_disable = mmhub_v3_0_2_gart_disable,
.set_clockgating = mmhub_v3_0_2_set_clockgating,
.get_clockgating = mmhub_v3_0_2_get_clockgating,
.setup_vm_pt_regs = mmhub_v3_0_2_setup_vm_pt_regs,
};

View file

@ -0,0 +1,28 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MMHUB_V3_0_2_H__
#define __MMHUB_V3_0_2_H__
extern const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs;
#endif

View file

@ -593,14 +593,9 @@ static int navi10_ih_sw_fini(void *handle)
static int navi10_ih_hw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = navi10_ih_irq_init(adev);
if (r)
return r;
return 0;
return navi10_ih_irq_init(adev);
}
static int navi10_ih_hw_fini(void *handle)

View file

@ -0,0 +1,368 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "nbio_v4_3.h"
#include "nbio/nbio_4_3_0_offset.h"
#include "nbio/nbio_4_3_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
static void nbio_v4_3_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
}
static u32 nbio_v4_3_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
return tmp;
}
static void nbio_v4_3_mc_access_enable(struct amdgpu_device *adev, bool enable)
{
if (enable)
WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
else
WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
}
static u32 nbio_v4_3_get_memsize(struct amdgpu_device *adev)
{
return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
}
static void nbio_v4_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index,
int doorbell_size)
{
if (instance == 0) {
u32 doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_2_CTRL,
S2A_DOORBELL_PORT2_ENABLE,
0x1);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_2_CTRL,
S2A_DOORBELL_PORT2_AWID,
0xe);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_2_CTRL,
S2A_DOORBELL_PORT2_RANGE_OFFSET,
doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_2_CTRL,
S2A_DOORBELL_PORT2_RANGE_SIZE,
doorbell_size);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_2_CTRL,
S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
0x3);
} else
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_2_CTRL,
S2A_DOORBELL_PORT2_RANGE_SIZE,
0);
WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
}
}
static void nbio_v4_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance)
{
u32 doorbell_range;
if (instance)
doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL);
else
doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_4_CTRL,
S2A_DOORBELL_PORT4_ENABLE,
0x1);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_4_CTRL,
S2A_DOORBELL_PORT4_AWID,
instance ? 0x7 : 0x4);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_4_CTRL,
S2A_DOORBELL_PORT4_RANGE_OFFSET,
doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_4_CTRL,
S2A_DOORBELL_PORT4_RANGE_SIZE,
8);
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_4_CTRL,
S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
instance ? 0x7 : 0x4);
} else
doorbell_range = REG_SET_FIELD(doorbell_range,
S2A_DOORBELL_ENTRY_4_CTRL,
S2A_DOORBELL_PORT4_RANGE_SIZE,
0);
if (instance)
WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
else
WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
}
static void nbio_v4_3_gc_doorbell_init(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
}
static void nbio_v4_3_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
BIF_DOORBELL_APER_EN, enable ? 1 : 0);
}
static void nbio_v4_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
bool enable)
{
u32 tmp = 0;
if (enable) {
tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
DOORBELL_SELFRING_GPA_APER_EN, 1) |
REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
DOORBELL_SELFRING_GPA_APER_MODE, 1) |
REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
DOORBELL_SELFRING_GPA_APER_SIZE, 0);
WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
lower_32_bits(adev->doorbell.base));
WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
upper_32_bits(adev->doorbell.base));
}
WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
tmp);
}
static void nbio_v4_3_ih_doorbell_range(struct amdgpu_device *adev,
bool use_doorbell, int doorbell_index)
{
u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL);
if (use_doorbell) {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_ENABLE,
0x1);
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWID,
0x0);
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_RANGE_OFFSET,
doorbell_index);
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_RANGE_SIZE,
2);
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
0x0);
} else
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_RANGE_SIZE,
0);
WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
}
static void nbio_v4_3_ih_control(struct amdgpu_device *adev)
{
u32 interrupt_cntl;
/* setup interrupt control */
WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
/*
* BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
*/
interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
IH_DUMMY_RD_OVERRIDE, 0);
/* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
IH_REQ_NONSNOOP_EN, 0);
WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
}
static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
} else {
data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
}
if (def != data)
WREG32_SOC15(NBIO, 0, regCPM_CONTROL, data);
}
static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
/* TODO: need update in future */
def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
} else {
data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
}
if (def != data)
WREG32_SOC15(NBIO, 0, regPCIE_CNTL2, data);
}
static void nbio_v4_3_get_clockgating_state(struct amdgpu_device *adev,
u64 *flags)
{
int data;
/* AMD_CG_SUPPORT_BIF_MGCG */
data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_BIF_MGCG;
/* AMD_CG_SUPPORT_BIF_LS */
data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_BIF_LS;
}
static u32 nbio_v4_3_get_hdp_flush_req_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
}
static u32 nbio_v4_3_get_hdp_flush_done_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
}
static u32 nbio_v4_3_get_pcie_index_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
}
static u32 nbio_v4_3_get_pcie_data_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
}
const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = {
.ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
.ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
.ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
.ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
.ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
.ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
.ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
.ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
.ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
{
return;
}
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
{
u32 data, rom_offset;
data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
return rom_offset;
}
const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
.get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
.get_rev_id = nbio_v4_3_get_rev_id,
.mc_access_enable = nbio_v4_3_mc_access_enable,
.get_memsize = nbio_v4_3_get_memsize,
.sdma_doorbell_range = nbio_v4_3_sdma_doorbell_range,
.vcn_doorbell_range = nbio_v4_3_vcn_doorbell_range,
.gc_doorbell_init = nbio_v4_3_gc_doorbell_init,
.enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v4_3_ih_doorbell_range,
.update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
.update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
.get_clockgating_state = nbio_v4_3_get_clockgating_state,
.ih_control = nbio_v4_3_ih_control,
.init_registers = nbio_v4_3_init_registers,
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
};

View file

@ -0,0 +1,32 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __NBIO_V4_3_H__
#define __NBIO_V4_3_H__
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v4_3_funcs;
#endif

View file

@ -0,0 +1,240 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "nbio_v7_7.h"
#include "nbio/nbio_7_7_0_offset.h"
#include "nbio/nbio_7_7_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
return tmp;
}
static void nbio_v7_7_mc_access_enable(struct amdgpu_device *adev, bool enable)
{
if (enable)
WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN,
BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK);
else
WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, 0);
}
static u32 nbio_v7_7_get_memsize(struct amdgpu_device *adev)
{
return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
}
static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index,
int doorbell_size)
{
u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE);
u32 doorbell_range = RREG32_PCIE_PORT(reg);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
SIZE, doorbell_size);
} else {
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
SIZE, 0);
}
WREG32_PCIE_PORT(reg, doorbell_range);
}
static void nbio_v7_7_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
u32 reg;
reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
BIF_DOORBELL_APER_EN, enable ? 1 : 0);
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
}
static void nbio_v7_7_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
bool enable)
{
u32 tmp = 0;
if (enable) {
tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
DOORBELL_SELFRING_GPA_APER_EN, 1) |
REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
DOORBELL_SELFRING_GPA_APER_MODE, 1) |
REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
DOORBELL_SELFRING_GPA_APER_SIZE, 0);
WREG32_SOC15(NBIO, 0,
regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
lower_32_bits(adev->doorbell.base));
WREG32_SOC15(NBIO, 0,
regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
upper_32_bits(adev->doorbell.base));
}
WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
tmp);
}
static void nbio_v7_7_ih_doorbell_range(struct amdgpu_device *adev,
bool use_doorbell, int doorbell_index)
{
u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,
regGDC0_BIF_IH_DOORBELL_RANGE);
if (use_doorbell) {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
doorbell_index);
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
2);
} else {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
0);
}
WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE,
ih_doorbell_range);
}
static void nbio_v7_7_ih_control(struct amdgpu_device *adev)
{
u32 interrupt_cntl;
/* setup interrupt control */
WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
/*
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
*/
interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
IH_DUMMY_RD_OVERRIDE, 0);
/* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
IH_REQ_NONSNOOP_EN, 0);
WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl);
}
static u32 nbio_v7_7_get_hdp_flush_req_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
}
static u32 nbio_v7_7_get_hdp_flush_done_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
}
static u32 nbio_v7_7_get_pcie_index_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
}
static u32 nbio_v7_7_get_pcie_data_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
}
static u32 nbio_v7_7_get_pcie_port_index_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
}
static u32 nbio_v7_7_get_pcie_port_data_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
}
const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg = {
.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3);
data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
}
const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_7_get_pcie_index_offset,
.get_pcie_data_offset = nbio_v7_7_get_pcie_data_offset,
.get_pcie_port_index_offset = nbio_v7_7_get_pcie_port_index_offset,
.get_pcie_port_data_offset = nbio_v7_7_get_pcie_port_data_offset,
.get_rev_id = nbio_v7_7_get_rev_id,
.mc_access_enable = nbio_v7_7_mc_access_enable,
.get_memsize = nbio_v7_7_get_memsize,
.sdma_doorbell_range = nbio_v7_7_sdma_doorbell_range,
.enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
};

View file

@ -0,0 +1,33 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __NBIO_V7_7_H__
#define __NBIO_V7_7_H__
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_7_funcs;
extern const struct amdgpu_nbio_ras_funcs nbio_v7_7_ras_funcs;
#endif

View file

@ -392,9 +392,9 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) {
en = &nv_allowed_read_registers[i];
if ((i == 7 && (adev->sdma.num_instances == 1)) || /* some asics don't have SDMA1 */
reg_offset !=
(adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset))
if (adev->reg_offset[en->hwip][en->inst] &&
reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ en->reg_offset))
continue;
*value = nv_get_register_value(adev,
@ -607,7 +607,12 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING;
adev->doorbell_index.gfx_userqueue_start =
AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
adev->doorbell_index.gfx_userqueue_end =
AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2;

View file

@ -260,6 +260,36 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
GFX_FW_TYPE_CAP = 62, /* CAP_FW */
GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */
GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */
GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */
GFX_FW_TYPE_LSDMA = 70, /* LSDMA FW SOC21 */
GFX_FW_TYPE_SDMA_UCODE_TH0 = 71, /* SDMA Thread 0/CTX SOC21 */
GFX_FW_TYPE_SDMA_UCODE_TH1 = 72, /* SDMA Thread 1/CTL SOC21 */
GFX_FW_TYPE_PPTABLE = 73, /* PPTABLE SOC21 */
GFX_FW_TYPE_DISCRETE_USB4 = 74, /* dUSB4 FW SOC21 */
GFX_FW_TYPE_TA = 75, /* SRIOV TA FW UUID SOC21 */
GFX_FW_TYPE_RS64_MES = 76, /* RS64 MES ucode SOC21 */
GFX_FW_TYPE_RS64_MES_STACK = 77, /* RS64 MES stack ucode SOC21 */
GFX_FW_TYPE_RS64_KIQ = 78, /* RS64 KIQ ucode SOC21 */
GFX_FW_TYPE_RS64_KIQ_STACK = 79, /* RS64 KIQ Heap stack SOC21 */
GFX_FW_TYPE_ISP_DATA = 80, /* ISP DATA SOC21 */
GFX_FW_TYPE_CP_MES_KIQ = 81, /* MES KIQ ucode SOC21 */
GFX_FW_TYPE_MES_KIQ_STACK = 82, /* MES KIQ stack SOC21 */
GFX_FW_TYPE_UMSCH_DATA = 83, /* User Mode Scheduler Data SOC21 */
GFX_FW_TYPE_UMSCH_UCODE = 84, /* User Mode Scheduler Ucode SOC21 */
GFX_FW_TYPE_UMSCH_CMD_BUFFER = 85, /* User Mode Scheduler Command Buffer SOC21 */
GFX_FW_TYPE_USB_DP_COMBO_PHY = 86, /* USB-Display port Combo SOC21 */
GFX_FW_TYPE_RS64_PFP = 87, /* RS64 PFP SOC21 */
GFX_FW_TYPE_RS64_ME = 88, /* RS64 ME SOC21 */
GFX_FW_TYPE_RS64_MEC = 89, /* RS64 MEC SOC21 */
GFX_FW_TYPE_RS64_PFP_P0_STACK = 90, /* RS64 PFP stack P0 SOC21 */
GFX_FW_TYPE_RS64_PFP_P1_STACK = 91, /* RS64 PFP stack P1 SOC21 */
GFX_FW_TYPE_RS64_ME_P0_STACK = 92, /* RS64 ME stack P0 SOC21 */
GFX_FW_TYPE_RS64_ME_P1_STACK = 93, /* RS64 ME stack P1 SOC21 */
GFX_FW_TYPE_RS64_MEC_P0_STACK = 94, /* RS64 MEC stack P0 SOC21 */
GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */
GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */
GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */
GFX_FW_TYPE_MAX
};

View file

@ -41,6 +41,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@ -48,10 +50,20 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
/* Read USB-PD from LFB */
#define GFX_CMD_USB_PD_USE_LFB 0x480
/* VBIOS gfl defines */
#define MBOX_READY_MASK 0x80000000
#define MBOX_STATUS_MASK 0x0000FFFF
#define MBOX_COMMAND_MASK 0x00FF0000
#define MBOX_READY_FLAG 0x80000000
#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
const char *chip_name;
char ucode_prefix[30];
int err = 0;
switch (adev->ip_versions[MP0_HWIP][0]) {
@ -62,15 +74,12 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 3):
chip_name = "yellow_carp";
break;
case IP_VERSION(13, 0, 5):
chip_name = "psp_13_0_5";
break;
case IP_VERSION(13, 0, 8):
chip_name = "psp_13_0_8";
break;
default:
BUG();
amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
chip_name = ucode_prefix;
break;
}
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 2):
err = psp_init_sos_microcode(psp, chip_name);
@ -94,6 +103,12 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
if (err)
return err;
break;
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
err = psp_init_sos_microcode(psp, chip_name);
if (err)
return err;
break;
default:
BUG();
}
@ -174,6 +189,11 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
}
static int psp_v13_0_bootloader_load_spl(struct psp_context *psp)
{
return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE);
}
static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp)
{
return psp_v13_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
@ -454,9 +474,85 @@ static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
return ret;
}
static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
{
uint32_t reg_status = 0, reg_val = 0;
struct amdgpu_device *adev = psp->adev;
int ret;
/* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
reg_val |= (cmd << 16);
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115, reg_val);
/* Ring the doorbell */
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
return 0;
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
MBOX_READY_FLAG, MBOX_READY_MASK, false);
if (ret) {
dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
return ret;
}
reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
if ((reg_status & 0xFFFF) != 0) {
dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
cmd, reg_status & 0xFFFF);
return -EIO;
}
return 0;
}
static int psp_v13_0_update_spirom(struct psp_context *psp,
uint64_t fw_pri_mc_addr)
{
struct amdgpu_device *adev = psp->adev;
int ret;
/* Confirm PSP is ready to start */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
MBOX_READY_FLAG, MBOX_READY_MASK, false);
if (ret) {
dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
return ret;
}
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
if (ret)
return ret;
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
if (ret)
return ret;
psp->vbflash_done = true;
ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
if (ret)
return ret;
return 0;
}
static int psp_v13_0_vbflash_status(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
}
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
.bootloader_load_spl = psp_v13_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
.bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv,
.bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
@ -469,7 +565,9 @@ static const struct psp_funcs psp_v13_0_funcs = {
.ring_get_wptr = psp_v13_0_ring_get_wptr,
.ring_set_wptr = psp_v13_0_ring_set_wptr,
.load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
.read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw
.read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
.update_spirom = psp_v13_0_update_spirom,
.vbflash_stat = psp_v13_0_vbflash_status
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)

View file

@ -194,7 +194,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
return ring->adev->wb.wb[ring->rptr_offs] >> 2;
return *ring->rptr_cpu_addr >> 2;
}
/**
@ -414,12 +414,10 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
u32 wb_offset;
int i, j, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@ -455,9 +453,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

View file

@ -350,7 +350,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
return ring->adev->wb.wb[ring->rptr_offs] >> 2;
return *ring->rptr_cpu_addr >> 2;
}
/**
@ -367,7 +367,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell || ring->use_pollmem) {
/* XXX check if swapping is necessary on BE */
wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
wptr = *ring->wptr_cpu_addr >> 2;
} else {
wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
}
@ -387,12 +387,12 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
u32 *wb = (u32 *)ring->wptr_cpu_addr;
/* XXX check if swapping is necessary on BE */
WRITE_ONCE(*wb, ring->wptr << 2);
WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
} else if (ring->use_pollmem) {
u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
u32 *wb = (u32 *)ring->wptr_cpu_addr;
WRITE_ONCE(*wb, ring->wptr << 2);
} else {
@ -649,7 +649,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 rb_bufsz;
u32 wb_offset;
u32 doorbell;
u64 wptr_gpu_addr;
int i, j, r;
@ -657,7 +656,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
amdgpu_ring_clear_ring(ring);
wb_offset = (ring->rptr_offs * 4);
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
@ -694,9 +692,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@ -715,7 +713,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO + sdma_offsets[i],
lower_32_bits(wptr_gpu_addr));

View file

@ -722,7 +722,7 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
rptr = ((u64 *)ring->rptr_cpu_addr);
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@ -742,7 +742,7 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
@ -768,7 +768,7 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
u64 *wb = (u64 *)ring->wptr_cpu_addr;
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
@ -811,7 +811,7 @@ static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
wptr = wptr << 32;
@ -833,7 +833,7 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
u64 *wb = (u64 *)ring->wptr_cpu_addr;
/* XXX check if swapping is necessary on BE */
WRITE_ONCE(*wb, (ring->wptr << 2));
@ -1174,13 +1174,10 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
wb_offset = (ring->rptr_offs * 4);
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
@ -1193,9 +1190,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
@ -1225,7 +1222,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
@ -1264,13 +1261,10 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
wb_offset = (ring->rptr_offs * 4);
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
@ -1283,9 +1277,9 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
@ -1316,7 +1310,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,

View file

@ -347,7 +347,7 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
rptr = (u64 *)ring->rptr_cpu_addr;
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@ -367,7 +367,7 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@ -400,8 +400,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
@ -562,9 +562,11 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
uint32_t ctx = ring->is_mes_queue ?
(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
}
}
@ -708,7 +710,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
@ -718,7 +719,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
wb_offset = (ring->rptr_offs * 4);
if (!amdgpu_sriov_vf(adev))
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
@ -741,7 +741,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
@ -756,9 +756,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@ -961,6 +961,49 @@ static int sdma_v5_0_start(struct amdgpu_device *adev)
return r;
}
static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
struct amdgpu_mqd_prop *prop)
{
struct v10_sdma_mqd *m = mqd;
uint64_t wb_gpu_addr;
m->sdmax_rlcx_rb_cntl =
order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
wb_gpu_addr = prop->wptr_gpu_addr;
m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
wb_gpu_addr = prop->rptr_gpu_addr;
m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
mmSDMA0_GFX_IB_CNTL));
m->sdmax_rlcx_doorbell_offset =
prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
return 0;
}
static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
{
adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
}
/**
* sdma_v5_0_ring_test_ring - simple async dma engine test
*
@ -978,18 +1021,29 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
volatile uint32_t *cpu_ptr = NULL;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
return r;
tmp = 0xCAFEDEAD;
if (ring->is_mes_queue) {
uint32_t offset = 0;
offset = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
*cpu_ptr = tmp;
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(tmp);
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
@ -1005,7 +1059,10 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
if (ring->is_mes_queue)
tmp = le32_to_cpu(*cpu_ptr);
else
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@ -1017,7 +1074,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
amdgpu_device_wb_free(adev, index);
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
return r;
}
@ -1040,22 +1098,38 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
volatile uint32_t *cpu_ptr = NULL;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256,
if (ring->is_mes_queue) {
uint32_t offset = 0;
offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
offset = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
*cpu_ptr = tmp;
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ib_get(adev, NULL, 256,
AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
}
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@ -1082,7 +1156,12 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (ring->is_mes_queue)
tmp = le32_to_cpu(*cpu_ptr);
else
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
else
@ -1092,7 +1171,8 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
return r;
}
@ -1291,6 +1371,7 @@ static int sdma_v5_0_early_init(void *handle)
sdma_v5_0_set_buffer_funcs(adev);
sdma_v5_0_set_vm_pte_funcs(adev);
sdma_v5_0_set_irq_funcs(adev);
sdma_v5_0_set_mqd_funcs(adev);
return 0;
}
@ -1511,7 +1592,25 @@ static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: SDMA trap\n");
if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
struct amdgpu_mes_queue *queue;
mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
spin_lock(&adev->mes.queue_id_lock);
queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
if (queue) {
DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
amdgpu_fence_process(queue->ring);
}
spin_unlock(&adev->mes.queue_id_lock);
return 0;
}
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {

View file

@ -248,7 +248,7 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
u64 *rptr;
/* XXX check if swapping is necessary on BE */
rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
rptr = (u64 *)ring->rptr_cpu_addr;
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@ -268,7 +268,7 @@ static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@ -301,8 +301,8 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
@ -460,14 +460,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
if ((flags & AMDGPU_FENCE_FLAG_INT)) {
uint32_t ctx = ring->is_mes_queue ?
(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
}
}
/**
* sdma_v5_2_gfx_stop - stop the gfx async dma engines
*
@ -513,17 +514,21 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
}
/**
* sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
* sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines
* context switch for an instance
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs context switch.
* @instance_idx: the index of the SDMA instance
*
* Halt or unhalt the async dma engines context switch.
* Unhalt the async dma engines context switch.
*/
static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx)
{
u32 f32_cntl, phase_quantum = 0;
int i;
if (WARN_ON(instance_idx >= adev->sdma.num_instances)) {
return;
}
if (amdgpu_sdma_phase_quantum) {
unsigned value = amdgpu_sdma_phase_quantum;
@ -547,50 +552,68 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
phase_quantum =
value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
WREG32_SOC15_IP(GC,
sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
WREG32_SOC15_IP(GC,
sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM),
phase_quantum);
WREG32_SOC15_IP(GC,
sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
}
for (i = 0; i < adev->sdma.num_instances; i++) {
if (enable && amdgpu_sdma_phase_quantum) {
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
phase_quantum);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
}
if (!amdgpu_sriov_vf(adev)) {
f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, enable ? 1 : 0);
WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
}
if (!amdgpu_sriov_vf(adev)) {
f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL));
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 1);
WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl);
}
}
/**
* sdma_v5_2_enable - stop the async dma engines
* sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs.
*
* Halt or unhalt the async dma engines.
* Halt the async dma engines context switch.
*/
static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev)
{
u32 f32_cntl;
int i;
if (!enable) {
sdma_v5_2_gfx_stop(adev);
sdma_v5_2_rlc_stop(adev);
if (amdgpu_sriov_vf(adev))
return;
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 0);
WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
}
}
/**
* sdma_v5_2_halt - stop the async dma engines
*
* @adev: amdgpu_device pointer
*
* Halt the async dma engines.
*/
static void sdma_v5_2_halt(struct amdgpu_device *adev)
{
int i;
u32 f32_cntl;
sdma_v5_2_gfx_stop(adev);
sdma_v5_2_rlc_stop(adev);
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
}
}
@ -602,6 +625,9 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
* @adev: amdgpu_device pointer
*
* Set up the gfx DMA ring buffers and enable them.
* It assumes that the dma engine is stopped for each instance.
* The function enables the engine and preemptions sequentially for each instance.
*
* Returns 0 for success, error for failure.
*/
static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
@ -609,7 +635,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
@ -619,7 +644,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
wb_offset = (ring->rptr_offs * 4);
if (!amdgpu_sriov_vf(adev))
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
@ -642,7 +666,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
@ -657,9 +681,9 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
/* set the wb address whether it's enabled or not */
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@ -745,10 +769,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
ring->sched.ready = true;
if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
sdma_v5_2_ctx_switch_enable(adev, true);
sdma_v5_2_enable(adev, true);
}
sdma_v5_2_ctx_switch_enable_for_instance(adev, i);
r = amdgpu_ring_test_ring(ring);
if (r) {
@ -792,7 +813,7 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
int i, j;
/* halt the MEs */
sdma_v5_2_enable(adev, false);
sdma_v5_2_halt(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
if (!adev->sdma.instance[i].fw)
@ -864,8 +885,8 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
int r = 0;
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
sdma_v5_2_ctx_switch_disable_all(adev);
sdma_v5_2_halt(adev);
/* set RB registers */
r = sdma_v5_2_gfx_resume(adev);
@ -889,12 +910,10 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
amdgpu_gfx_off_ctrl(adev, false);
sdma_v5_2_soft_reset(adev);
/* unhalt the MEs */
sdma_v5_2_enable(adev, true);
/* enable sdma ring preemption */
sdma_v5_2_ctx_switch_enable(adev, true);
/* start the gfx rings and rlc compute queues */
/* Soft reset supposes to disable the dma engine and preemption.
* Now start the gfx rings and rlc compute queues.
*/
r = sdma_v5_2_gfx_resume(adev);
if (adev->in_s0ix)
amdgpu_gfx_off_ctrl(adev, true);
@ -905,6 +924,49 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
return r;
}
static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
struct amdgpu_mqd_prop *prop)
{
struct v10_sdma_mqd *m = mqd;
uint64_t wb_gpu_addr;
m->sdmax_rlcx_rb_cntl =
order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
wb_gpu_addr = prop->wptr_gpu_addr;
m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
wb_gpu_addr = prop->rptr_gpu_addr;
m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
mmSDMA0_GFX_IB_CNTL));
m->sdmax_rlcx_doorbell_offset =
prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
return 0;
}
static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
{
adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
}
/**
* sdma_v5_2_ring_test_ring - simple async dma engine test
*
@ -922,18 +984,29 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
volatile uint32_t *cpu_ptr = NULL;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
return r;
tmp = 0xCAFEDEAD;
if (ring->is_mes_queue) {
uint32_t offset = 0;
offset = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
*cpu_ptr = tmp;
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(tmp);
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
@ -949,7 +1022,10 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
if (ring->is_mes_queue)
tmp = le32_to_cpu(*cpu_ptr);
else
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@ -961,7 +1037,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
amdgpu_device_wb_free(adev, index);
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
return r;
}
@ -984,21 +1061,37 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
volatile uint32_t *cpu_ptr = NULL;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
if (ring->is_mes_queue) {
uint32_t offset = 0;
offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
offset = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
*cpu_ptr = tmp;
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
}
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@ -1025,7 +1118,12 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (ring->is_mes_queue)
tmp = le32_to_cpu(*cpu_ptr);
else
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
else
@ -1035,7 +1133,8 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
return r;
}
@ -1235,6 +1334,7 @@ static int sdma_v5_2_early_init(void *handle)
sdma_v5_2_set_buffer_funcs(adev);
sdma_v5_2_set_vm_pte_funcs(adev);
sdma_v5_2_set_irq_funcs(adev);
sdma_v5_2_set_mqd_funcs(adev);
return 0;
}
@ -1347,8 +1447,8 @@ static int sdma_v5_2_hw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
return 0;
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
sdma_v5_2_ctx_switch_disable_all(adev);
sdma_v5_2_halt(adev);
return 0;
}
@ -1468,7 +1568,25 @@ static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: SDMA trap\n");
if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
struct amdgpu_mes_queue *queue;
mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
spin_lock(&adev->mes.queue_id_lock);
queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
if (queue) {
DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
amdgpu_fence_process(queue->ring);
}
spin_unlock(&adev->mes.queue_id_lock);
return 0;
}
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,30 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __SDMA_V6_0_H__
#define __SDMA_V6_0_H__
extern const struct amd_ip_funcs sdma_v6_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v6_0_ip_block;
#endif /* __SDMA_V6_0_H__ */

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more