Merge remote-tracking branch 'drm/drm-next' into drm-misc-next

Pick up rc3 and rc4 and the merges from the other branches;
we're a bit out of date.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Maarten Lankhorst 2019-06-17 10:17:38 +02:00
commit f5500f385b
268 changed files with 9814 additions and 3508 deletions

View file

@ -9,6 +9,7 @@ Required properties:
- compatible : Shall contain one of
- "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
- "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders
- "renesas,r8a774a1-lvds" for R8A774A1 (RZ/G2M) compatible LVDS encoders
- "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders
- "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
- "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
@ -45,14 +46,24 @@ OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
Each port shall have a single endpoint.
Optional properties:
- renesas,companion : phandle to the companion LVDS encoder. This property is
mandatory for the first LVDS encoder on D3 and E3 SoCs, and shall point to
the second encoder to be used as a companion in dual-link mode. It shall not
be set for any other LVDS encoder.
Example:
lvds0: lvds@feb90000 {
compatible = "renesas,r8a7790-lvds";
reg = <0 0xfeb90000 0 0x1c>;
clocks = <&cpg CPG_MOD 726>;
resets = <&cpg 726>;
compatible = "renesas,r8a77990-lvds";
reg = <0 0xfeb90000 0 0x20>;
clocks = <&cpg CPG_MOD 727>;
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
resets = <&cpg 727>;
renesas,companion = <&lvds1>;
ports {
#address-cells = <1>;

View file

@ -28,6 +28,12 @@ Optional video port nodes:
- port@1: Second LVDS input port
- port@3: Second digital CMOS/TTL parallel output
The device can operate in single-link mode or dual-link mode. In single-link
mode, all pixels are received on port@0, and port@1 shall not contain any
endpoint. In dual-link mode, even-numbered pixels are received on port@0 and
odd-numbered pixels on port@1, and both port@0 and port@1 shall contain
endpoints.
Example:
--------

View file

@ -7,6 +7,7 @@ Required Properties:
- "renesas,du-r8a7744" for R8A7744 (RZ/G1N) compatible DU
- "renesas,du-r8a7745" for R8A7745 (RZ/G1E) compatible DU
- "renesas,du-r8a77470" for R8A77470 (RZ/G1C) compatible DU
- "renesas,du-r8a774a1" for R8A774A1 (RZ/G2M) compatible DU
- "renesas,du-r8a774c0" for R8A774C0 (RZ/G2E) compatible DU
- "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
- "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
@ -58,6 +59,7 @@ corresponding to each DU output.
R8A7744 (RZ/G1N) DPAD 0 LVDS 0 - -
R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - -
R8A77470 (RZ/G1C) DPAD 0 DPAD 1 LVDS 0 -
R8A774A1 (RZ/G2M) DPAD 0 HDMI 0 LVDS 0 -
R8A774C0 (RZ/G2E) DPAD 0 LVDS 0 LVDS 1 -
R8A7779 (R-Car H1) DPAD 0 DPAD 1 - -
R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 -

View file

@ -37,10 +37,10 @@ Buffer Objects
PRIME Buffer Sharing
--------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
:doc: PRIME Buffer Sharing
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
:internal:
MMU Notifier
@ -70,6 +70,26 @@ Interrupt Handling
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
:internal:
AMDGPU XGMI Support
===================
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
:doc: AMDGPU XGMI Support
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
:internal:
AMDGPU RAS debugfs control interface
====================================
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:doc: AMDGPU RAS debugfs control interface
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:internal:
GPU Power/Thermal Controls and Monitoring
=========================================

View file

@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
select MMU_NOTIFIER
depends on ARCH_HAS_HMM
select HMM_MIRROR
help
This option selects CONFIG_MMU_NOTIFIER if it isn't already
selected to enable full userptr support.
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enable full userptr support.
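As a rough illustration of what "full userptr support" means here: userspace wraps ordinary anonymous memory in a GEM handle through the AMDGPU_GEM_USERPTR ioctl, and the HMM mirror keeps those pages coherent with the GPU. A minimal hedged sketch against the uapi (the flag choice and error handling are illustrative, not canonical):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/amdgpu_drm.h>

/* Wrap an existing CPU buffer in a GEM handle via the userptr ioctl.
 * fd is an open amdgpu render node; fills *handle and returns 0 on success.
 */
static int wrap_userptr(int fd, void *addr, uint64_t size, uint32_t *handle)
{
        struct drm_amdgpu_gem_userptr args;
        int r;

        memset(&args, 0, sizeof(args));
        args.addr = (uintptr_t)addr;        /* must be page aligned */
        args.size = size;                   /* must be page aligned */
        args.flags = AMDGPU_GEM_USERPTR_ANONONLY |
                     AMDGPU_GEM_USERPTR_REGISTER |
                     AMDGPU_GEM_USERPTR_VALIDATE;

        r = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_USERPTR, &args);
        if (r == 0)
                *handle = args.handle;
        return r;
}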
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"

View file

@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
@ -173,7 +173,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile

View file

@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
extern int amdgpu_lockup_timeout;
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@ -211,6 +210,7 @@ struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
struct amdgpu_atif;
struct kfd_vm_fault_info;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_EOP = 0,
@ -415,6 +415,7 @@ struct amdgpu_fpriv {
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {
uint64_t *count1);
/* do we need to reset the asic at init time (e.g., kexec) */
bool (*need_reset_on_init)(struct amdgpu_device *adev);
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
};
/*
@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma1;
};
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
};
struct amdgpu_nbio_funcs {
const struct nbio_hdp_flush_reg *hdp_flush_reg;
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*detect_hw_virt)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
};
struct amdgpu_df_funcs {
@ -680,6 +689,12 @@ struct amdgpu_df_funcs {
u32 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
int is_enable);
int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
int is_disable);
void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
uint64_t *count);
};
/* Define the HW IP blocks that will be used in the driver; add more if necessary */
enum amd_hw_ip_block_type {
@ -764,6 +779,7 @@ struct amdgpu_device {
void __iomem *rmmio;
/* protects concurrent MM_INDEX/DATA based register access */
spinlock_t mmio_idx_lock;
struct amdgpu_mmio_remap rmmio_remap;
/* protects concurrent SMC based register access */
spinlock_t smc_idx_lock;
amdgpu_rreg_t smc_rreg;
@ -906,7 +922,7 @@ struct amdgpu_device {
const struct amdgpu_df_funcs *df_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
struct delayed_work delayed_init_work;
struct amdgpu_virt virt;
/* firmware VRAM reservation */
@ -936,6 +952,13 @@ struct amdgpu_device {
struct work_struct xgmi_reset_work;
bool in_baco_reset;
long gfx_timeout;
long sdma_timeout;
long video_timeout;
long compute_timeout;
uint64_t unique_id;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
bool amdgpu_device_is_px(struct drm_device *dev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
void amdgpu_register_atpx_handler(void);

View file

@ -25,8 +25,10 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
static const unsigned int compute_vmid_bitmap = 0xFF00;
@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
};
/* this is going to have a few of the MSBs set that we need to
* clear */
* clear
*/
bitmap_complement(gpu_resources.queue_bitmap,
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
gpu_resources.queue_bitmap);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant */
* nbits is not compile time constant
*/
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
amdgpu_bo_unref(&(bo));
}
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
void **mem_obj)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = 1;
bp.domain = AMDGPU_GEM_DOMAIN_GWS;
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate gws BO for amdkfd (%d)\n", r);
return r;
}
*mem_obj = bo;
return 0;
}
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
amdgpu_bo_unref(&bo);
}
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type)
{
@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
return adev->gmc.xgmi.hive_id;
}
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
struct amdgpu_device *adev = (struct amdgpu_device *)dst;
int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
if (ret < 0) {
DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
adev->gmc.xgmi.physical_node_id,
peer_adev->gmc.xgmi.physical_node_id, ret);
ret = 0;
}
return (uint8_t)ret;
}
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->rmmio_remap.bus_addr;
}
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->gds.gws_size;
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,

View file

@ -61,7 +61,6 @@ struct kgd_mem {
atomic_t invalid;
struct amdkfd_process_info *process_info;
struct page **user_pages;
struct amdgpu_sync sync;
@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
#define read_user_wptr(mmptr, wptr, dst) \
({ \

View file

@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
pr_debug("kfd: sdma base address: 0x%x\n", retval);
pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}

View file

@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
pr_debug("kfd: sdma base address: 0x%x\n", retval);
pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}

View file

@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
lock_srbm(kgd, 0, 0, 0, vmid);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
unlock_srbm(kgd);
@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
WREG32(reg, mqd_hqd[reg - hqd_base]);
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
release_queue(kgd);
@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
/* Use legacy mode tlb invalidation.
*
* Currently on Raven the code below is broken for anything but
* legacy mode due to a MMHUB power gating problem. A workaround
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
* bit.
*
* TODO 1: agree on the right set of invalidation registers for
* KFD use. Use the last one for now. Invalidate both GC and
* MMHUB.
*
* TODO 2: support range-based invalidation, requires kfd2kgd
* interface change
*/
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
}
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
uint32_t flush_type)
{
signed long r;
uint32_t seq;
@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
uint32_t flush_type = 0;
if (adev->in_gpu_reset)
return -EIO;
if (adev->gmc.xgmi.num_physical_nodes &&
adev->asic_type == CHIP_VEGA20)
flush_type = 2;
if (ring->sched.ready)
return invalidate_tlbs_with_kiq(adev, pasid);
return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
== pasid) {
write_vmid_invalidate_request(kgd, vmid);
amdgpu_gmc_flush_gpu_tlb(adev, vmid,
flush_type);
break;
}
}
@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
return 0;
}
write_vmid_invalidate_request(kgd, vmid);
/* Use legacy mode tlb invalidation.
*
* Currently on Raven the code below is broken for anything but
* legacy mode due to a MMHUB power gating problem. A workaround
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
* bit.
*
* TODO 1: agree on the right set of invalidation registers for
* KFD use. Use the last one for now. Invalidate both GC and
* MMHUB.
*
* TODO 2: support range-based invalidation, requires kfd2kgd
* interface change
*/
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
return 0;
}
@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;

View file

@ -31,6 +31,7 @@
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
/* Special VM and GART address alignment needed for VI pre-Fiji due to
* a HW bug.
@ -457,6 +458,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
mutex_unlock(&process_info->lock);
}
static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
{
struct ttm_validate_buffer *bo_list_entry;
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
}
/* Initializes user pages. It registers the MMU notifier and validates
* the userptr BO in the GTT domain.
*
@ -492,28 +504,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}
/* If no restore worker is running concurrently, user_pages
* should not be allocated
*/
WARN(mem->user_pages, "Leaking user_pages array");
mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n", __func__);
ret = -ENOMEM;
goto unregister_out;
}
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto free_out;
goto unregister_out;
}
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@ -526,11 +522,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);
release_out:
if (ret)
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
free_out:
kvfree(mem->user_pages);
mem->user_pages = NULL;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@ -589,13 +581,12 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
false, &ctx->duplicates, true);
if (!ret)
ctx->reserved = true;
else {
@ -653,7 +644,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
@ -669,7 +659,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
}
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
false, &ctx->duplicates, true);
if (!ret)
ctx->reserved = true;
else
@ -897,6 +887,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.base.bo,
&vm->process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(vm->root.base.bo);
@ -910,6 +903,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return 0;
reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
amdgpu_bo_unreserve(vm->root.base.bo);
@ -1110,7 +1104,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
ALLOC_MEM_FLAGS_MMIO_REMAP)) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
@ -1200,12 +1195,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (user_addr) {
ret = init_user_pages(*mem, current->mm, user_addr);
if (ret) {
mutex_lock(&avm->process_info->lock);
list_del(&(*mem)->validate_list.head);
mutex_unlock(&avm->process_info->lock);
if (ret)
goto allocate_init_user_pages_failed;
}
}
if (offset)
@ -1214,6 +1205,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
@ -1263,15 +1255,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
/* Free user pages if necessary */
if (mem->user_pages) {
pr_debug("%s: Freeing user_pages array\n", __func__);
if (mem->user_pages[0])
release_pages(mem->user_pages,
mem->bo->tbo.ttm->num_pages);
kvfree(mem->user_pages);
}
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@ -1295,8 +1278,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
/* If the SG is not NULL, it's one we created for a doorbell
* BO. We need to free it.
/* If the SG is not NULL, it's one we created for a doorbell or mmio
* remap BO. We need to free it.
*/
if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
@ -1410,7 +1393,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
pr_err("Failed to map bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
}
@ -1745,25 +1728,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
if (!mem->user_pages) {
mem->user_pages =
kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n",
__func__);
return -ENOMEM;
}
} else if (mem->user_pages[0]) {
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
}
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
mem->user_pages);
bo->tbo.ttm->pages);
if (ret) {
mem->user_pages[0] = NULL;
bo->tbo.ttm->pages[0] = NULL;
pr_info("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Pretend it succeeded. It will fail later
@ -1772,17 +1741,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* stalled user mode queues.
*/
}
/* Mark the BO as valid unless it was invalidated
* again concurrently
*/
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
return -EAGAIN;
}
return 0;
}
/* Remove invalid userptr BOs from the HMM tracking list
*
* Stop HMM from tracking the userptr updates
*/
static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
{
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list,
validate_list.head) {
bo = mem->bo;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
}
}
/* Validate invalid userptr BOs
*
* Validates BOs on the userptr_inval_list, and moves them back to the
@ -1807,7 +1787,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
return -ENOMEM;
ret = -ENOMEM;
goto out_no_mem;
}
INIT_LIST_HEAD(&resv_list);
@ -1828,10 +1809,11 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
/* Reserve all BOs and page tables for validation */
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
true);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
goto out;
goto out_free;
amdgpu_sync_create(&sync);
@ -1847,10 +1829,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
bo = mem->bo;
/* Copy pages array and validate the BO if we got user pages */
if (mem->user_pages[0]) {
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
mem->user_pages);
/* Validate the BO if we got user pages */
if (bo->tbo.ttm->pages[0]) {
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
@ -1859,16 +1839,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
/* Validate succeeded, now the BO owns the pages, free
* our copy of the pointer array. Put this BO back on
* the userptr_valid_list. If we need to revalidate
* it, we need to start from scratch.
*/
kvfree(mem->user_pages);
mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
/* Stop HMM from tracking the userptr updates. We don't check the
* return value for concurrent CPU page table updates because we
* will reschedule the restore worker if process_info->evicted_bos
* is updated.
*/
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
@ -1898,8 +1878,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
out:
out_free:
kfree(pd_bo_list_entries);
out_no_mem:
return ret;
}
@ -1964,7 +1945,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* hanging. No point trying again.
*/
}
unlock_out:
untrack_invalid_user_pages(process_info);
mutex_unlock(&process_info->lock);
mmput(mm);
put_task_struct(usertask);
@ -2033,7 +2016,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
false, &duplicate_save);
false, &duplicate_save, true);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
@ -2131,3 +2114,88 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
kfree(pd_bo_list);
return ret;
}
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
{
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
int ret;
if (!info || !gws)
return -EINVAL;
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem)
return -ENOMEM;
mutex_init(&(*mem)->lock);
(*mem)->bo = amdgpu_bo_ref(gws_bo);
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
(*mem)->process_info = process_info;
add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
amdgpu_sync_create(&(*mem)->sync);
/* Validate gws bo the first time it is added to process */
mutex_lock(&(*mem)->process_info->lock);
ret = amdgpu_bo_reserve(gws_bo, false);
if (unlikely(ret)) {
pr_err("Reserve gws bo failed %d\n", ret);
goto bo_reservation_failure;
}
ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
if (ret) {
pr_err("GWS BO validate failed %d\n", ret);
goto bo_validation_failure;
}
/* The GWS resource is shared between amdgpu and amdkfd. Add the
* process eviction fence to the BO so that they can evict each
* other.
*/
amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(gws_bo);
mutex_unlock(&(*mem)->process_info->lock);
return ret;
bo_validation_failure:
amdgpu_bo_unreserve(gws_bo);
bo_reservation_failure:
mutex_unlock(&(*mem)->process_info->lock);
amdgpu_sync_free(&(*mem)->sync);
remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
amdgpu_bo_unref(&gws_bo);
mutex_destroy(&(*mem)->lock);
kfree(*mem);
*mem = NULL;
return ret;
}
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
{
int ret;
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
struct amdgpu_bo *gws_bo = kgd_mem->bo;
/* Remove BO from process's validate list so restore worker won't touch
* it anymore
*/
remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
ret = amdgpu_bo_reserve(gws_bo, false);
if (unlikely(ret)) {
pr_err("Reserve gws bo failed %d\n", ret);
//TODO add BO back to validate_list?
return ret;
}
amdgpu_amdkfd_remove_eviction_fence(gws_bo,
process_info->eviction_fence);
amdgpu_bo_unreserve(gws_bo);
amdgpu_sync_free(&kgd_mem->sync);
amdgpu_bo_unref(&gws_bo);
mutex_destroy(&kgd_mem->lock);
kfree(mem);
return 0;
}

View file

@ -82,9 +82,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return -ENOMEM;
kref_init(&list->refcount);
list->gds_obj = adev->gds.gds_gfx_bo;
list->gws_obj = adev->gds.gws_gfx_bo;
list->oa_obj = adev->gds.oa_gfx_bo;
list->gds_obj = NULL;
list->gws_obj = NULL;
list->oa_obj = NULL;
array = amdgpu_bo_list_array_entry(list, 0);
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));

View file

@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
int user_invalidated;
bool user_invalidated;
};
struct amdgpu_bo_list {

View file

@ -54,7 +54,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.tv.bo = &bo->tbo;
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
@ -544,14 +543,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (usermm && usermm != current->mm)
return -EPERM;
/* Check if we have user pages and nobody bound the BO already */
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
lobj->user_pages) {
if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
lobj->user_invalidated && lobj->user_pages) {
amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
binding_userptr = true;
@ -582,7 +581,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
@ -618,79 +616,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
while (1) {
struct list_head need_pages;
/* Get userptr backing pages. If pages are updated after being
* registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
* will do amdgpu_ttm_backend_bind() to flush and invalidate new pages
*/
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
bool userpage_invalidated = false;
int i;
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
goto error_free_pages;
e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
DRM_ERROR("calloc failure\n");
return -ENOMEM;
}
INIT_LIST_HEAD(&need_pages);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
/* We acquired a page array, but somebody
* invalidated it. Free it and try again
*/
release_pages(e->user_pages,
bo->tbo.ttm->num_pages);
kvfree(e->user_pages);
e->user_pages = NULL;
}
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
!e->user_pages) {
list_del(&e->tv.head);
list_add(&e->tv.head, &need_pages);
amdgpu_bo_unreserve(bo);
}
r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
if (r) {
kvfree(e->user_pages);
e->user_pages = NULL;
return r;
}
if (list_empty(&need_pages))
break;
/* Unreserve everything again. */
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
/* We tried too many times, just abort */
if (!--tries) {
r = -EDEADLK;
DRM_ERROR("deadlock in %s\n", __func__);
goto error_free_pages;
}
/* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) {
struct ttm_tt *ttm = e->tv.bo->ttm;
e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*),
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
r = -ENOMEM;
DRM_ERROR("calloc failure in %s\n", __func__);
goto error_free_pages;
}
r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
if (r) {
DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
kvfree(e->user_pages);
e->user_pages = NULL;
goto error_free_pages;
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
userpage_invalidated = true;
break;
}
}
e->user_invalidated = userpage_invalidated;
}
/* And try again. */
list_splice(&need_pages, &p->validated);
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates, true);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
goto out;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@ -759,17 +723,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
error_validate:
if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
error_free_pages:
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
if (!e->user_pages)
continue;
release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
kvfree(e->user_pages);
}
out:
return r;
}
@ -1056,11 +1010,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
j++;
}
/* UVD & VCE fw doesn't support user fences */
/* MM engine doesn't support user fences */
ring = to_amdgpu_ring(parser->entity->rq->sched);
if (parser->job->uf_addr && (
ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE))
if (parser->job->uf_addr && ring->funcs->no_user_fence)
return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
@ -1330,7 +1282,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
int r;
job = p->job;
@ -1340,15 +1291,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
/* No memory allocation is allowed while holding the mn lock */
/* No memory allocation is allowed while holding the mn lock.
* p->mn is held until amdgpu_cs_submit is finished and the fence is
* added to the BOs.
*/
amdgpu_mn_lock(p->mn);
/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN; drmIoctl() in libdrm will then restart amdgpu_cs_ioctl().
*/
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
r = -ERESTARTSYS;
goto error_abort;
}
r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
}
if (r) {
r = -EAGAIN;
goto error_abort;
}
job->owner = p->filp;
@ -1444,6 +1403,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
return r;
}
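For reference, the -EAGAIN convention used in amdgpu_cs_submit() above works because libdrm's drmIoctl() wrapper restarts interrupted ioctls. A hedged sketch of that retry loop (illustrative, not the verbatim libdrm source):

#include <errno.h>
#include <sys/ioctl.h>

/* Keep reissuing the ioctl while it fails with EINTR or EAGAIN, so a CS
 * submission that raced with a userptr invalidation is transparently
 * resubmitted without userspace having to handle the restart itself.
 */
static int drm_ioctl_restarting(int fd, unsigned long request, void *arg)
{
        int ret;

        do {
                ret = ioctl(fd, request, arg);
        } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

        return ret;
}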

View file

@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, true);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;

View file

@ -98,6 +98,28 @@ static const char *amdgpu_asic_name[] = {
"LAST",
};
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
* of PCIe replays (NAKs).
* The file pcie_replay_count is used for this and returns the total
* number of replays as the sum of the NAKs generated and the NAKs received.
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}
static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
amdgpu_device_get_pcie_replay_count, NULL);
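Since the attribute lives on the PCI device, the counter can be read from userspace once the driver is loaded. A small hedged sketch (the PCI bus address in the path is an assumption; substitute your GPU's address from /sys/bus/pci/devices/):

#include <stdio.h>

int main(void)
{
        unsigned long long replays;
        /* Hypothetical path; the segment after "devices/" depends on the
         * GPU's PCI bus address.
         */
        FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/pcie_replay_count", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%llu", &replays) == 1)
                printf("PCIe replays: %llu\n", replays);
        fclose(f);
        return 0;
}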
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
@ -911,8 +933,10 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
* Validates certain module parameters and updates
* the associated values used by the driver (all asics).
*/
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
int ret = 0;
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@ -957,12 +981,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vram_page_split = 1024;
}
if (amdgpu_lockup_timeout == 0) {
dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
amdgpu_lockup_timeout = 10000;
ret = amdgpu_device_get_job_timeout_settings(adev);
if (ret) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
return ret;
}
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
return ret;
}
/**
@ -1506,12 +1533,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
/* query the reg access mode at the very beginning */
amdgpu_virt_init_reg_access_mode(adev);
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
/* Read BIOS */
if (!amdgpu_get_bios(adev))
return -EINVAL;
r = amdgpu_atombios_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
return r;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n",
@ -1551,6 +1592,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
@ -1828,6 +1870,43 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
return 0;
}
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
struct amdgpu_gpu_instance *gpu_ins;
struct amdgpu_device *adev;
int i, ret = 0;
mutex_lock(&mgpu_info.mutex);
/*
* MGPU fan boost feature should be enabled
* only when there are two or more dGPUs in
* the system
*/
if (mgpu_info.num_dgpu < 2)
goto out;
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
if (!(adev->flags & AMD_IS_APU) &&
!gpu_ins->mgpu_fan_enabled &&
adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
break;
gpu_ins->mgpu_fan_enabled = 1;
}
}
out:
mutex_unlock(&mgpu_info.mutex);
return ret;
}
/**
* amdgpu_device_ip_late_init - run late init for hardware IPs
*
@ -1861,11 +1940,15 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
amdgpu_device_fill_reset_magic(adev);
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* set to low pstate by default */
amdgpu_xgmi_set_pstate(adev, 0);
return 0;
}
@ -1964,65 +2047,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
struct amdgpu_gpu_instance *gpu_ins;
struct amdgpu_device *adev;
int i, ret = 0;
mutex_lock(&mgpu_info.mutex);
/*
* MGPU fan boost feature should be enabled
* only when there are two or more dGPUs in
* the system
*/
if (mgpu_info.num_dgpu < 2)
goto out;
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
if (!(adev->flags & AMD_IS_APU) &&
!gpu_ins->mgpu_fan_enabled &&
adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
break;
gpu_ins->mgpu_fan_enabled = 1;
}
}
out:
mutex_unlock(&mgpu_info.mutex);
return ret;
}
/**
* amdgpu_device_ip_late_init_func_handler - work handler for ib test
* amdgpu_device_delayed_init_work_handler - work handler for IB tests
*
* @work: work_struct.
*/
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, late_init_work.work);
container_of(work, struct amdgpu_device, delayed_init_work.work);
int r;
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* set to low pstate by default */
amdgpu_xgmi_set_pstate(adev, 0);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@ -2474,7 +2512,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->lock_reset);
mutex_init(&adev->virt.dpm_mutex);
amdgpu_device_check_arguments(adev);
r = amdgpu_device_check_arguments(adev);
if (r)
return r;
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
@ -2492,8 +2532,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->ring_lru_list);
spin_lock_init(&adev->ring_lru_list_lock);
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
@ -2559,19 +2599,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto fence_driver_init;
}
/* Read BIOS */
if (!amdgpu_get_bios(adev)) {
r = -EINVAL;
goto failed;
}
r = amdgpu_atombios_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
goto failed;
}
/* detect if we are with an SRIOV vbios */
amdgpu_device_detect_sriov_bios(adev);
@ -2673,6 +2700,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
r = amdgpu_ucode_sysfs_init(adev);
if (r)
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
r = amdgpu_debugfs_gem_init(adev);
if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@ -2713,7 +2744,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
/* must succeed. */
amdgpu_ras_post_init(adev);
amdgpu_ras_resume(adev);
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
if (r) {
dev_err(adev->dev, "Could not create pcie_replay_count");
return r;
}
return 0;
@ -2756,7 +2796,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
cancel_delayed_work_sync(&adev->late_init_work);
cancel_delayed_work_sync(&adev->delayed_init_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
@ -2778,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
amdgpu_ucode_sysfs_fini(adev);
}
@ -2817,7 +2859,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1);
cancel_delayed_work_sync(&adev->late_init_work);
cancel_delayed_work_sync(&adev->delayed_init_work);
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
@ -2858,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
amdgpu_amdkfd_suspend(adev);
amdgpu_ras_suspend(adev);
r = amdgpu_device_ip_suspend_phase1(adev);
/* evict vram memory */
@ -2935,6 +2979,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (r)
return r;
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
if (!amdgpu_device_has_dc_support(adev)) {
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@ -2958,7 +3005,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
return r;
/* Make sure IB tests flushed */
flush_delayed_work(&adev->late_init_work);
flush_delayed_work(&adev->delayed_init_work);
/* blat the mode back in */
if (fbcon) {
@ -2978,6 +3025,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
drm_kms_helper_poll_enable(dev);
amdgpu_ras_resume(adev);
/*
* Most of the connector probing functions try to acquire runtime pm
* refs to ensure that the GPU is powered on when connector polling is
@ -3456,6 +3505,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
r = amdgpu_device_ip_late_init(tmp_adev);
if (r)
goto out;
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
/* Update PSP FW topology after reset */
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
@ -3540,6 +3596,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU reset begin!\n");
cancel_delayed_work_sync(&adev->delayed_init_work);
hive = amdgpu_get_xgmi_hive(adev, false);
/*
@ -3696,43 +3754,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return r;
}
static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
enum pci_bus_speed *speed,
enum pcie_link_width *width)
{
struct pci_dev *pdev = adev->pdev;
enum pci_bus_speed cur_speed;
enum pcie_link_width cur_width;
u32 ret = 1;
*speed = PCI_SPEED_UNKNOWN;
*width = PCIE_LNK_WIDTH_UNKNOWN;
while (pdev) {
cur_speed = pcie_get_speed_cap(pdev);
cur_width = pcie_get_width_cap(pdev);
ret = pcie_bandwidth_available(adev->pdev, NULL,
NULL, &cur_width);
if (!ret)
cur_width = PCIE_LNK_WIDTH_RESRV;
if (cur_speed != PCI_SPEED_UNKNOWN) {
if (*speed == PCI_SPEED_UNKNOWN)
*speed = cur_speed;
else if (cur_speed < *speed)
*speed = cur_speed;
}
if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
if (*width == PCIE_LNK_WIDTH_UNKNOWN)
*width = cur_width;
else if (cur_width < *width)
*width = cur_width;
}
pdev = pci_upstream_bridge(pdev);
}
}
/**
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
*
@ -3766,8 +3787,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
&platform_link_width);
pcie_bandwidth_available(adev->pdev, NULL,
&platform_speed_cap, &platform_link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */

View file

@ -633,10 +633,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
amdgpu_dither_enum_list, sz);
if (amdgpu_device_has_dc_support(adev)) {
adev->mode_info.max_bpc_property =
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
if (!adev->mode_info.max_bpc_property)
return -ENOMEM;
adev->mode_info.abm_level_property =
drm_property_create_range(adev->ddev, 0,
"abm level", 0, 4);

View file

@ -1,5 +1,5 @@
/*
* Copyright 2012 Advanced Micro Devices, Inc.
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -101,7 +101,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
* Returns:
* 0 on success or a negative error code on failure.
*/
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@ -135,6 +136,235 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
struct dma_fence **fences;
unsigned int count;
int r;
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
return 0;
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
if (r)
return r;
if (count == 0) {
/* Now that was unexpected. */
} else if (count == 1) {
reservation_object_add_excl_fence(obj, fences[0]);
dma_fence_put(fences[0]);
kfree(fences);
} else {
struct dma_fence_array *array;
array = dma_fence_array_create(count, fences,
dma_fence_context_alloc(1), 0,
false);
if (!array)
goto err_fences_put;
reservation_object_add_excl_fence(obj, &array->base);
dma_fence_put(&array->base);
}
return 0;
err_fences_put:
while (count--)
dma_fence_put(fences[count]);
kfree(fences);
return -ENOMEM;
}
/**
* amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
r = amdgpu_bo_reserve(bo, false);
if (unlikely(r != 0))
goto error_detach;
if (attach->dev->driver != adev->dev->driver) {
/*
* We only create shared fences for internal use, but importers
* of the dmabuf rely on exclusive fences for implicitly
* tracking write hazards. As any of the current fences may
* correspond to a write, we need to convert all existing
* fences on the reservation object into a single exclusive
* fence.
*/
r = __reservation_object_make_exclusive(bo->tbo.resv);
if (r)
goto error_unreserve;
}
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
error_unreserve:
amdgpu_bo_unreserve(bo);
error_detach:
if (r)
drm_gem_map_detach(dma_buf, attach);
return r;
}
/**
* amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int ret = 0;
ret = amdgpu_bo_reserve(bo, true);
if (unlikely(ret != 0))
goto error;
amdgpu_bo_unpin(bo);
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;
amdgpu_bo_unreserve(bo);
error:
drm_gem_map_detach(dma_buf, attach);
}
/**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM BO
*
* Returns:
* The BO's reservation object.
*/
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
return bo->tbo.resv;
}
/**
* amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
* This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
return 0;
/* move to gtt */
ret = amdgpu_bo_reserve(bo, false);
if (unlikely(ret != 0))
return ret;
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
amdgpu_bo_unreserve(bo);
return ret;
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_map_attach,
.detach = amdgpu_dma_buf_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
* The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj.
*
* Returns:
* Shared DMA buffer representing the GEM BO from the given device.
*/
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(dev, gobj, flags);
if (!IS_ERR(buf)) {
buf->file->f_mapping = dev->anon_inode->i_mapping;
buf->ops = &amdgpu_dmabuf_ops;
}
return buf;
}
/**
* amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
* implementation
@ -186,235 +416,6 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
return ERR_PTR(ret);
}
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
struct dma_fence **fences;
unsigned int count;
int r;
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
return 0;
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
if (r)
return r;
if (count == 0) {
/* Now that was unexpected. */
} else if (count == 1) {
reservation_object_add_excl_fence(obj, fences[0]);
dma_fence_put(fences[0]);
kfree(fences);
} else {
struct dma_fence_array *array;
array = dma_fence_array_create(count, fences,
dma_fence_context_alloc(1), 0,
false);
if (!array)
goto err_fences_put;
reservation_object_add_excl_fence(obj, &array->base);
dma_fence_put(&array->base);
}
return 0;
err_fences_put:
while (count--)
dma_fence_put(fences[count]);
kfree(fences);
return -ENOMEM;
}
/**
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
r = amdgpu_bo_reserve(bo, false);
if (unlikely(r != 0))
goto error_detach;
if (attach->dev->driver != adev->dev->driver) {
/*
* We only create shared fences for internal use, but importers
* of the dmabuf rely on exclusive fences for implicitly
* tracking write hazards. As any of the current fences may
* correspond to a write, we need to convert all existing
* fences on the reservation object into a single exclusive
* fence.
*/
r = __reservation_object_make_exclusive(bo->tbo.resv);
if (r)
goto error_unreserve;
}
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
error_unreserve:
amdgpu_bo_unreserve(bo);
error_detach:
if (r)
drm_gem_map_detach(dma_buf, attach);
return r;
}
/**
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int ret = 0;
ret = amdgpu_bo_reserve(bo, true);
if (unlikely(ret != 0))
goto error;
amdgpu_bo_unpin(bo);
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;
amdgpu_bo_unreserve(bo);
error:
drm_gem_map_detach(dma_buf, attach);
}
/**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM BO
*
* Returns:
* The BO's reservation object.
*/
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
return bo->tbo.resv;
}
/**
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
* This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
return 0;
/* move to gtt */
ret = amdgpu_bo_reserve(bo, false);
if (unlikely(ret != 0))
return ret;
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
amdgpu_bo_unreserve(bo);
return ret;
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_gem_map_attach,
.detach = amdgpu_gem_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
* The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj.
*
* Returns:
* Shared DMA buffer representing the GEM BO from the given device.
*/
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(dev, gobj, flags);
if (!IS_ERR(buf)) {
buf->file->f_mapping = dev->anon_inode->i_mapping;
buf->ops = &amdgpu_dmabuf_ops;
}
return buf;
}
/**
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
* @dev: DRM device

View file

@ -0,0 +1,46 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_DMA_BUF_H__
#define __AMDGPU_DMA_BUF_H__
#include <drm/drm_gem.h>
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
struct vm_area_struct *vma);
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
#endif

View file

@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
int min_temp;
/* high temperature threshold */
int max_temp;
/* edge max emergency(shutdown) temp */
int max_edge_emergency_temp;
/* hotspot low temperature threshold */
int min_hotspot_temp;
/* hotspot high temperature critical threshold */
int max_hotspot_crit_temp;
/* hotspot max emergency(shutdown) temp */
int max_hotspot_emergency_temp;
/* memory low temperature threshold */
int min_mem_temp;
/* memory high temperature critical threshold */
int max_mem_crit_temp;
/* memory max emergency(shutdown) temp */
int max_mem_emergency_temp;
/* was last interrupt low to high or high to low */
bool high_to_low;
/* interrupt source */

View file

@ -38,7 +38,7 @@
#include "amdgpu.h"
#include "amdgpu_irq.h"
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_amdkfd.h"
@ -83,6 +83,8 @@
#define KMS_DRIVER_MINOR 32
#define KMS_DRIVER_PATCHLEVEL 0
#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
@ -95,7 +97,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
int amdgpu_lockup_timeout = 10000;
char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
@ -229,12 +231,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
* DOC: lockup_timeout (int)
* Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
* Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is, one or
* multiple values can be specified. 0 and negative values are invalid and will
* be adjusted to the default timeout.
* - With one value specified, the setting will apply to all non-compute jobs.
* - With multiple values specified, the first one is for GFX, the second for
* Compute, and the third and fourth for SDMA and Video respectively.
* By default (with no lockup_timeout settings), the timeout for all non-compute
* (GFX, SDMA and Video) jobs is 10000, and there is no timeout enforced on
* compute jobs.
*/
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
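/*
 * Hypothetical usage of the option above (values are examples only):
 *
 *   modprobe amdgpu lockup_timeout=5000
 *     - 5000 ms for all non-compute jobs, compute still has no timeout
 *
 *   modprobe amdgpu lockup_timeout=10000,2000,6000,8000
 *     - GFX 10000 ms, Compute 2000 ms, SDMA 6000 ms, Video 8000 ms
 */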
/**
* DOC: dpm (int)
@ -657,6 +668,16 @@ MODULE_PARM_DESC(noretry,
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
/**
* DOC: hws_gws_support (bool)
* Whether HWS supports GWS barriers. Default value: false (not supported).
* This will be replaced with a MEC firmware version check once firmware
* is ready
*/
bool hws_gws_support;
module_param(hws_gws_support, bool, 0444);
MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
#endif
/**
@ -1218,6 +1239,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
return 0;
}
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
{
char *input = amdgpu_lockup_timeout;
char *timeout_setting = NULL;
int index = 0;
long timeout;
int ret = 0;
/*
* By default, the timeout for non-compute jobs is 10000 ms,
* and there is no timeout enforced on compute jobs.
*/
adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
if (ret)
return ret;
/* Invalidate 0 and negative values */
if (timeout <= 0) {
index++;
continue;
}
switch (index++) {
case 0:
adev->gfx_timeout = timeout;
break;
case 1:
adev->compute_timeout = timeout;
break;
case 2:
adev->sdma_timeout = timeout;
break;
case 3:
adev->video_timeout = timeout;
break;
default:
break;
}
}
/*
* There is only one value specified and
* it should apply to all non-compute jobs.
*/
if (index == 1)
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
}
return ret;
}
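/*
 * Illustrative parse results for the function above (a sketch derived
 * from the code, not taken from this patch):
 *
 *   "7000"      -> gfx = sdma = video = 7000, compute = MAX_SCHEDULE_TIMEOUT
 *   "7000,1000" -> gfx = 7000, compute = 1000, sdma/video = default 10000
 *   "0,5000"    -> gfx keeps the default (0 is invalid), compute = 5000
 */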
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,

View file

@ -429,9 +429,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission)
{
struct amdgpu_device *adev = ring->adev;
long timeout;
int r;
if (!adev)
return -EINVAL;
/* Check that num_hw_submission is a power of two */
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
return -EINVAL;
@ -453,12 +457,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
/* No need to setup the GPU scheduler for KIQ ring */
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
/* for non-sriov case, no timeout enforce on compute ring */
if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
&& !amdgpu_sriov_vf(ring->adev))
timeout = MAX_SCHEDULE_TIMEOUT;
else
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_COMPUTE:
/*
* For the non-SR-IOV case, no timeout is enforced
* on the compute ring by default, unless the user
* specifies a timeout for the compute ring.
*
* For the SR-IOV case, always use the same timeout
* as the GFX ring.
*/
if (!amdgpu_sriov_vf(ring->adev))
timeout = adev->compute_timeout;
else
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_SDMA:
timeout = adev->sdma_timeout;
break;
default:
timeout = adev->video_timeout;
break;
}
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,

View file

@ -27,26 +27,11 @@
struct amdgpu_ring;
struct amdgpu_bo;
struct amdgpu_gds_asic_info {
uint32_t total_size;
uint32_t gfx_partition_size;
uint32_t cs_partition_size;
};
struct amdgpu_gds {
struct amdgpu_gds_asic_info mem;
struct amdgpu_gds_asic_info gws;
struct amdgpu_gds_asic_info oa;
uint32_t gds_size;
uint32_t gws_size;
uint32_t oa_size;
uint32_t gds_compute_max_wave_id;
/* At present, GDS, GWS and OA resources for gfx (graphics)
* is always pre-allocated and available for graphics operation.
* Such resource is shared between all gfx clients.
* TODO: move this operation to user space
* */
struct amdgpu_bo* gds_gfx_bo;
struct amdgpu_bo* gws_gfx_bo;
struct amdgpu_bo* oa_gfx_bo;
};
struct amdgpu_gds_reg_offset {

View file

@ -175,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, true);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
@ -334,26 +334,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_reserve(bo, true);
if (r)
goto free_pages;
goto user_pages_done;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
goto free_pages;
goto user_pages_done;
}
r = drm_gem_handle_create(filp, gobj, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_put_unlocked(gobj);
if (r)
return r;
goto user_pages_done;
args->handle = handle;
return 0;
free_pages:
release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
user_pages_done:
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
release_object:
drm_gem_object_put_unlocked(gobj);
@ -614,7 +612,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, true);
if (r)
goto error_unref;

View file

@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
/*
* GEM objects.

View file

@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
else
drm_sched_suspend_timeout(&ring->sched);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,

View file

@ -593,13 +593,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info));
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
gds_info.gds_total_size = adev->gds.mem.total_size;
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
gds_info.compute_partition_size = adev->gds.gds_size;
gds_info.gds_total_size = adev->gds.gds_size;
gds_info.gws_per_compute_partition = adev->gds.gws_size;
gds_info.oa_per_compute_partition = adev->gds.oa_size;
return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
}
@ -980,7 +977,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
int r, pasid;
/* Ensure IB tests are run on ring */
flush_delayed_work(&adev->late_init_work);
flush_delayed_work(&adev->delayed_init_work);
file_priv->driver_priv = NULL;

View file

@ -45,7 +45,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/mmu_notifier.h>
#include <linux/hmm.h>
#include <linux/interval_tree.h>
#include <drm/drm.h>
@ -58,14 +58,12 @@
*
* @adev: amdgpu device pointer
* @mm: process address space
* @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
* @lock: rw semaphore protecting the notifier nodes
* @objects: interval tree containing amdgpu_mn_nodes
* @read_lock: mutex for recursive locking of @lock
* @recursion: depth of recursion
* @mirror: HMM mirror function support
*
* Data for each amdgpu device and process address space.
*/
@ -73,7 +71,6 @@ struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
@ -85,8 +82,9 @@ struct amdgpu_mn {
/* objects protected by lock */
struct rw_semaphore lock;
struct rb_root_cached objects;
struct mutex read_lock;
atomic_t recursion;
/* HMM mirror */
struct hmm_mirror mirror;
};
/**
@ -103,7 +101,7 @@ struct amdgpu_mn_node {
};
/**
* amdgpu_mn_destroy - destroy the MMU notifier
* amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously scheduled work item
*
@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
* amdgpu_mn_release - callback to notify about mm destruction
* amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
* @mn: our notifier
* @mm: the mm this callback is about
* @mirror: the HMM mirror (mm) this callback is about
*
* Schedule a work item to lazily destroy our notifier.
* Schedule a work item to lazily destroy the HMM mirror.
*/
static void amdgpu_mn_release(struct mmu_notifier *mn,
struct mm_struct *mm)
static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
if (blockable)
mutex_lock(&amn->read_lock);
else if (!mutex_trylock(&amn->read_lock))
down_read(&amn->lock);
else if (!down_read_trylock(&amn->lock))
return -EAGAIN;
if (atomic_inc_return(&amn->recursion) == 1)
down_read_non_owner(&amn->lock);
mutex_unlock(&amn->read_lock);
return 0;
}
@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
*/
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
if (atomic_dec_return(&amn->recursion) == 0)
up_read_non_owner(&amn->lock);
up_read(&amn->lock);
}
/**
@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
true, false, MAX_SCHEDULE_TIMEOUT);
if (r <= 0)
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
}
}
/**
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
* amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
*
* @mn: our notifier
* @range: mmu notifier context
* @mirror: the hmm_mirror (mm) that is about to update
* @update: the update start and end addresses
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
const struct mmu_notifier_range *range)
static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
const struct hmm_update *update)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
unsigned long start = update->start;
unsigned long end = update->end;
bool blockable = update->blockable;
struct interval_tree_node *it;
unsigned long end;
/* notification is exclusive, but interval is inclusive */
end = range->end - 1;
end -= 1;
/* TODO we should be able to split locking for interval tree and
* amdgpu_mn_invalidate_node
*/
if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
it = interval_tree_iter_first(&amn->objects, range->start, end);
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
if (!mmu_notifier_range_blockable(range)) {
if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, range->start, end);
it = interval_tree_iter_next(it, start, end);
amdgpu_mn_invalidate_node(node, range->start, end);
amdgpu_mn_invalidate_node(node, start, end);
}
amdgpu_mn_read_unlock(amn);
return 0;
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
* amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
*
* @mn: our notifier
* @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
* @mirror: the hmm_mirror (mm) that is about to update
* @update: the update start and end addresses
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restored in amdgpu_mn_invalidate_range_end_hsa.
*/
static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
const struct mmu_notifier_range *range)
static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
const struct hmm_update *update)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
unsigned long start = update->start;
unsigned long end = update->end;
bool blockable = update->blockable;
struct interval_tree_node *it;
unsigned long end;
/* notification is exclusive, but interval is inclusive */
end = range->end - 1;
end -= 1;
if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
it = interval_tree_iter_first(&amn->objects, range->start, end);
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
if (!mmu_notifier_range_blockable(range)) {
if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, range->start, end);
it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
struct kgd_mem *mem = bo->kfd_bo;
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
range->start,
end))
amdgpu_amdkfd_evict_userptr(mem, range->mm);
start, end))
amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
amdgpu_mn_read_unlock(amn);
return 0;
}
/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
* @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
* Release the lock again to allow new command submissions.
*/
static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
const struct mmu_notifier_range *range)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
amdgpu_mn_read_unlock(amn);
}
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
},
[AMDGPU_MN_TYPE_HSA] = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
},
};
/* Low bits of any reasonable mm pointer will be unused due to struct
* alignment. Use these bits to make a unique key from the mm pointer
* and notifier type.
*/
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
.release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
.release = amdgpu_hmm_mirror_release
},
};
/**
* amdgpu_mn_get - create notifier context
* amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
* Creates an HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
mutex_init(&amn->read_lock);
atomic_set(&amn->recursion, 0);
r = __mmu_notifier_register(&amn->mn, mm);
amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@ -432,7 +404,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
* Registers an MMU notifier for the given BO at the specified address.
* Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
* amdgpu_mn_unregister - unregister a BO for notifier updates
* amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
* Remove any registration of MMU notifier updates from the buffer object.
* Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mutex_unlock(&adev->mn_lock);
}
/* flags used internally by HMM, not related to CPU/GPU PTE flags */
static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
(1 << 0), /* HMM_PFN_VALID */
(1 << 1), /* HMM_PFN_WRITE */
0 /* HMM_PFN_DEVICE_PRIVATE */
};
static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
0, /* HMM_PFN_NONE */
0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
};
void amdgpu_hmm_init_range(struct hmm_range *range)
{
if (range) {
range->flags = hmm_range_flags;
range->values = hmm_range_values;
range->pfn_shift = PAGE_SHIFT;
range->pfns = NULL;
INIT_LIST_HEAD(&range->list);
}
}
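/*
 * Hypothetical caller sketch (field names come from the function above;
 * the surrounding HMM calls are assumptions, not part of this patch):
 *
 *	struct hmm_range range = {};
 *
 *	amdgpu_hmm_init_range(&range);
 *	range.start = start;
 *	range.end = end;
 *	range.pfns = pfns;	// caller-allocated array, one entry per page
 *	// then hand &range to the HMM mirror helpers, e.g.
 *	// hmm_range_register() and hmm_range_fault(), to snapshot the
 *	// CPU page tables.
 */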

View file

@ -25,22 +25,24 @@
#define __AMDGPU_MN_H__
/*
* MMU Notifier
* HMM mirror
*/
struct amdgpu_mn;
struct hmm_range;
enum amdgpu_mn_type {
AMDGPU_MN_TYPE_GFX,
AMDGPU_MN_TYPE_HSA,
};
#if defined(CONFIG_MMU_NOTIFIER)
#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
void amdgpu_hmm_init_range(struct hmm_range *range);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
"add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
return -ENODEV;
}
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}

View file

@ -331,8 +331,6 @@ struct amdgpu_mode_info {
struct drm_property *audio_property;
/* FMT dithering */
struct drm_property *dither_property;
/* maximum number of bits per channel for monitor color */
struct drm_property *max_bpc_property;
/* Adaptive Backlight Modulation (power feature) */
struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */

View file

@ -70,6 +70,15 @@ static const struct cg_flag_name clocks[] = {
{0, NULL},
};
static const struct hwmon_temp_label {
enum PP_HWMON_TEMP channel;
const char *label;
} temp_label[] = {
{PP_TEMP_EDGE, "edge"},
{PP_TEMP_JUNCTION, "junction"},
{PP_TEMP_MEM, "mem"},
};
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
{
if (adev->pm.dpm_enabled) {
@ -761,7 +770,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
pr_debug("featuremask = 0x%llx\n", featuremask);
if (adev->powerplay.pp_funcs->set_ppfeature_status) {
if (is_support_sw_smu(adev)) {
ret = smu_set_ppfeature_status(&adev->smu, featuremask);
if (ret)
return -EINVAL;
} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
if (ret)
return -EINVAL;
@ -777,7 +790,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
if (adev->powerplay.pp_funcs->get_ppfeature_status)
if (is_support_sw_smu(adev)) {
return smu_get_ppfeature_status(&adev->smu, buf);
} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
return amdgpu_dpm_get_ppfeature_status(adev, buf);
return snprintf(buf, PAGE_SIZE, "\n");
@ -1305,6 +1320,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
/**
* DOC: mem_busy_percent
*
* The amdgpu driver provides a sysfs API for reading how busy the VRAM
* is as a percentage. The file mem_busy_percent is used for this.
* The SMU firmware computes a percentage of load based on the
* aggregate activity level in the IP cores.
*/
static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
(void *)&value, &size);
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
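/*
 * Hypothetical userspace read of the file above (the sysfs path is
 * assumed, not taken from this patch):
 *
 *   $ cat /sys/class/drm/card0/device/mem_busy_percent
 *   42
 */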
/**
* DOC: pcie_bw
*
@ -1330,6 +1371,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
count0, count1, pcie_get_mps(adev->pdev));
}
/**
* DOC: unique_id
*
* The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
* The file unique_id is used for this.
* This will provide a Unique ID that will persist from machine to machine
*
* NOTE: This will only work for GFX9 and newer. This file will be absent
* on unsupported ASICs (GFX8 and older)
*/
static ssize_t amdgpu_get_unique_id(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
if (adev->unique_id)
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
return 0;
}
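/*
 * Hypothetical userspace read (the sysfs path is assumed; the value is
 * an example only):
 *
 *   $ cat /sys/class/drm/card0/device/unique_id
 *   0123456789abcdef
 */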
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@ -1374,10 +1438,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
amdgpu_get_memory_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
amdgpu_get_ppfeature_status,
amdgpu_set_ppfeature_status);
static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@ -1385,6 +1452,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
int channel = to_sensor_dev_attr(attr)->index;
int r, temp, size = sizeof(temp);
/* Can't get temperature when the card is off */
@ -1392,11 +1460,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
/* get the temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
(void *)&temp, &size);
if (r)
return r;
if (channel >= PP_TEMP_MAX)
return -EINVAL;
switch (channel) {
case PP_TEMP_JUNCTION:
/* get current junction temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
(void *)&temp, &size);
if (r)
return r;
break;
case PP_TEMP_EDGE:
/* get current edge temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
(void *)&temp, &size);
if (r)
return r;
break;
case PP_TEMP_MEM:
/* get current memory temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
(void *)&temp, &size);
if (r)
return r;
break;
}
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@ -1417,6 +1506,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int hyst = to_sensor_dev_attr(attr)->index;
int temp;
if (hyst)
temp = adev->pm.dpm.thermal.min_hotspot_temp;
else
temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int hyst = to_sensor_dev_attr(attr)->index;
int temp;
if (hyst)
temp = adev->pm.dpm.thermal.min_mem_temp;
else
temp = adev->pm.dpm.thermal.max_mem_crit_temp;
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
struct device_attribute *attr,
char *buf)
{
int channel = to_sensor_dev_attr(attr)->index;
if (channel >= PP_TEMP_MAX)
return -EINVAL;
return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
}
static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int channel = to_sensor_dev_attr(attr)->index;
int temp = 0;
if (channel >= PP_TEMP_MAX)
return -EINVAL;
switch (channel) {
case PP_TEMP_JUNCTION:
temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
break;
case PP_TEMP_EDGE:
temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
break;
case PP_TEMP_MEM:
temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
break;
}
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -1986,11 +2145,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* hwmon interfaces for GPU temperature:
*
* - temp1_input: the on die GPU temperature in millidegrees Celsius
* - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
* - temp2_input and temp3_input are supported on SOC15 dGPUs only
*
* - temp1_crit: temperature critical max value in millidegrees Celsius
* - temp[1-3]_label: temperature channel label
* - temp2_label and temp3_label are supported on SOC15 dGPUs only
*
* - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
* - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
* - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
*
* - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
* - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
*
* - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
* - these are supported on SOC15 dGPUs only
*
* hwmon interfaces for GPU voltage:
*
@ -2038,9 +2206,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
*/
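/*
 * Hypothetical hwmon reads for the new channels described above (the
 * hwmon index and values are examples only):
 *
 *   $ cat /sys/class/hwmon/hwmon0/temp2_label
 *   junction
 *   $ cat /sys/class/hwmon/hwmon0/temp2_input
 *   65000
 */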
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@ -2067,6 +2247,18 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp2_input.dev_attr.attr,
&sensor_dev_attr_temp2_crit.dev_attr.attr,
&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp3_input.dev_attr.attr,
&sensor_dev_attr_temp3_crit.dev_attr.attr,
&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp1_emergency.dev_attr.attr,
&sensor_dev_attr_temp2_emergency.dev_attr.attr,
&sensor_dev_attr_temp3_emergency.dev_attr.attr,
&sensor_dev_attr_temp1_label.dev_attr.attr,
&sensor_dev_attr_temp2_label.dev_attr.attr,
&sensor_dev_attr_temp3_label.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_min.dev_attr.attr,
@ -2189,6 +2381,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
/* only SOC15 dGPUs support hotspot and mem temperatures */
if (((adev->flags & AMD_IS_APU) ||
adev->asic_type < CHIP_VEGA10) &&
(attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
return 0;
return effective_mode;
}
@ -2615,6 +2823,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n");
return ret;
}
/* APU does not have its own dedicated memory */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev,
&dev_attr_mem_busy_percent);
if (ret) {
DRM_ERROR("failed to create device file "
"mem_busy_percent\n");
return ret;
}
}
/* PCIe Perf counters won't work on APU nodes */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
@ -2623,6 +2841,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
}
if (adev->unique_id) {
ret = device_create_file(adev->dev, &dev_attr_unique_id);
if (ret) {
DRM_ERROR("failed to create device file unique_id\n");
return ret;
}
}
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@ -2680,8 +2904,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pcie_bw);
if (adev->unique_id)
device_remove_file(adev->dev, &dev_attr_unique_id);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_ppfeatures);
@ -2778,6 +3006,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
/* GPU Load */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
seq_printf(m, "GPU Load: %u %%\n", value);
/* MEM Load */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
seq_printf(m, "MEM Load: %u %%\n", value);
seq_printf(m, "\n");
/* SMC feature mask */

View file

@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
return ret;
}
static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint32_t id, uint32_t value)
{
cmd->cmd_id = GFX_CMD_ID_PROG_REG;
cmd->cmd.cmd_setup_reg_prog.reg_value = value;
cmd->cmd.cmd_setup_reg_prog.reg_id = id;
}
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
uint32_t value)
{
struct psp_gfx_cmd_resp *cmd = NULL;
int ret = 0;
if (reg >= PSP_REG_LAST)
return -EINVAL;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
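/*
 * Hypothetical caller sketch (only psp_reg_program() itself comes from
 * this patch; the IH value and call site are examples):
 *
 *	if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl))
 *		DRM_WARN("PSP failed to program IH_RB_CNTL\n");
 */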
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
uint32_t xgmi_ta_size, uint32_t shared_size)

View file

@ -62,6 +62,14 @@ struct psp_ring
uint32_t ring_size;
};
/* More registers may be supported in the future */
enum psp_reg_prog_id {
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
PSP_REG_LAST
};
struct psp_funcs
{
int (*init_microcode)(struct psp_context *psp);
@ -95,12 +103,26 @@ struct psp_funcs
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
struct psp_xgmi_node_info {
uint64_t node_id;
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
struct psp_xgmi_topology_info {
uint32_t num_nodes;
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
struct amdgpu_bo *xgmi_shared_bo;
uint64_t xgmi_shared_mc_addr;
void *xgmi_shared_buf;
struct psp_xgmi_topology_info top_info;
};
struct psp_ras_context {
@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
struct psp_xgmi_node_info {
uint64_t node_id;
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
struct psp_xgmi_topology_info {
uint32_t num_nodes;
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
uint32_t value);
#endif

View file

@ -92,6 +92,12 @@ struct ras_manager {
struct ras_err_data err_data;
};
struct ras_badpage {
unsigned int bp;	/* gpu page frame number of the bad page */
unsigned int size;	/* page size, in bytes */
unsigned int flags;	/* 0: reserved, 1: pending for reserve, 2: unable to reserve */
};
const char *ras_error_string[] = {
"none",
"parity",
@ -120,9 +126,16 @@ const char *ras_block_string[] = {
#define ras_err_str(i) (ras_error_string[ffs(i)])
#define ras_block_str(i) (ras_block_string[i])
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr);
static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
struct amdgpu_bo **bo_ptr);
static void amdgpu_ras_self_test(struct amdgpu_device *adev)
{
/* TODO */
@ -239,8 +252,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
/*
* DOC: ras debugfs control interface
/**
* DOC: AMDGPU RAS debugfs control interface
*
* It accepts struct ras_debug_if, which has two members.
*
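* A hypothetical userspace invocation (the debugfs path and exact token
* order are assumptions, not taken from this hunk):
*
*   $ echo "inject umc ue 0x0 0x0 0x0" > /sys/kernel/debug/dri/0/ras/ras_ctrl
*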
@ -302,6 +315,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct ras_debug_if data;
struct amdgpu_bo *bo;
int ret = 0;
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
@ -319,7 +333,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
ret = amdgpu_ras_reserve_vram(adev,
data.inject.address, PAGE_SIZE, &bo);
/* This address might already be in use, in which case the
* reservation fails and we skip the injection.
*/
if (ret)
break;
data.inject.address = amdgpu_bo_gpu_offset(bo);
ret = amdgpu_ras_error_inject(adev, &data.inject);
amdgpu_ras_release_vram(adev, &bo);
break;
default:
ret = -EINVAL;
@ -523,6 +546,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
return -EAGAIN;
return -EINVAL;
}
@ -543,16 +568,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
return -EINVAL;
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
/* If ras is enabled by vbios, we set up the ras object first in
* both cases. For enable, that is all we need to do. For
* disable, we need to perform a ras TA disable cmd after that.
*/
ret = __amdgpu_ras_feature_enable(adev, head, 1);
if (ret)
return ret;
if (enable) {
/* There is no harm in issuing a ras TA cmd regardless of
* the current ras state.
* If current state == target state, it will do nothing.
* But sometimes it requests the driver to reset and repost
* with error code -EAGAIN.
*/
ret = amdgpu_ras_feature_enable(adev, head, 1);
/* With old ras TA, we might fail to enable ras.
* Log it and just setup the object.
* TODO: remove this WA in the future.
*/
if (ret == -EINVAL) {
ret = __amdgpu_ras_feature_enable(adev, head, 1);
if (!ret)
DRM_INFO("RAS INFO: %s setup object\n",
ras_block_str(head->block));
}
} else {
/* setup the object then issue a ras TA disable cmd.*/
ret = __amdgpu_ras_feature_enable(adev, head, 1);
if (ret)
return ret;
if (!enable)
ret = amdgpu_ras_feature_enable(adev, head, 0);
}
} else
ret = amdgpu_ras_feature_enable(adev, head, enable);
@ -693,6 +734,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
struct ras_badpage **bps, unsigned int *count);
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
switch (flags) {
case 0:
return "R";
case 1:
return "P";
case 2:
default:
return "F";
}
}
/*
* DOC: ras sysfs gpu_vram_bad_pages interface
*
* It allows the user to read the bad pages of vram on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
*
* It outputs multiple lines, and each line stands for one gpu page.
*
* The format of one line is as follows:
* gpu pfn : gpu page size : flags
*
* gpu pfn and gpu page size are printed in hex format.
* flags can be one of the characters below:
* R: reserved, this gpu page is reserved and not able to be used.
* P: pending for reserve, this gpu page is marked as bad and will be
* reserved in the next window of page_reserve.
* F: unable to reserve, this gpu page can't be reserved for some reason.
*
* examples:
* 0x00000001 : 0x00001000 : R
* 0x00000002 : 0x00001000 : P
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
struct kobject *kobj, struct bin_attribute *attr,
char *buf, loff_t ppos, size_t count)
{
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, badpages_attr);
struct amdgpu_device *adev = con->adev;
const unsigned int element_size =
sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
unsigned int start = div64_ul(ppos + element_size - 1, element_size);
unsigned int end = div64_ul(ppos + count - 1, element_size);
ssize_t s = 0;
struct ras_badpage *bps = NULL;
unsigned int bps_count = 0;
memset(buf, 0, count);
if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
return 0;
for (; start < end && start < bps_count; start++)
s += scnprintf(&buf[s], element_size + 1,
"0x%08x : 0x%08x : %1s\n",
bps[start].bp,
bps[start].size,
amdgpu_ras_badpage_flags_str(bps[start].flags));
kfree(bps);
return s;
}
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@ -733,9 +845,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
struct bin_attribute *bin_attrs[] = {
&con->badpages_attr,
NULL
};
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
.bin_attrs = bin_attrs,
};
con->features_attr = (struct device_attribute) {
@ -745,7 +862,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
},
.show = amdgpu_ras_sysfs_features_read,
};
con->badpages_attr = (struct bin_attribute) {
.attr = {
.name = "gpu_vram_bad_pages",
.mode = S_IRUGO,
},
.size = 0,
.private = NULL,
.read = amdgpu_ras_sysfs_badpages_read,
};
sysfs_attr_init(attrs[0]);
sysfs_bin_attr_init(bin_attrs[0]);
return sysfs_create_group(&adev->dev->kobj, &group);
}
@ -757,9 +886,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
struct bin_attribute *bin_attrs[] = {
&con->badpages_attr,
NULL
};
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
.bin_attrs = bin_attrs,
};
sysfs_remove_group(&adev->dev->kobj, &group);
@ -1091,6 +1225,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
/* ih end */
/* recovery begin */
/* return 0 on success.
* the caller needs to free bps.
*/
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
struct ras_badpage **bps, unsigned int *count)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
int i = 0;
int ret = 0;
if (!con || !con->eh_data || !bps || !count)
return -EINVAL;
mutex_lock(&con->recovery_lock);
data = con->eh_data;
if (!data || data->count == 0) {
*bps = NULL;
goto out;
}
*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
if (!*bps) {
ret = -ENOMEM;
goto out;
}
for (; i < data->count; i++) {
(*bps)[i] = (struct ras_badpage){
.bp = data->bps[i].bp,
.size = AMDGPU_GPU_PAGE_SIZE,
.flags = 0,
};
if (data->last_reserved <= i)
(*bps)[i].flags = 1;
else if (data->bps[i].bo == NULL)
(*bps)[i].flags = 2;
}
*count = data->count;
out:
mutex_unlock(&con->recovery_lock);
return ret;
}
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
struct amdgpu_ras *ras =
@ -1342,6 +1523,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
}
/* recovery end */
/* return 0 if ras will reset the gpu and repost. */
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
unsigned int block)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (!ras)
return -EINVAL;
ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
return 0;
}
/*
* check the hardware's ras ability, which will be saved in hw_supported.
* if the hardware does not support ras, we can skip some ras initialization and
@ -1417,8 +1611,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
return -EINVAL;
}
/* do some init work after IP late init as dependence */
void amdgpu_ras_post_init(struct amdgpu_device *adev)
/* do some init work after IP late init, as a dependency.
* It runs in the resume, gpu reset and boot-up cases.
*/
void amdgpu_ras_resume(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
@ -1446,6 +1642,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
}
}
}
if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
/* Set up the ras obj state as disabled.
* This is for the init_by_vbios case.
* If we want to enable ras, just enable it in the normal way.
* If we want to disable it, set up the ras obj as enabled,
* then issue another TA disable cmd.
* See feature_enable_on_boot.
*/
amdgpu_ras_disable_all_features(adev, 1);
amdgpu_ras_reset_gpu(adev, 0);
}
}
void amdgpu_ras_suspend(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!con)
return;
amdgpu_ras_disable_all_features(adev, 0);
/* Make sure all ras objects are disabled. */
if (con->features)
amdgpu_ras_disable_all_features(adev, 1);
}
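The rename of amdgpu_ras_post_init() to amdgpu_ras_resume() pairs it with the new amdgpu_ras_suspend(); a minimal caller sketch, assuming the device-level power paths invoke them (the surrounding function names are hypothetical):

/* Sketch only: how suspend/resume paths might drive the RAS state. */
static int amdgpu_device_suspend_sketch(struct amdgpu_device *adev)
{
	amdgpu_ras_suspend(adev);	/* disable all RAS features early */
	/* ... suspend the IP blocks ... */
	return 0;
}

static int amdgpu_device_resume_sketch(struct amdgpu_device *adev)
{
	/* ... resume the IP blocks ... */
	amdgpu_ras_resume(adev);	/* restore features; handles the
					 * INIT_NEED_RESET flow above */
	return 0;
}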
/* do some fini work before IP fini, as a dependency */

View file

@ -93,6 +93,7 @@ struct amdgpu_ras {
struct dentry *ent;
/* sysfs */
struct device_attribute features_attr;
struct bin_attribute badpages_attr;
/* block array */
struct ras_manager *objs;
@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
return ras && (ras->supported & (1 << block));
}
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
unsigned int block);
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce);
@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
bool is_baco)
{
/* remove me when gpu reset works on vega20 A1. */
#if 0
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
schedule_work(&ras->recovery_work);
#endif
return 0;
}
@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
void amdgpu_ras_post_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);

View file

@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
uint32_t align_mask;
u32 nop;
bool support_64bit_ptrs;
bool no_user_fence;
unsigned vmhub;
unsigned extra_dw;

View file

@ -32,6 +32,7 @@
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/seq_file.h>
@ -47,6 +48,7 @@
#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
@ -707,100 +709,177 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
/*
* TTM backend functions.
*/
struct amdgpu_ttm_gup_task_list {
struct list_head list;
struct task_struct *task;
};
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
atomic_t mmu_invalidations;
uint32_t last_set_pages;
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
struct hmm_range *ranges;
int nr_ranges;
#endif
};
/**
* amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
* pointer to memory
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking of CPU page table updates
*
* Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
* This provides a wrapper around the get_user_pages() call to provide
* device accessible pages that back user memory.
* The calling function must call amdgpu_ttm_tt_get_user_pages_done() once and
* only once afterwards to stop HMM tracking.
*/
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
/* Userptr pages may span at most 16 VMAs */
#define MAX_NR_VMAS (16)
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
unsigned int flags = 0;
unsigned pinned = 0;
int r;
unsigned long start = gtt->userptr;
unsigned long end = start + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
struct hmm_range *ranges;
unsigned long nr_pages, i;
uint64_t *pfns, f;
int r = 0;
if (!mm) /* Happens during process shutdown */
return -ESRCH;
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
flags |= FOLL_WRITE;
down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/*
* check that we only use anonymous memory to prevent problems
* with writeback
*/
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
/* user pages may cross multiple VMAs */
gtt->nr_ranges = 0;
do {
unsigned long vm_start;
vma = find_vma(mm, gtt->userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
up_read(&mm->mmap_sem);
return -EPERM;
if (gtt->nr_ranges >= MAX_NR_VMAS) {
DRM_ERROR("Too many VMAs in userptr range\n");
r = -EFAULT;
goto out;
}
vm_start = vma ? vma->vm_end : start;
vma = find_vma(mm, vm_start);
if (unlikely(!vma || vm_start < vma->vm_start)) {
r = -EFAULT;
goto out;
}
vmas[gtt->nr_ranges++] = vma;
} while (end > vma->vm_end);
DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
start, gtt->nr_ranges, ttm->num_pages);
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
vmas[0]->vm_file)) {
r = -EPERM;
goto out;
}
/* loop enough times using contiguous pages of memory */
do {
unsigned num_pages = ttm->num_pages - pinned;
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
struct page **p = pages + pinned;
struct amdgpu_ttm_gup_task_list guptask;
ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
if (unlikely(!ranges)) {
r = -ENOMEM;
goto out;
}
guptask.task = current;
spin_lock(&gtt->guptasklock);
list_add(&guptask.list, &gtt->guptasks);
spin_unlock(&gtt->guptasklock);
pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
if (unlikely(!pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
if (mm == current->mm)
r = get_user_pages(userptr, num_pages, flags, p, NULL);
else
r = get_user_pages_remote(gtt->usertask,
mm, userptr, num_pages,
flags, p, NULL, NULL);
for (i = 0; i < gtt->nr_ranges; i++)
amdgpu_hmm_init_range(&ranges[i]);
spin_lock(&gtt->guptasklock);
list_del(&guptask.list);
spin_unlock(&gtt->guptasklock);
f = ranges[0].flags[HMM_PFN_VALID];
f |= amdgpu_ttm_tt_is_readonly(ttm) ?
0 : ranges[0].flags[HMM_PFN_WRITE];
memset64(pfns, f, ttm->num_pages);
if (r < 0)
goto release_pages;
for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
ranges[i].vma = vmas[i];
ranges[i].start = max(start, vmas[i]->vm_start);
ranges[i].end = min(end, vmas[i]->vm_end);
ranges[i].pfns = pfns + nr_pages;
nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
pinned += r;
r = hmm_vma_fault(&ranges[i], true);
if (unlikely(r))
break;
}
if (unlikely(r)) {
while (i--)
hmm_vma_range_done(&ranges[i]);
} while (pinned < ttm->num_pages);
goto out_free_pfns;
}
up_read(&mm->mmap_sem);
for (i = 0; i < ttm->num_pages; i++) {
pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
if (!pages[i]) {
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
i, pfns[i]);
goto out_invalid_pfn;
}
}
gtt->ranges = ranges;
return 0;
release_pages:
release_pages(pages, pinned);
out_free_pfns:
kvfree(pfns);
out_free_ranges:
kvfree(ranges);
out:
up_read(&mm->mmap_sem);
return r;
out_invalid_pfn:
for (i = 0; i < gtt->nr_ranges; i++)
hmm_vma_range_done(&ranges[i]);
kvfree(pfns);
kvfree(ranges);
return -ENOMEM;
}
/**
* amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table changes
* Check if the pages backing this ttm range have been invalidated
*
* Returns: true if pages are still valid
*/
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
bool r = false;
int i;
if (!gtt || !gtt->userptr)
return false;
DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
gtt->userptr, gtt->nr_ranges, ttm->num_pages);
WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
"No user pages to check\n");
if (gtt->ranges) {
for (i = 0; i < gtt->nr_ranges; i++)
r |= hmm_vma_range_done(&gtt->ranges[i]);
kvfree(gtt->ranges[0].pfns);
kvfree(gtt->ranges);
gtt->ranges = NULL;
}
return r;
}
#endif
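Taken together, amdgpu_ttm_tt_get_user_pages() and amdgpu_ttm_tt_get_user_pages_done() form a validate-then-check protocol: snapshot the pages, program the mapping, then ask whether the CPU page tables changed in between. A hedged caller sketch (the helper name and retry policy are illustrative, not this patch's CS code):

/* Sketch of the userptr validation loop; each get_user_pages() call is
 * paired with exactly one get_user_pages_done() call. */
static int userptr_bind_sketch(struct ttm_tt *ttm, struct page **pages)
{
	int r;

	do {
		r = amdgpu_ttm_tt_get_user_pages(ttm, pages);
		if (r)
			return r;

		amdgpu_ttm_tt_set_user_pages(ttm, pages);
		/* ... bind the pages to the GART here ... */

		/* done() returns true while the pages are still valid;
		 * retry if an invalidation raced with us. */
	} while (!amdgpu_ttm_tt_get_user_pages_done(ttm));

	return 0;
}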
/**
* amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
@ -811,39 +890,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
*/
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned i;
gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
for (i = 0; i < ttm->num_pages; ++i) {
if (ttm->pages[i])
put_page(ttm->pages[i]);
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
ttm->pages[i] = pages ? pages[i] : NULL;
}
}
/**
* amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
*
* Called while unpinning userptr pages
*/
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned i;
for (i = 0; i < ttm->num_pages; ++i) {
struct page *page = ttm->pages[i];
if (!page)
continue;
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
set_page_dirty(page);
mark_page_accessed(page);
}
}
/**
@ -905,10 +955,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
/* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
/* mark the pages as dirty */
amdgpu_ttm_tt_mark_user_pages(ttm);
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
if (gtt->ranges &&
ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
gtt->ranges[0].pfns[0]))
WARN_ONCE(1, "Missing get_user_page_done\n");
#endif
}
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@ -1258,11 +1312,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
gtt->usertask = current->group_leader;
get_task_struct(gtt->usertask);
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
atomic_set(&gtt->mmu_invalidations, 0);
gtt->last_set_pages = 0;
return 0;
}
@ -1291,7 +1340,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@ -1304,48 +1352,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
/* Search the lists of tasks that hold this mapping and see
* if current is one of them. If it is return false.
*/
spin_lock(&gtt->guptasklock);
list_for_each_entry(entry, &gtt->guptasks, list) {
if (entry->task == current) {
spin_unlock(&gtt->guptasklock);
return false;
}
}
spin_unlock(&gtt->guptasklock);
atomic_inc(&gtt->mmu_invalidations);
return true;
}
/**
* amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
* amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
*/
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int prev_invalidated = *last_invalidated;
*last_invalidated = atomic_read(&gtt->mmu_invalidations);
return prev_invalidated != *last_invalidated;
}
/**
* amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
* been invalidated since the last time they've been set?
*/
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL || !gtt->userptr)
return false;
return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
return true;
}
/**
@ -1757,44 +1777,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Initialize various on-chip memory pools */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.total_size);
adev->gds.gds_size);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
4, AMDGPU_GEM_DOMAIN_GDS,
&adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
adev->gds.gws.total_size);
adev->gds.gws_size);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1, AMDGPU_GEM_DOMAIN_GWS,
&adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
adev->gds.oa.total_size);
adev->gds.oa_size);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
/* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {

View file

@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
return -EPERM;
}
static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
return false;
}
#endif
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end);
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,

View file

@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
}
#define FW_VERSION_ATTR(name, mode, field) \
static ssize_t show_##name(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
struct drm_device *ddev = dev_get_drvdata(dev); \
struct amdgpu_device *adev = ddev->dev_private; \
\
return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \
} \
static DEVICE_ATTR(name, mode, show_##name, NULL)
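For reference, a sketch of what one invocation below expands to, modulo whitespace; FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version) yields the attribute that ends up under the fw_version sysfs group:

static ssize_t show_vce_fw_version(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->vce.fw_version);
}
static DEVICE_ATTR(vce_fw_version, 0444, show_vce_fw_version, NULL);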
FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
static struct attribute *fw_attrs[] = {
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
&dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
&dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
&dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
&dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
&dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
&dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
&dev_attr_dmcu_fw_version.attr, NULL
};
static const struct attribute_group fw_attr_group = {
.name = "fw_version",
.attrs = fw_attrs
};
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
{
return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
}
void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
{
sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
}
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
struct amdgpu_firmware_info *ucode,
uint64_t mc_addr, void *kptr)

View file

@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);

View file

@ -213,132 +213,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
struct dpg_pause_state *new_state)
{
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
struct amdgpu_ring *ring;
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
/* pause DPG non-jpeg */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
ring = &adev->vcn.ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
ring = &adev->vcn.ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg non-jpeg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.fw_based = new_state->fw_based;
}
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
/* Make sure JPRG Snoop is disabled before sending the pause */
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
/* pause DPG jpeg */
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
ring = &adev->vcn.ring_jpeg;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg jpeg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.jpeg = new_state->jpeg;
}
return 0;
}
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
@ -363,7 +237,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
adev->vcn.pause_dpg_mode(adev, &new_state);
}
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
@ -418,7 +292,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
adev->vcn.pause_dpg_mode(adev, &new_state);
}
}

View file

@ -45,6 +45,27 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
})
#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
do { \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
} while (0)
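A hedged usage sketch of the two indirect-access macros above; the register and masks are illustrative only, and adev must be in scope since both macros reference it directly:

/* Illustrative only: read a UVD register via the DPG LMA port and
 * write the same value back. */
uint32_t v = RREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, v, 0xFFFFFFFF, 0);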
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
@ -81,6 +102,8 @@ struct amdgpu_vcn {
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
struct dpg_pause_state pause_state;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
struct dpg_pause_state *new_state);
};
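With pause_dpg_mode turned into a callback, each VCN hardware generation can install its own implementation during init; a minimal sketch, assuming a vcn_v1_0-style hook (the function names are assumptions):

/* Sketch: an ASIC-specific early_init wiring up the DPG pause callback. */
static int vcn_v1_0_early_init_sketch(struct amdgpu_device *adev)
{
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
	return 0;
}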
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);

View file

@ -430,3 +430,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
return clk;
}
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
if (virt->ops && virt->ops->init_reg_access_mode)
virt->ops->init_reg_access_mode(adev);
}
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
{
bool ret = false;
struct amdgpu_virt *virt = &adev->virt;
if (amdgpu_sriov_vf(adev)
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
ret = true;
return ret;
}
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
{
bool ret = false;
struct amdgpu_virt *virt = &adev->virt;
if (amdgpu_sriov_vf(adev)
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
&& !(amdgpu_sriov_runtime(adev)))
ret = true;
return ret;
}
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
{
bool ret = false;
struct amdgpu_virt *virt = &adev->virt;
if (amdgpu_sriov_vf(adev)
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
ret = true;
return ret;
}
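These predicates let IP code pick a programming path based on what the SR-IOV host firmware exposes; for example, gfx_v9_0_init_golden_registers() later in this diff skips host-owned golden registers when skip_setting is reported. A minimal sketch of that pattern:

/* Sketch: skip programming registers the host owns on SR-IOV VFs. */
if (!amdgpu_virt_support_skip_setting(adev))
	soc15_program_register_sequence(adev, golden_settings_gc_9_0,
					ARRAY_SIZE(golden_settings_gc_9_0));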

View file

@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
/* Depending on the fw features, some new reg access modes are supported */
#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* direct mmio access */
#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
void (*init_reg_access_mode)(struct amdgpu_device *adev);
};
/*
@ -258,6 +265,7 @@ struct amdgpu_virt {
uint32_t gim_feature;
/* protect DPM events to GIM */
struct mutex dpm_mutex;
uint32_t reg_access_mode;
};
#define amdgpu_sriov_enabled(adev) \
@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
#endif

View file

@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
return &hive->device_list;
}
/**
* DOC: AMDGPU XGMI Support
*
* XGMI is a high speed interconnect that joins multiple GPU cards
* into a homogeneous memory space that is organized by a collective
* hive ID and individual node IDs, both of which are 64-bit numbers.
*
* The file xgmi_device_id contains the unique per GPU device ID and
* is stored in the /sys/class/drm/card${cardno}/device/ directory.
*
* Inside the device directory a sub-directory 'xgmi_hive_info' is
* created which contains the hive ID and the list of nodes.
*
* The hive ID is stored in:
* /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
*
* The node information is stored in numbered directories:
* /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
*
* Each device has its own xgmi_hive_info directory with a mirrored
* set of node sub-directories.
*
* The XGMI memory space is built by contiguously appending each node's
* VRAM, padded to a power of two, to the hive's address space.
*
*/
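A worked sketch of the power-of-two padding described above; roundup_pow_of_two() comes from linux/log2.h, while the array names are assumptions:

/* Sketch: with nodes of 16G, 16G and 12G of VRAM, the bases become
 * 0, 16G and 32G; the 12G node still occupies a full 16G slot. */
u64 base = 0;
int i;

for (i = 0; i < num_nodes; i++) {
	node_base[i] = base;
	base += roundup_pow_of_two(node_vram_size[i]);
}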
static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
struct device_attribute *attr, char *buf)
{
@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
hive->number_devices,
&hive->topology_info);
&adev->psp.xgmi_context.top_info);
if (ret)
dev_err(adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
return ret;
}
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
int i;
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
return top->nodes[i].num_hops;
return -EINVAL;
}
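A hedged sketch of how a peer-to-peer decision might consume the new hop count; the one-hop threshold is purely illustrative:

/* Sketch: gate P2P on topology distance. */
int hops = amdgpu_xgmi_get_hops_count(adev, peer_adev);
bool use_xgmi_p2p = (hops >= 0 && hops <= 1);	/* -EINVAL: not a peer */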
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info *hive_topology;
struct psp_xgmi_topology_info *top_info;
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev = NULL;
@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
hive_topology = &hive->topology_info;
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
hive_topology->nodes[count++].node_id = entry->node_id;
top_info->nodes[count++].node_id = entry->node_id;
top_info->num_nodes = count;
hive->number_devices = count;
/* Each psp need to get the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
/* update node list for other device in the hive */
if (tmp_adev != adev) {
top_info = &tmp_adev->psp.xgmi_context.top_info;
top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
top_info->num_nodes = count;
}
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
if (ret)
goto exit;
}
/* get latest topology info for each device from psp */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
&tmp_adev->psp.xgmi_context.top_info);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.node_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* TODO: continue even if some nodes fail, or disable the whole hive */
break;
goto exit;
}
}
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
if (ret)
break;
}
if (!ret)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
mutex_unlock(&hive->hive_lock);
exit:
if (!ret)
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
ret);
mutex_unlock(&hive->hive_lock);
exit:
return ret;
}

View file

@ -27,7 +27,6 @@
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
struct psp_xgmi_topology_info topology_info;
int number_devices;
struct mutex hive_lock, reset_lock;
struct kobject *kobj;
@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)

View file

@ -1805,6 +1805,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
{
uint64_t nak_r, nak_g;
/* Get the number of NAKs received and generated */
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
/* Add the total number of NAKs, i.e the number of replays */
return (nak_r + nak_g);
}
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@ -1822,6 +1834,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &cik_get_pcie_usage,
.need_reset_on_init = &cik_need_reset_on_init,
.get_pcie_replay_count = &cik_get_pcie_replay_count,
};
static int cik_common_early_init(void *handle)

View file

@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_DF_MGCG;
}
/* per-gpu struct holding the counter assignments */
struct df_v3_6_event_mask {
struct amdgpu_device gpu;
uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
};
/* get assigned df perfmon ctr as int */
static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
uint64_t config,
int *counter)
{
struct df_v3_6_event_mask *mask;
int i;
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
*counter = i;
return;
}
}
}
/* get address based on counter assignment */
static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
uint64_t config,
int is_ctrl,
uint32_t *lo_base_addr,
uint32_t *hi_base_addr)
{
int target_cntr = -1;
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
if (target_cntr < 0)
return;
switch (target_cntr) {
case 0:
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
break;
case 1:
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
break;
case 2:
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
break;
case 3:
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
break;
}
}
/* get read counter address */
static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
uint64_t config,
uint32_t *lo_base_addr,
uint32_t *hi_base_addr)
{
df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
}
/* get control counter settings i.e. address and values to set */
static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
uint64_t config,
uint32_t *lo_base_addr,
uint32_t *hi_base_addr,
uint32_t *lo_val,
uint32_t *hi_val)
{
uint32_t eventsel, instance, unitmask;
uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0;
df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
if (lo_val == NULL || hi_val == NULL)
return;
if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
*lo_base_addr, *hi_base_addr);
return;
}
eventsel = GET_EVENT(config);
instance = GET_INSTANCE(config);
unitmask = GET_UNITMASK(config);
es_5_0 = eventsel & 0x3FUL;
es_13_6 = instance;
es_13_0 = (es_13_6 << 6) + es_5_0;
es_13_12 = (es_13_0 & 0x03000UL) >> 12;
es_11_8 = (es_13_0 & 0x0F00UL) >> 8;
es_7_0 = es_13_0 & 0x0FFUL;
*lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8);
*hi_val = (es_11_8 | ((es_13_12)<<(29)));
}
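To make the bit slicing concrete, here is the arithmetic for the XGMI link-0 TX event (eventsel=0x7, instance=0x46, unitmask=0x2), worked step by step:

/*
 * es_5_0   = 0x07 & 0x3F               = 0x07
 * es_13_0  = (0x46 << 6) + 0x07        = 0x1187
 * es_13_12 = (0x1187 & 0x3000) >> 12   = 0x1
 * es_11_8  = (0x1187 & 0x0F00) >> 8    = 0x1
 * es_7_0   = 0x1187 & 0xFF             = 0x87
 * lo_val   = 0x87 | ((0x2 & 0xF) << 8) = 0x0287
 * hi_val   = 0x1 | (0x1 << 29)         = 0x20000001
 */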
/* assign df performance counters for read */
static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
uint64_t config,
int *is_assigned)
{
struct df_v3_6_event_mask *mask;
int i, target_cntr;
target_cntr = -1;
*is_assigned = 0;
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
if (target_cntr >= 0) {
*is_assigned = 1;
return 0;
}
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
if (mask->config_assign_mask[i] == 0ULL) {
mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
return 0;
}
}
return -ENOSPC;
}
/* release performance counter */
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
uint64_t config)
{
struct df_v3_6_event_mask *mask;
int target_cntr;
target_cntr = -1;
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
if (target_cntr >= 0)
mask->config_assign_mask[target_cntr] = 0ULL;
}
/*
* get xgmi link counters via programmable data fabric (df) counters (max 4)
* using cake tx event.
*
* @adev -> amdgpu device
* @instance -> currently cake has 2 links to poll on vega20
* @count -> returned counter value
*
*/
static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
int instance,
uint64_t *count)
{
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
uint64_t config;
config = GET_INSTANCE_CONFIG(instance);
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
&hi_base_addr);
if ((lo_base_addr == 0) || (hi_base_addr == 0))
return;
lo_val = RREG32_PCIE(lo_base_addr);
hi_val = RREG32_PCIE(hi_base_addr);
*count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
}
/*
* reset xgmi link counters
*
* @adev -> amdgpu device
* @instance -> currently cake has 2 links to poll on vega20
*
*/
static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
int instance)
{
uint32_t lo_base_addr, hi_base_addr;
uint64_t config;
config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16);
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
&hi_base_addr);
if ((lo_base_addr == 0) || (hi_base_addr == 0))
return;
WREG32_PCIE(lo_base_addr, 0UL);
WREG32_PCIE(hi_base_addr, 0UL);
}
/*
* add xgmi link counters
*
* @adev -> amdgpu device
* @instance -> currently cake has 2 links to poll on vega20
*
*/
static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
int instance)
{
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
uint64_t config;
int ret, is_assigned;
if (instance < 0 || instance > 1)
return -EINVAL;
config = GET_INSTANCE_CONFIG(instance);
ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
if (ret || is_assigned)
return ret;
df_v3_6_pmc_get_ctrl_settings(adev,
config,
&lo_base_addr,
&hi_base_addr,
&lo_val,
&hi_val);
WREG32_PCIE(lo_base_addr, lo_val);
WREG32_PCIE(hi_base_addr, hi_val);
return ret;
}
/*
* start xgmi link counters
*
* @adev -> amdgpu device
* @instance -> currently cake has 2 links to poll on vega20
* @is_enable -> either resume or assign event via df perfmon
*
*/
static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
int instance,
int is_enable)
{
uint32_t lo_base_addr, hi_base_addr, lo_val;
uint64_t config;
int ret;
if (instance < 0 || instance > 1)
return -EINVAL;
if (is_enable) {
ret = df_v3_6_add_xgmi_link_cntr(adev, instance);
if (ret)
return ret;
} else {
config = GET_INSTANCE_CONFIG(instance);
df_v3_6_pmc_get_ctrl_settings(adev,
config,
&lo_base_addr,
&hi_base_addr,
NULL,
NULL);
if (lo_base_addr == 0)
return -EINVAL;
lo_val = RREG32_PCIE(lo_base_addr);
WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
ret = 0;
}
return ret;
}
/*
* stop xgmi link counters
*
* @adev -> amdgpu device
* @instance -> currently cake has 2 links to poll on vega20
* @is_disable -> either pause or unassign event via df perfmon
*
*/
static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
int instance,
int is_disable)
{
uint32_t lo_base_addr, hi_base_addr, lo_val;
uint64_t config;
config = GET_INSTANCE_CONFIG(instance);
if (is_disable) {
df_v3_6_reset_xgmi_link_cntr(adev, instance);
df_v3_6_pmc_release_cntr(adev, config);
} else {
df_v3_6_pmc_get_ctrl_settings(adev,
config,
&lo_base_addr,
&hi_base_addr,
NULL,
NULL);
if ((lo_base_addr == 0) || (hi_base_addr == 0))
return -EINVAL;
lo_val = RREG32_PCIE(lo_base_addr);
WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
}
return 0;
}
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
int is_enable)
{
int xgmi_tx_link, ret = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
if (xgmi_tx_link >= 0)
ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link,
is_enable);
if (ret)
return ret;
ret = 0;
break;
default:
break;
}
return ret;
}
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
int is_disable)
{
int xgmi_tx_link, ret = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
if (xgmi_tx_link >= 0) {
ret = df_v3_6_stop_xgmi_link_cntr(adev,
xgmi_tx_link,
is_disable);
if (ret)
return ret;
}
ret = 0;
break;
default:
break;
}
return ret;
}
static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
uint64_t config,
uint64_t *count)
{
int xgmi_tx_link;
switch (adev->asic_type) {
case CHIP_VEGA20:
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
if (xgmi_tx_link >= 0) {
df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
}
break;
default:
break;
}
}
const struct amdgpu_df_funcs df_v3_6_funcs = {
.init = df_v3_6_init,
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
.update_medium_grain_clock_gating =
df_v3_6_update_medium_grain_clock_gating,
.get_clockgating_state = df_v3_6_get_clockgating_state,
.pmc_start = df_v3_6_pmc_start,
.pmc_stop = df_v3_6_pmc_stop,
.pmc_get_count = df_v3_6_pmc_get_count
};
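A hedged sketch of driving the three new hooks through the funcs table; it assumes the table is reachable as adev->df_funcs and reuses the GET_INSTANCE_CONFIG() encoding from df_v3_6.h:

/* Sketch: count XGMI link-0 TX traffic across a workload. */
uint64_t config = GET_INSTANCE_CONFIG(0);	/* event 0x7, inst 0x46 */
uint64_t count = 0;

adev->df_funcs->pmc_start(adev, config, 1);	/* assign and enable */
/* ... run the workload of interest ... */
adev->df_funcs->pmc_get_count(adev, config, &count);
adev->df_funcs->pmc_stop(adev, config, 1);	/* disable and release */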

View file

@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
};
/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
#define AMDGPU_DF_MAX_COUNTERS 4
/* get flags from df perfmon config */
#define GET_EVENT(x) (x & 0xFFUL)
#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL)
#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL)
#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \
| ((0x046ULL + x) << 8) \
| (0x02 << 16))
/* df event conf macros */
#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \
&& GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \
&& GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
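A quick worked check of these macros: GET_INSTANCE_CONFIG(0) = 0x07 | (0x46 << 8) | (0x2 << 16) = 0x024607, so:

/*
 * GET_EVENT(0x024607)    = 0x07
 * GET_INSTANCE(0x024607) = 0x46
 * GET_UNITMASK(0x024607) = 0x02   ->  IS_DF_XGMI_0_TX() is true
 */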
extern const struct amdgpu_df_funcs df_v3_6_funcs;
#endif

View file

@ -4495,12 +4495,8 @@ static int gfx_v7_0_sw_init(void *handle)
static int gfx_v7_0_sw_fini(void *handle)
{
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@ -5072,30 +5068,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asic gds info */
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;
adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
if (adev->gds.mem.total_size == 64 * 1024) {
adev->gds.mem.gfx_partition_size = 4096;
adev->gds.mem.cs_partition_size = 4096;
adev->gds.gws.gfx_partition_size = 4;
adev->gds.gws.cs_partition_size = 4;
adev->gds.oa.gfx_partition_size = 4;
adev->gds.oa.cs_partition_size = 1;
} else {
adev->gds.mem.gfx_partition_size = 1024;
adev->gds.mem.cs_partition_size = 1024;
adev->gds.gws.gfx_partition_size = 16;
adev->gds.gws.cs_partition_size = 16;
adev->gds.oa.gfx_partition_size = 4;
adev->gds.oa.cs_partition_size = 4;
}
}

View file

@ -2061,12 +2061,8 @@ static int gfx_v8_0_sw_init(void *handle)
static int gfx_v8_0_sw_fini(void *handle)
{
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@ -7014,30 +7010,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asic gds info */
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;
adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
if (adev->gds.mem.total_size == 64 * 1024) {
adev->gds.mem.gfx_partition_size = 4096;
adev->gds.mem.cs_partition_size = 4096;
adev->gds.gws.gfx_partition_size = 4;
adev->gds.gws.cs_partition_size = 4;
adev->gds.oa.gfx_partition_size = 4;
adev->gds.oa.cs_partition_size = 1;
} else {
adev->gds.mem.gfx_partition_size = 1024;
adev->gds.mem.cs_partition_size = 1024;
adev->gds.gws.gfx_partition_size = 16;
adev->gds.gws.cs_partition_size = 16;
adev->gds.oa.gfx_partition_size = 4;
adev->gds.oa.cs_partition_size = 4;
}
}
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,

View file

@ -38,6 +38,7 @@
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"
@ -311,12 +312,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
ARRAY_SIZE(golden_settings_gc_9_0));
soc15_program_register_sequence(adev,
golden_settings_gc_9_0_vg10,
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
if (!amdgpu_virt_support_skip_setting(adev)) {
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
ARRAY_SIZE(golden_settings_gc_9_0));
soc15_program_register_sequence(adev,
golden_settings_gc_9_0_vg10,
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
}
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@ -1462,8 +1465,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
/* GDS reserve memory: 64 bytes alignment */
adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
@ -1571,7 +1573,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
(adev->gds.mem.total_size +
(adev->gds.gds_size +
adev->gfx.ngg.gds_reserve_size));
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
@ -1785,10 +1787,6 @@ static int gfx_v9_0_sw_fini(void *handle)
kfree(ras_if);
}
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@ -1800,9 +1798,7 @@ static int gfx_v9_0_sw_fini(void *handle)
gfx_v9_0_mec_fini(adev);
gfx_v9_0_ngg_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
if (adev->asic_type == CHIP_RAVEN) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
@ -1838,7 +1834,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
else
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
@ -1906,8 +1902,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
soc15_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
}
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@ -1918,7 +1914,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v9_0_tiling_mode_table_init(adev);
@ -1935,17 +1931,17 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
} else {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
(adev->gmc.private_aperture_start >> 48));
tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
(adev->gmc.shared_aperture_start >> 48));
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
}
}
soc15_grbm_select(adev, 0, 0, 0, 0);
@ -1961,7 +1957,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
*/
gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend <<
PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
(adev->gfx.config.sc_prim_fifo_size_backend <<
@ -2028,11 +2024,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
/* csib */
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
adev->gfx.rlc.clear_state_size);
}
@ -2502,7 +2498,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
udelay(50);
}
@ -2700,9 +2696,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
int i;
if (enable) {
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].sched.ready = false;
@ -2763,9 +2759,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
tmp |= 0x80;
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
@ -2983,67 +2979,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
mqd->cp_hqd_eop_base_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
mqd->cp_hqd_eop_base_addr_hi);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
mqd->cp_hqd_eop_control);
/* enable doorbell? */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
mqd->cp_hqd_dequeue_request);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
mqd->cp_hqd_pq_rptr);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
mqd->cp_hqd_pq_wptr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
mqd->cp_hqd_pq_wptr_hi);
}
/* set the pointer to the MQD */
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
mqd->cp_mqd_base_addr_hi);
/* set MQD vmid to 0 */
WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
mqd->cp_mqd_control);
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
mqd->cp_hqd_pq_base_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
mqd->cp_hqd_pq_base_hi);
/* set up the HQD, this is similar to CP_RB0_CNTL */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
mqd->cp_hqd_pq_control);
/* set the wb address whether it's enabled or not */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->cp_hqd_pq_rptr_report_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->cp_hqd_pq_rptr_report_addr_hi);
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
mqd->cp_hqd_pq_wptr_poll_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
/* enable the doorbell if requested */
@ -3054,23 +3050,23 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
(adev->doorbell_index.userqueue_end * 2) << 2);
}
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
mqd->cp_hqd_pq_wptr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
mqd->cp_hqd_pq_wptr_hi);
/* set the vmid for the queue */
WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
mqd->cp_hqd_persistent_state);
/* activate the queue */
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
mqd->cp_hqd_active);
if (ring->use_doorbell)
@ -3087,7 +3083,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
@ -3099,21 +3095,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
DRM_DEBUG("KIQ dequeue request failed.\n");
/* Manual disable if dequeue request times out */
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
}
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
0);
}
WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
return 0;
}
@ -3533,6 +3529,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
static const u32 vgpr_init_compute_shader[] =
{
0xb07c0000, 0xbe8000ff,
0x000000f8, 0xbf110800,
0x7e000280, 0x7e020280,
0x7e040280, 0x7e060280,
0x7e080280, 0x7e0a0280,
0x7e0c0280, 0x7e0e0280,
0x80808800, 0xbe803200,
0xbf84fff5, 0xbf9c0000,
0xd28c0001, 0x0001007f,
0xd28d0001, 0x0002027e,
0x10020288, 0xb8810904,
0xb7814000, 0xd1196a01,
0x00000301, 0xbe800087,
0xbefc00c1, 0xd89c4000,
0x00020201, 0xd89cc080,
0x00040401, 0x320202ff,
0x00000800, 0x80808100,
0xbf84fff8, 0x7e020280,
0xbf810000, 0x00000000,
};
static const u32 sgpr_init_compute_shader[] =
{
0xb07c0000, 0xbe8000ff,
0x0000005f, 0xbee50080,
0xbe812c65, 0xbe822c65,
0xbe832c65, 0xbe842c65,
0xbe852c65, 0xb77c0005,
0x80808500, 0xbf84fff8,
0xbe800080, 0xbf810000,
};
static const struct soc15_reg_entry vgpr_init_regs[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
};
static const struct soc15_reg_entry sgpr_init_regs[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
};
static const struct soc15_reg_entry sec_ded_counter_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
{ SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
{ SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
{ SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
};
static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
int r, i, j;
unsigned total_size, vgpr_offset, sgpr_offset;
u64 gpu_addr;
/* only supported when RAS is enabled */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return 0;
/* bail if the compute ring is not ready */
if (!ring->sched.ready)
return 0;
total_size =
((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
total_size +=
((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
total_size = ALIGN(total_size, 256);
vgpr_offset = total_size;
total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
sgpr_offset = total_size;
total_size += sizeof(sgpr_init_compute_shader);
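/* Editor's note: the sizing above matches the PM4 stream built below --
 * each SET_SH_REG write costs 3 dwords (header + offset + value), the
 * shader start address costs 4 (header + offset + PGM_LO + PGM_HI),
 * DISPATCH_DIRECT costs 5 (header + x + y + z + initiator) and the CS
 * partial flush EVENT_WRITE costs 2; the trailing "* 4" converts dwords
 * to bytes before the 256-byte alignment of the shader blobs.
 */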
/* allocate an indirect buffer to put the commands in */
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, total_size, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
}
/* load the compute shaders */
for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
/* init the ib length to 0 */
ib.length_dw = 0;
/* VGPR */
/* write the register state for the compute dispatch */
for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
- PACKET3_SET_SH_REG_START;
ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
}
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
- PACKET3_SET_SH_REG_START;
ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
/* write dispatch packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
ib.ptr[ib.length_dw++] = 128; /* x */
ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */
ib.ptr[ib.length_dw++] =
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
/* write CS partial flush packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* SGPR */
/* write the register state for the compute dispatch */
for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
- PACKET3_SET_SH_REG_START;
ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
}
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
- PACKET3_SET_SH_REG_START;
ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
/* write dispatch packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
ib.ptr[ib.length_dw++] = 128; /* x */
ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */
ib.ptr[ib.length_dw++] =
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
/* write CS partial flush packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* schedule the ib on the ring */
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r) {
DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
goto fail;
}
/* wait for the GPU to finish processing the IB */
r = dma_fence_wait(f, false);
if (r) {
DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
goto fail;
}
/* read back registers to clear the counters */
mutex_lock(&adev->grbm_idx_mutex);
for (j = 0; j < 16; j++) {
gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
}
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
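/* Editor's sketch: 0xe0000000 restores broadcast routing. Per the GFX9
 * GRBM_GFX_INDEX layout (an assumption here; the field names are not
 * part of this diff) it sets SH_BROADCAST_WRITES (bit 29),
 * INSTANCE_BROADCAST_WRITES (bit 30) and SE_BROADCAST_WRITES (bit 31),
 * so later register writes reach every SE/SH/instance again.
 */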
mutex_unlock(&adev->grbm_idx_mutex);
fail:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
return r;
}
static int gfx_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -3574,8 +3805,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)
return 0;
}
if (*ras_if)
/* requires IBs so do in late init after IB pool is initialized */
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
if (r)
return r;
/* handle resume path. */
if (*ras_if) {
/* resend ras TA enable cmd during resume.
* prepare to handle failure.
*/
ih_info.head = **ras_if;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r) {
if (r == -EAGAIN) {
/* request a gpu reset. will run again. */
amdgpu_ras_request_reset_on_boot(adev,
AMDGPU_RAS_BLOCK__GFX);
return 0;
}
/* fail to enable ras, cleanup all. */
goto irq;
}
/* enable successfully. continue. */
goto resume;
}
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@ -3584,8 +3838,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r)
if (r) {
if (r == -EAGAIN) {
amdgpu_ras_request_reset_on_boot(adev,
AMDGPU_RAS_BLOCK__GFX);
r = 0;
}
goto feature;
}
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@ -3618,7 +3878,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
feature:
kfree(*ras_if);
*ras_if = NULL;
return -EINVAL;
return r;
}
static int gfx_v9_0_late_init(void *handle)
@ -4323,8 +4583,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@ -5060,13 +5320,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
adev->gds.mem.total_size = 0x10000;
adev->gds.gds_size = 0x10000;
break;
case CHIP_RAVEN:
adev->gds.mem.total_size = 0x1000;
adev->gds.gds_size = 0x1000;
break;
default:
adev->gds.mem.total_size = 0x10000;
adev->gds.gds_size = 0x10000;
break;
}
@ -5090,28 +5350,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
break;
}
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;
if (adev->gds.mem.total_size == 64 * 1024) {
adev->gds.mem.gfx_partition_size = 4096;
adev->gds.mem.cs_partition_size = 4096;
adev->gds.gws.gfx_partition_size = 4;
adev->gds.gws.cs_partition_size = 4;
adev->gds.oa.gfx_partition_size = 4;
adev->gds.oa.cs_partition_size = 1;
} else {
adev->gds.mem.gfx_partition_size = 1024;
adev->gds.mem.cs_partition_size = 1024;
adev->gds.gws.gfx_partition_size = 16;
adev->gds.gws.cs_partition_size = 16;
adev->gds.oa.gfx_partition_size = 4;
adev->gds.oa.cs_partition_size = 4;
}
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
}
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,

View file

@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
/* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
* workaround that increases the system aperture high address (add 1)
* to get rid of the VM fault and hardware hang.
*/
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max((adev->gmc.fb_end >> 18) + 0x1,
adev->gmc.agp_end >> 18));
else
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
@ -146,12 +146,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp);
WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
@ -163,12 +163,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp);
WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL4, tmp);
}
static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}
@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
0);
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);

View file

@ -292,7 +292,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
* Load the GDDR MC ucode into the hw (CIK).
* Load the GDDR MC ucode into the hw (VI).
* Returns 0 on success, error on failure.
*/
static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
@ -446,7 +446,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
*
* Set the location of vram, gart, and AGP in the GPU's
* physical address space (CIK).
* physical address space (VI).
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
@ -518,7 +518,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
*
* Look up the amount of vram, vram width, and decide how to place
* vram and gart within the GPU's physical address space (CIK).
* vram and gart within the GPU's physical address space (VI).
* Returns 0 for success.
*/
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
@ -633,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
* Flush the TLB for the requested page table (CIK).
* Flush the TLB for the requested page table (VI).
*/
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
@ -803,7 +803,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
* This sets up the TLBs, programs the page tables for VMID0,
* sets up the hw for VMIDs 1-15 which are allocated on
* demand, and sets up the global locations for the LDS, GDS,
* and GPUVM for FSA64 clients (CIK).
* and GPUVM for FSA64 clients (VI).
* Returns 0 for success, errors for failure.
*/
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
@ -951,7 +951,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
* This disables all VM page table (CIK).
* This disables all VM page tables (VI).
*/
static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
{
@ -981,7 +981,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
*
* Print human readable fault information (CIK).
* Print human readable fault information (VI).
*/
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
u32 addr, u32 mc_client, unsigned pasid)

View file

@ -691,8 +691,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
return 0;
}
/* handle resume path. */
if (*ras_if)
if (*ras_if) {
/* resend ras TA enable cmd during resume.
* prepare to handle failure.
*/
ih_info.head = **ras_if;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r) {
if (r == -EAGAIN) {
/* request a gpu reset. will run again. */
amdgpu_ras_request_reset_on_boot(adev,
AMDGPU_RAS_BLOCK__UMC);
return 0;
}
/* fail to enable ras, cleanup all. */
goto irq;
}
/* enable successfully. continue. */
goto resume;
}
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@ -701,8 +718,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r)
if (r) {
if (r == -EAGAIN) {
amdgpu_ras_request_reset_on_boot(adev,
AMDGPU_RAS_BLOCK__UMC);
r = 0;
}
goto feature;
}
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@ -735,7 +758,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
feature:
kfree(*ras_if);
*ras_if = NULL;
return -EINVAL;
return r;
}
@ -1104,6 +1127,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
if (amdgpu_virt_support_skip_setting(adev))
break;
/* fall through */
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
@ -1168,6 +1194,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
/* After HDP is initialized, flush HDP.*/
adev->nbio_funcs->hdp_flush(adev, NULL);

View file

@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
if (amdgpu_virt_support_skip_setting(adev))
return;
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
if (amdgpu_virt_support_skip_setting(adev))
return;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
if (amdgpu_virt_support_skip_setting(adev))
return;
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0XFFFFFFFF);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
if (!amdgpu_virt_support_skip_setting(adev)) {
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
}
}
/**
@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
if (amdgpu_virt_support_skip_setting(adev))
return;
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);

View file

@ -26,6 +26,7 @@
#include "nbio/nbio_6_1_sh_mask.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "mp/mp_9_0_offset.h"
#include "soc15.h"
#include "vega10_ih.h"
#include "soc15_common.h"
@ -343,7 +344,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
&& adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
amdgpu_device_gpu_recover(adev, NULL);
}
@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
{
uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
if (rlc_fw_ver >= 0x5d)
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
if (sos_fw_ver >= 0x80455)
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
if (sos_fw_ver >= 0x8045b)
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
}
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.trans_msg = xgpu_ai_mailbox_trans_msg,
.get_pp_clk = xgpu_ai_get_pp_clk,
.force_dpm_level = xgpu_ai_force_dpm_level,
.init_reg_access_mode = xgpu_ai_init_reg_access_mode,
};

View file

@ -29,9 +29,18 @@
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "vega10_enum.h"
#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
}
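/* Editor's sketch of where the flush registers land after this remap,
 * assuming 4 KiB pages and the KFD_MMIO_REMAP_* byte offsets 0 and 4
 * from kfd_ioctl.h (an assumption, not shown in this diff):
 *   rmmio_remap.reg_offset = 0x80000 - 0x1000 = 0x7f000
 *   HDP memory flush   -> byte offset 0x7f000 + 0
 *   HDP register flush -> byte offset 0x7f000 + 4
 * nbio_v7_0_hdp_flush() below then writes the dword index
 * ((reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2) instead of
 * mmHDP_MEM_COHERENCY_FLUSH_CNTL directly.
 */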
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.detect_hw_virt = nbio_v7_0_detect_hw_virt,
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
};

View file

@ -27,9 +27,18 @@
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "nbio/nbio_7_4_0_smn.h"
#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
}
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers,
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
};

View file

@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
};
@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
enum psp_gfx_fw_type fw_type; /* FW type */
};
/* Command to setup register program */
struct psp_gfx_cmd_reg_prog {
uint32_t reg_value;
uint32_t reg_id;
};
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@ -226,6 +233,7 @@ union psp_gfx_commands
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
};

View file

@ -52,6 +52,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
static int psp_v3_1_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type);
static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@ -298,27 +302,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
psp_v3_1_reroute_ih(psp);
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
/* Write high address of the ring to C2PMSG_70 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
/* Write size of ring to C2PMSG_71 */
psp_ring_reg = ring->ring_size;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
/* Write the ring initialization command to C2PMSG_64 */
psp_ring_reg = ring_type;
psp_ring_reg = psp_ring_reg << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
if (psp_v3_1_support_vmr_ring(psp)) {
ret = psp_v3_1_ring_stop(psp, ring_type);
if (ret) {
DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
return ret;
}
/* there might be a handshake issue with the hardware which needs a delay */
mdelay(20);
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
/* Write high address of the ring to C2PMSG_103 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
/* No size initialization for sriov */
/* Write the ring initialization command to C2PMSG_101 */
psp_ring_reg = ring_type;
psp_ring_reg = psp_ring_reg << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x8000FFFF, false);
/* there might be a hardware handshake issue which needs a delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
mmMP0_SMN_C2PMSG_101), 0x80000000,
0x8000FFFF, false);
} else {
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
/* Write high address of the ring to C2PMSG_70 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
/* Write size of ring to C2PMSG_71 */
psp_ring_reg = ring->ring_size;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
/* Write the ring initialization command to C2PMSG_64 */
psp_ring_reg = ring_type;
psp_ring_reg = psp_ring_reg << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
/* there might be a hardware handshake issue which needs a delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
mmMP0_SMN_C2PMSG_64), 0x80000000,
0x8000FFFF, false);
}
return ret;
}
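/* Editor's note: both branches above speak the same mailbox protocol on
 * different registers. SR-IOV (VMR ring): ring address in C2PMSG_102/103,
 * command and bit-31 response in C2PMSG_101, no size programming; bare
 * metal: ring address in C2PMSG_69/70, size in C2PMSG_71, command and
 * response in C2PMSG_64.
 */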
@ -329,16 +363,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
/* Write the ring destroy command to C2PMSG_64 */
psp_ring_reg = 3 << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
if (psp_v3_1_support_vmr_ring(psp)) {
/* Write the Destroy GPCOM ring command to C2PMSG_101 */
psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
/* there might be a handshake issue with the hardware which needs a delay */
mdelay(20);
/* there might be a handshake issue which needs a delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x80000000, false);
/* Wait for response flag (bit 31) in C2PMSG_101 */
ret = psp_wait_for(psp,
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
0x80000000, 0x80000000, false);
} else {
/* Write the ring destroy command to C2PMSG_64 */
psp_ring_reg = 3 << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
/* there might be a handshake issue which needs a delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp,
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x80000000, false);
}
return ret;
}
@ -377,7 +426,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
/* KM (GPCOM) prepare write pointer */
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
if (psp_v3_1_support_vmr_ring(psp))
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
else
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
/* Update KM RB frame pointer to new frame */
/* write_frame ptr increments by size of rb_frame in bytes */
@ -406,7 +458,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
if (psp_v3_1_support_vmr_ring(psp)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
/* send interrupt to PSP for SRIOV ring write pointer update */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_CONSUME_CMD);
} else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
return 0;
}
@ -576,6 +634,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
return 0;
}
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
{
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
return true;
return false;
}
static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@ -588,6 +654,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
.compare_sram_data = psp_v3_1_compare_sram_data,
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
.mode1_reset = psp_v3_1_mode1_reset,
.support_vmr_ring = psp_v3_1_support_vmr_ring,
};
void psp_v3_1_set_psp_funcs(struct psp_context *psp)

View file

@ -213,12 +213,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
soc15_program_register_sequence(adev,
golden_settings_sdma_4,
ARRAY_SIZE(golden_settings_sdma_4));
soc15_program_register_sequence(adev,
golden_settings_sdma_vg10,
ARRAY_SIZE(golden_settings_sdma_vg10));
if (!amdgpu_virt_support_skip_setting(adev)) {
soc15_program_register_sequence(adev,
golden_settings_sdma_4,
ARRAY_SIZE(golden_settings_sdma_4));
soc15_program_register_sequence(adev,
golden_settings_sdma_vg10,
ARRAY_SIZE(golden_settings_sdma_vg10));
}
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@ -1524,8 +1526,25 @@ static int sdma_v4_0_late_init(void *handle)
}
/* handle resume path. */
if (*ras_if)
if (*ras_if) {
/* resend ras TA enable cmd during resume.
* prepare to handle failure.
*/
ih_info.head = **ras_if;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r) {
if (r == -EAGAIN) {
/* request a gpu reset. will run again. */
amdgpu_ras_request_reset_on_boot(adev,
AMDGPU_RAS_BLOCK__SDMA);
return 0;
}
/* fail to enable ras, cleanup all. */
goto irq;
}
/* enable successfully. continue. */
goto resume;
}
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@ -1534,8 +1553,14 @@ static int sdma_v4_0_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r)
if (r) {
if (r == -EAGAIN) {
amdgpu_ras_request_reset_on_boot(adev,
AMDGPU_RAS_BLOCK__SDMA);
r = 0;
}
goto feature;
}
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@ -1574,7 +1599,7 @@ static int sdma_v4_0_late_init(void *handle)
feature:
kfree(*ras_if);
*ras_if = NULL;
return -EINVAL;
return r;
}
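/* Editor's sketch (hypothetical helper, not part of this diff; the
 * ras_common_if type is assumed from amdgpu_ras.h) of the resume-path
 * pattern that gfx_v9_0, gmc_v9_0 and sdma_v4_0 now share: re-enable the
 * RAS feature, request a boot-time GPU reset on -EAGAIN, and let the
 * caller tear everything down on any other error.
 */
static int ras_reenable_on_resume(struct amdgpu_device *adev,
				  struct ras_common_if *ras_if,
				  unsigned int block)
{
	int r = amdgpu_ras_feature_enable_on_boot(adev, ras_if, 1);

	if (r == -EAGAIN) {
		/* request a gpu reset; late init will run again */
		amdgpu_ras_request_reset_on_boot(adev, block);
		return 0;
	}
	return r;
}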
static int sdma_v4_0_sw_init(void *handle)

View file

@ -1376,6 +1376,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
{
uint64_t nak_r, nak_g;
/* Get the number of NAKs received and generated */
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
/* Add the two counters to get the total number of NAKs, i.e. the number of replays */
return (nak_r + nak_g);
}
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@ -1394,6 +1406,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.need_full_reset = &si_need_full_reset,
.get_pcie_usage = &si_get_pcie_usage,
.need_reset_on_init = &si_need_reset_on_init,
.get_pcie_replay_count = &si_get_pcie_replay_count,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)

View file

@ -45,6 +45,7 @@
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
#include "nbio/nbio_7_0_default.h"
#include "nbio/nbio_7_0_offset.h"
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "mp/mp_9_0_offset.h"
@ -65,6 +66,9 @@
#include "dce_virtual.h"
#include "mxgpu_ai.h"
#include "amdgpu_smu.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
@ -231,7 +235,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
@ -386,7 +390,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
tmp &= ~(entry->and_mask);
tmp |= entry->or_mask;
}
WREG32(reg, tmp);
if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
WREG32_RLC(reg, tmp);
else
WREG32(reg, tmp);
}
}
@ -476,6 +488,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
soc15_asic_get_baco_capability(adev, &baco_reset);
else
baco_reset = false;
if (baco_reset) {
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (hive || (ras && ras->supported))
baco_reset = false;
}
break;
default:
baco_reset = false;
@ -607,12 +626,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA20:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
if (adev->asic_type == CHIP_VEGA20)
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
else
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
/* For Vega10 SR-IOV, the PSP needs to be initialized before the IH */
if (amdgpu_sriov_vf(adev)) {
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
if (adev->asic_type == CHIP_VEGA20)
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
else
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
}
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
} else {
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
if (adev->asic_type == CHIP_VEGA20)
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
else
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
}
}
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
@ -734,7 +765,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
return false;
if (!amdgpu_passthrough(adev))
return false;
if (adev->flags & AMD_IS_APU)
return false;
@ -749,6 +781,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
{
uint64_t nak_r, nak_g;
/* Get the number of NAKs received and generated */
nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
/* Add the two counters to get the total number of NAKs, i.e. the number of replays */
return (nak_r + nak_g);
}
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
@ -766,6 +810,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.init_doorbell_index = &vega10_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
@ -785,12 +830,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.init_doorbell_index = &vega20_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
};
static int soc15_common_early_init(void *handle)
{
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &soc15_pcie_rreg;
@ -999,11 +1048,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
int i;
struct amdgpu_ring *ring;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
adev->nbio_funcs->sdma_doorbell_range(adev, i,
ring->use_doorbell, ring->doorbell_index,
adev->doorbell_index.sdma_doorbell_range);
/* Two reasons to skip:
* 1. the host driver has already programmed them
* 2. to avoid register programming violations in SR-IOV
*/
if (!amdgpu_virt_support_skip_setting(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
adev->nbio_funcs->sdma_doorbell_range(adev, i,
ring->use_doorbell, ring->doorbell_index,
adev->doorbell_index.sdma_doorbell_range);
}
}
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
@ -1020,6 +1075,12 @@ static int soc15_common_hw_init(void *handle)
soc15_program_aspm(adev);
/* setup nbio registers */
adev->nbio_funcs->init_registers(adev);
/* remap HDP registers to a hole in mmio space,
* in order to expose those registers
* to process space
*/
if (adev->nbio_funcs->remap_hdp_registers)
adev->nbio_funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
/* HW doorbell routing policy: doorbell writing not

View file

@ -42,8 +42,18 @@ struct soc15_reg_golden {
u32 or_mask;
};
struct soc15_reg_entry {
uint32_t hwip;
uint32_t inst;
uint32_t seg;
uint32_t reg_offset;
uint32_t reg_value;
};
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }

View file

@ -69,26 +69,60 @@
} \
} while (0)
#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
#define WREG32_RLC(reg, value) \
do { \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
uint32_t i = 0; \
uint32_t retries = 50000; \
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
WREG32(r0, value); \
WREG32(r1, (reg | 0x80000000)); \
WREG32(spare_int, 0x1); \
for (i = 0; i < retries; i++) { \
u32 tmp = RREG32(r1); \
if (!(tmp & 0x80000000)) \
break; \
udelay(10); \
} \
if (i >= retries) \
pr_err("timeout: rlcg program reg:0x%05x failed!\n", reg); \
} else { \
WREG32(reg, value); \
} \
} while (0)
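/* Editor's sketch: the macro above unrolled as a plain function for
 * readability (same register flow; not a replacement):
 */
static void wreg32_rlc_sketch(struct amdgpu_device *adev,
			      uint32_t reg, uint32_t value)
{
	uint32_t i;

	/* hand the request to the RLC: value, then reg with bit 31 set */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0), value);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1), reg | 0x80000000);
	/* kick the RLC firmware */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT), 0x1);
	/* the RLC clears bit 31 once it has programmed the register */
	for (i = 0; i < 50000; i++) {
		if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1)) & 0x80000000))
			return;
		udelay(10);
	}
	pr_err("timeout: rlcg program reg:0x%05x failed!\n", reg);
}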
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
if (target_reg == grbm_cntl) \
WREG32(r2, value); \
else if (target_reg == grbm_idx) \
WREG32(r3, value); \
WREG32(target_reg, value); \
} else { \
WREG32(target_reg, value); \
} \
} while (0)
#define WREG32_SOC15_RLC(ip, inst, reg, value) \
do { \
uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
WREG32_RLC(target_reg, value); \
} while (0)
#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
(RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
& ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
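/* Editor's usage sketch: mirroring the plain WREG32_FIELD15() call in
 * gfxhub_v1_0_gart_disable() above,
 *
 *	WREG32_FIELD15_RLC(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
 *
 * reads VM_L2_CNTL, clears the ENABLE_L2_CACHE field and writes the
 * result back through WREG32_RLC(), taking the RLC scratch-register
 * path when amdgpu_virt_support_rlc_prg_reg() is true.
 */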
#endif

View file

@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = uvd_v4_2_ring_get_rptr,
.get_wptr = uvd_v4_2_ring_get_wptr,
.set_wptr = uvd_v4_2_ring_set_wptr,

View file

@ -850,6 +850,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = uvd_v5_0_ring_get_rptr,
.get_wptr = uvd_v5_0_ring_get_wptr,
.set_wptr = uvd_v5_0_ring_set_wptr,

View file

@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = uvd_v6_0_enc_ring_get_rptr,
.get_wptr = uvd_v6_0_enc_ring_get_wptr,
.set_wptr = uvd_v6_0_enc_ring_set_wptr,

View file

@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
.get_wptr = uvd_v7_0_enc_ring_get_wptr,

View file

@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = vce_v2_0_ring_get_rptr,
.get_wptr = vce_v2_0_ring_get_wptr,
.set_wptr = vce_v2_0_ring_set_wptr,

View file

@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,

View file

@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,

View file

@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct dpg_pause_state *new_state);
/**
* vcn_v1_0_early_init - set function pointers
@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
return r;
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
return 0;
}
/**
@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
return r;
}
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct dpg_pause_state *new_state)
{
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
struct amdgpu_ring *ring;
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
/* pause DPG non-jpeg */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
ring = &adev->vcn.ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
ring = &adev->vcn.ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg non-jpeg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.fw_based = new_state->fw_based;
}
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
/* Make sure JRBC snoop is disabled before sending the pause */
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
/* pause DPG jpeg */
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
ring = &adev->vcn.ring_jpeg;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg jpeg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.jpeg = new_state->jpeg;
}
return 0;
}
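/* Editor's summary of the non-JPEG pause leg above:
 * 1. wait for UVD_POWER_STATUS to report the tiles off
 * 2. set NJ_PAUSE_DPG_REQ in UVD_DPG_PAUSE and wait for the ACK
 * 3. restore both encode rings and the decode write pointer
 * 4. wait for UVD_POWER_STATUS to report UVDM/UVDU powered on
 * The JPEG leg has the same shape, plus disabling JRBC snoop before the
 * pause and restoring the JPEG ring; unpause just clears the REQ bit.
 */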
static bool vcn_v1_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
.no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
.align_mask = 0xf,
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.extra_dw = 64,
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,

View file

@ -50,14 +50,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
return;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
}
adev->irq.ih.enabled = true;
if (adev->irq.ih1.ring_size) {
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
return;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
}
adev->irq.ih1.enabled = true;
}
@ -65,7 +80,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
return;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
}
adev->irq.ih2.enabled = true;
}
}
@ -83,7 +106,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
return;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
}
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
@ -94,7 +125,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
return;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
}
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@ -106,7 +145,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
return;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
}
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
@ -189,7 +237,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
!!adev->irq.msi_enabled);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
return -ETIMEDOUT;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
}
/* set the writeback address whether it's enabled or not */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@ -216,7 +272,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WPTR_OVERFLOW_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
RB_FULL_DRAIN_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
return -ETIMEDOUT;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
}
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@ -234,7 +298,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
ih_rb_cntl)) {
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
return -ETIMEDOUT;
}
} else {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
}
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
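The PSP-vs-direct write pattern above now repeats for IH_RB_CNTL and both ring variants. A hypothetical helper could capture it once; in the sketch below, psp_reg_program() and the PSP_REG_* ids come from this patch, while the helper name and the raw-offset WREG32 fallback are illustrative assumptions:
/* Sketch only: one place for the PSP-vs-direct IH ring-control programming. */
static int vega10_ih_prog_rb_cntl(struct amdgpu_device *adev,
				  enum psp_reg_prog_id reg_id,
				  u32 reg_offset, u32 ih_rb_cntl)
{
	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
		/* Under SR-IOV the PSP must program the register for us */
		if (psp_reg_program(&adev->psp, reg_id, ih_rb_cntl)) {
			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
			return -ETIMEDOUT;
		}
		return 0;
	}
	WREG32(reg_offset, ih_rb_cntl);	/* bare metal: direct MMIO write */
	return 0;
}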


@ -989,6 +989,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
{
uint64_t nak_r, nak_g;
/* Get the number of NAKs received and generated */
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
/* Add the total number of NAKs, i.e. the number of replays */
return (nak_r + nak_g);
}
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
{
u32 clock_cntl, pc;
@ -1023,6 +1035,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &vi_get_pcie_usage,
.need_reset_on_init = &vi_need_reset_on_init,
.get_pcie_replay_count = &vi_get_pcie_replay_count,
};
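For context, consumers reach the new counter through the asic_funcs table just wired up above; a minimal sketch (the wrapper function below is an assumption for illustration, not part of this diff):
/* Sketch: read the PCIe replay counter via the callback wired above. */
static uint64_t example_pcie_replay_count(struct amdgpu_device *adev)
{
	return adev->asic_funcs->get_pcie_replay_count(adev);
}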
#define CZ_REV_BRISTOL(rev) \


@ -21,7 +21,7 @@
*/
static const uint32_t cwsr_trap_gfx8_hex[] = {
0xbf820001, 0xbf82012b,
0xbf820001, 0xbf820121,
0xb8f4f802, 0x89748674,
0xb8f5f803, 0x8675ff75,
0x00000400, 0xbf850017,
@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x8671ff71, 0x0000ffff,
0x8f728374, 0xb972e0c2,
0xbf800002, 0xb9740002,
0xbe801f70, 0xb8f5f803,
0x8675ff75, 0x00000100,
0xbf840006, 0xbefa0080,
0xb97a0203, 0x8671ff71,
0x0000ffff, 0x80f08870,
0x82f18071, 0xbefa0080,
0xbe801f70, 0xbefa0080,
0xb97a0283, 0xbef60068,
0xbef70069, 0xb8fa1c07,
0x8e7a9c7a, 0x87717a71,
@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf820001, 0xbf82015d,
0xbf820001, 0xbf82015e,
0xb8f8f802, 0x89788678,
0xb8f1f803, 0x866eff71,
0x00000400, 0xbf850037,
0x866eff71, 0x00000800,
0xbf850003, 0x866eff71,
0x00000100, 0xbf840008,
0xb8fbf803, 0x866eff7b,
0x00000400, 0xbf85003b,
0x866eff7b, 0x00000800,
0xbf850003, 0x866eff7b,
0x00000100, 0xbf84000c,
0x866eff78, 0x00002000,
0xbf840001, 0xbf810000,
0xbf840005, 0xbf8e0010,
0xb8eef803, 0x866eff6e,
0x00000400, 0xbf84fffb,
0x8778ff78, 0x00002000,
0x80ec886c, 0x82ed806d,
0xb8eef807, 0x866fff6e,
@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x8977ff77, 0xfc000000,
0x87776f77, 0x896eff6e,
0x001f8000, 0xb96ef807,
0xb8f0f812, 0xb8f1f813,
0x8ef08870, 0xc0071bb8,
0xb8faf812, 0xb8fbf813,
0x8efa887a, 0xc0071bbd,
0x00000000, 0xbf8cc07f,
0xc0071c38, 0x00000008,
0xc0071ebd, 0x00000008,
0xbf8cc07f, 0x86ee6e6e,
0xbf840001, 0xbe801d6e,
0xb8f1f803, 0x8671ff71,
0xb8fbf803, 0x867bff7b,
0x000001ff, 0xbf850002,
0x806c846c, 0x826d806d,
0x866dff6d, 0x0000ffff,
@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x8f6e8378, 0xb96ee0c2,
0xbf800002, 0xb9780002,
0xbe801f6c, 0x866dff6d,
0x0000ffff, 0xbef00080,
0xb9700283, 0xb8f02407,
0x8e709c70, 0x876d706d,
0xb8f003c7, 0x8e709b70,
0x876d706d, 0xb8f0f807,
0x8670ff70, 0x00007fff,
0xb970f807, 0xbeee007e,
0x0000ffff, 0xbefa0080,
0xb97a0283, 0xb8fa2407,
0x8e7a9b7a, 0x876d7a6d,
0xb8fa03c7, 0x8e7a9a7a,
0x876d7a6d, 0xb8faf807,
0x867aff7a, 0x00007fff,
0xb97af807, 0xbeee007e,
0xbeef007f, 0xbefe0180,
0xbf900004, 0x87708478,
0xb970f802, 0xbf8e0002,
0xbf88fffe, 0xb8f02a05,
0xbf900004, 0x877a8478,
0xb97af802, 0xbf8e0002,
0xbf88fffe, 0xb8fa2a05,
0x807a817a, 0x8e7a8a7a,
0xb8fb1605, 0x807b817b,
0x8e7b867b, 0x807a7b7a,
0x807a7e7a, 0x827b807f,
0x867bff7b, 0x0000ffff,
0xc04b1c3d, 0x00000050,
0xbf8cc07f, 0xc04b1d3d,
0x00000060, 0xbf8cc07f,
0xc0431e7d, 0x00000074,
0xbf8cc07f, 0xbef4007e,
0x8675ff7f, 0x0000ffff,
0x8775ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x00807fac, 0x867aff7f,
0x08000000, 0x8f7a837a,
0x87777a77, 0x867aff7f,
0x70000000, 0x8f7a817a,
0x87777a77, 0xbef1007c,
0xbef00080, 0xb8f02a05,
0x80708170, 0x8e708a70,
0xb8f11605, 0x80718171,
0x8e718671, 0x80707170,
0x80707e70, 0x8271807f,
0x8671ff71, 0x0000ffff,
0xc0471cb8, 0x00000040,
0xbf8cc07f, 0xc04b1d38,
0x00000048, 0xbf8cc07f,
0xc0431e78, 0x00000058,
0xbf8cc07f, 0xc0471eb8,
0x0000005c, 0xbf8cc07f,
0xb8fa1605, 0x807a817a,
0x8e7a867a, 0x80707a70,
0xbef60084, 0xbef600ff,
0x01000000, 0xbefe007c,
0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611e3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xb8fbf803,
0xbefe007c, 0xbefc0070,
0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611a7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xb8f1f801,
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0x867aff7f,
0x04000000, 0xbeef0080,
0x876f6f7a, 0xb8f02a05,
0x80708170, 0x8e708a70,
0xb8fb1605, 0x807b817b,
0x8e7b847b, 0x8e76827b,
0xbef600ff, 0x01000000,
0xbef20174, 0x80747074,
0x82758075, 0xbefc0080,
0xbf800000, 0xbe802b00,
0xbe822b02, 0xbe842b04,
0xbe862b06, 0xbe882b08,
0xbe8a2b0a, 0xbe8c2b0c,
0xbe8e2b0e, 0xc06b003a,
0x00000000, 0xbf8cc07f,
0xc06b013a, 0x00000010,
0xbf8cc07f, 0xc06b023a,
0x00000020, 0xbf8cc07f,
0xc06b033a, 0x00000030,
0xbf8cc07f, 0x8074c074,
0x82758075, 0x807c907c,
0xbf0a7b7c, 0xbf85ffe7,
0xbef40172, 0xbef00080,
0xbefe00c1, 0xbeff00c1,
0xbee80080, 0xbee90080,
0xbef600ff, 0x01000000,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
0xb8fb4306, 0x867bc17b,
0xbf84002c, 0xbf8a0000,
0x867aff6f, 0x04000000,
0xbf840028, 0x8e7b867b,
0x8e7b827b, 0xbef6007b,
0xb8f02a05, 0x80708170,
0x8e708a70, 0xb8fa1605,
0x807a817a, 0x8e7a867a,
0x80707a70, 0x8070ff70,
0x00000080, 0xbef600ff,
0x01000000, 0xbefc0080,
0xd28c0002, 0x000100c1,
0xd28d0003, 0x000204c1,
0xd1060002, 0x00011103,
0x7e0602ff, 0x00000200,
0xbefc00ff, 0x00010000,
0xbe800077, 0x8677ff77,
0xff7fffff, 0x8777ff77,
0x00058000, 0xd8ec0000,
0x00000002, 0xbf8cc07f,
0xe0765000, 0x701d0002,
0x68040702, 0xd0c9006a,
0x0000f702, 0xbf87fff7,
0xbef70000, 0xbef000ff,
0x00000400, 0xbefe00c1,
0xbeff00c1, 0xb8fb2a05,
0x807b817b, 0x8e7b827b,
0x8e76887b, 0xbef600ff,
0x01000000, 0xbefc0084,
0xbf0a7b7c, 0xbf840015,
0xbf11017c, 0x807bff7b,
0x00001000, 0x7e000300,
0x7e020301, 0x7e040302,
0x7e060303, 0xe0724000,
0x701d0000, 0xe0724100,
0x701d0100, 0xe0724200,
0x701d0200, 0xe0724300,
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
0xbf9c0000, 0xbf8200da,
0xbef4007e, 0x8675ff7f,
0x0000ffff, 0x8775ff75,
0x00040000, 0xbef60080,
0xbef700ff, 0x00807fac,
0x8670ff7f, 0x08000000,
0x8f708370, 0x87777077,
0x8670ff7f, 0x70000000,
0x8f708170, 0x87777077,
0xbefb007c, 0xbefa0080,
0xb8fa2a05, 0x807a817a,
0x8e7a8a7a, 0xb8f01605,
0x80708170, 0x8e708670,
0x807a707a, 0xbef60084,
0xbef600ff, 0x01000000,
0xbefe007c, 0xbefc007a,
0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611b3a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611b7a, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611bba,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611bfa, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611e3a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xb8f1f803, 0xbefe007c,
0xbefc007a, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611a3a, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611a7a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xb8fbf801, 0xbefe007c,
0xbefc007a, 0xc0611efa,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0x8670ff7f, 0x04000000,
0xbeef0080, 0x876f6f70,
0xb8fa2a05, 0x807a817a,
0x8e7a8a7a, 0xb8f11605,
0x80718171, 0x8e718471,
0x8e768271, 0xbef600ff,
0x01000000, 0xbef20174,
0x80747a74, 0x82758075,
0xbefc0080, 0xbf800000,
0xbe802b00, 0xbe822b02,
0xbe842b04, 0xbe862b06,
0xbe882b08, 0xbe8a2b0a,
0xbe8c2b0c, 0xbe8e2b0e,
0xc06b003a, 0x00000000,
0xbf8cc07f, 0xc06b013a,
0x00000010, 0xbf8cc07f,
0xc06b023a, 0x00000020,
0xbf8cc07f, 0xc06b033a,
0x00000030, 0xbf8cc07f,
0x8074c074, 0x82758075,
0x807c907c, 0xbf0a717c,
0xbf85ffe7, 0xbef40172,
0xbefa0080, 0xbefe00c1,
0xbeff00c1, 0xbee80080,
0xbee90080, 0xbef600ff,
0x01000000, 0xe0724000,
0x7a1d0000, 0xe0724100,
0x7a1d0100, 0xe0724200,
0x7a1d0200, 0xe0724300,
0x7a1d0300, 0xbefe00c1,
0xbeff00c1, 0xb8f14306,
0x8671c171, 0xbf84002c,
0xbf8a0000, 0x8670ff6f,
0x04000000, 0xbf840028,
0x8e718671, 0x8e718271,
0xbef60071, 0xb8fa2a05,
0x807a817a, 0x8e7a8a7a,
0xb8f01605, 0x80708170,
0x8e708670, 0x807a707a,
0x807aff7a, 0x00000080,
0xbef600ff, 0x01000000,
0xbefc0080, 0xd28c0002,
0x000100c1, 0xd28d0003,
0x000204c1, 0xd1060002,
0x00011103, 0x7e0602ff,
0x00000200, 0xbefc00ff,
0x00010000, 0xbe800077,
0x8677ff77, 0xff7fffff,
0x8777ff77, 0x00058000,
0xd8ec0000, 0x00000002,
0xbf8cc07f, 0xe0765000,
0x7a1d0002, 0x68040702,
0xd0c9006a, 0x0000e302,
0xbf87fff7, 0xbef70000,
0xbefa00ff, 0x00000400,
0xbefe00c1, 0xbeff00c1,
0xb8f12a05, 0x80718171,
0x8e718271, 0x8e768871,
0xbef600ff, 0x01000000,
0xbefc0084, 0xbf0a717c,
0xbf840015, 0xbf11017c,
0x8071ff71, 0x00001000,
0x7e000300, 0x7e020301,
0x7e040302, 0x7e060303,
0xe0724000, 0x7a1d0000,
0xe0724100, 0x7a1d0100,
0xe0724200, 0x7a1d0200,
0xe0724300, 0x7a1d0300,
0x807c847c, 0x807aff7a,
0x00000400, 0xbf0a717c,
0xbf85ffef, 0xbf9c0000,
0xbf8200dc, 0xbef4007e,
0x8675ff7f, 0x0000ffff,
0x8775ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x00807fac, 0x866eff7f,
0x08000000, 0x8f6e836e,
0x87776e77, 0x866eff7f,
0x70000000, 0x8f6e816e,
0x87776e77, 0x866eff7f,
0x04000000, 0xbf84001e,
0xbefe00c1, 0xbeff00c1,
0xb8ef4306, 0x866fc16f,
0xbf840019, 0x8e6f866f,
0x8e6f826f, 0xbef6006f,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
0x80786e78, 0x8078ff78,
0x00000080, 0xbef600ff,
0x01000000, 0xbefc0080,
0xe0510000, 0x781d0000,
0xe0510100, 0x781d0000,
0x807cff7c, 0x00000200,
0x8078ff78, 0x00000200,
0xbf0a6f7c, 0xbf85fff6,
0xbef80080, 0xbefe00c1,
0xbeff00c1, 0xb8ef2a05,
0x806f816f, 0x8e6f826f,
0x8e76886f, 0xbef600ff,
0x01000000, 0xbeee0078,
0x8078ff78, 0x00000400,
0xbefc0084, 0xbf11087c,
0x806fff6f, 0x00008000,
0xe0524000, 0x781d0000,
0xe0524100, 0x781d0100,
0xe0524200, 0x781d0200,
0xe0524300, 0x781d0300,
0xbf8c0f70, 0x7e000300,
0x7e020301, 0x7e040302,
0x7e060303, 0x807c847c,
0x8078ff78, 0x00000400,
0xbf0a6f7c, 0xbf85ffee,
0xbf9c0000, 0xe0524000,
0x6e1d0000, 0xe0524100,
0x6e1d0100, 0xe0524200,
0x6e1d0200, 0xe0524300,
0x6e1d0300, 0xb8f82a05,
0x866eff7f, 0x08000000,
0x8f6e836e, 0x87776e77,
0x866eff7f, 0x70000000,
0x8f6e816e, 0x87776e77,
0x866eff7f, 0x04000000,
0xbf84001e, 0xbefe00c1,
0xbeff00c1, 0xb8ef4306,
0x866fc16f, 0xbf840019,
0x8e6f866f, 0x8e6f826f,
0xbef6006f, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
0x80f8c078, 0xb8ef1605,
0x806f816f, 0x8e6f846f,
0x8e76826f, 0xbef600ff,
0x01000000, 0xbefc006f,
0xc031003a, 0x00000078,
0x80f8c078, 0xbf8cc07f,
0x80fc907c, 0xbf800000,
0xbe802d00, 0xbe822d02,
0xbe842d04, 0xbe862d06,
0xbe882d08, 0xbe8a2d0a,
0xbe8c2d0c, 0xbe8e2d0e,
0xbf06807c, 0xbf84fff0,
0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
0xbefc0080, 0xe0510000,
0x781d0000, 0xe0510100,
0x781d0000, 0x807cff7c,
0x00000200, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
0xbf85fff6, 0xbef80080,
0xbefe00c1, 0xbeff00c1,
0xb8ef2a05, 0x806f816f,
0x8e6f826f, 0x8e76886f,
0xbef600ff, 0x01000000,
0xbeee0078, 0x8078ff78,
0x00000400, 0xbefc0084,
0xbf11087c, 0x806fff6f,
0x00008000, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
0x7e000300, 0x7e020301,
0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
0xbf85ffee, 0xbf9c0000,
0xe0524000, 0x6e1d0000,
0xe0524100, 0x6e1d0100,
0xe0524200, 0x6e1d0200,
0xe0524300, 0x6e1d0300,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
0x80786e78, 0xbef60084,
0x80786e78, 0x80f8c078,
0xb8ef1605, 0x806f816f,
0x8e6f846f, 0x8e76826f,
0xbef600ff, 0x01000000,
0xc0211bfa, 0x00000078,
0x80788478, 0xc0211b3a,
0xbefc006f, 0xc031003a,
0x00000078, 0x80f8c078,
0xbf8cc07f, 0x80fc907c,
0xbf800000, 0xbe802d00,
0xbe822d02, 0xbe842d04,
0xbe862d06, 0xbe882d08,
0xbe8a2d0a, 0xbe8c2d0c,
0xbe8e2d0e, 0xbf06807c,
0xbf84fff0, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
0xbef60084, 0xbef600ff,
0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
0xc0211b7a, 0x00000078,
0x80788478, 0xc0211eba,
0xc0211b3a, 0x00000078,
0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
0xc0211efa, 0x00000078,
0x80788478, 0xc0211c3a,
0xc0211c3a, 0x00000078,
0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
0xc0211c7a, 0x00000078,
0x80788478, 0xc0211a3a,
0xc0211eba, 0x00000078,
0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
0xc0211a7a, 0x00000078,
0x80788478, 0xc0211cfa,
0xc0211a3a, 0x00000078,
0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
0xbf8cc07f, 0xbefc006f,
0xbefe007a, 0xbeff007b,
0x866f71ff, 0x000003ff,
0xb96f4803, 0x866f71ff,
0xfffff800, 0x8f6f8b6f,
0xb96fa2c3, 0xb973f801,
0xb8ee2a05, 0x806e816e,
0x8e6e8a6e, 0xb8ef1605,
0x806f816f, 0x8e6f866f,
0x806e6f6e, 0x806e746e,
0x826f8075, 0x866fff6f,
0x0000ffff, 0xc0071cb7,
0x00000040, 0xc00b1d37,
0x00000048, 0xc0031e77,
0x00000058, 0xc0071eb7,
0x0000005c, 0xbf8cc07f,
0x866fff6d, 0xf0000000,
0x8f6f9c6f, 0x8e6f906f,
0xbeee0080, 0x876e6f6e,
0x866fff6d, 0x08000000,
0x8f6f9b6f, 0x8e6f8f6f,
0x876e6f6e, 0x866fff70,
0x00800000, 0x8f6f976f,
0xb96ef807, 0x866dff6d,
0x0000ffff, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8370,
0xb96ee0c2, 0xbf800002,
0xb9700002, 0xbf8a0000,
0x95806f6c, 0xbf810000,
0xc0211cfa, 0x00000078,
0x80788478, 0xbf8cc07f,
0xbefc006f, 0xbefe0070,
0xbeff0071, 0x866f7bff,
0x000003ff, 0xb96f4803,
0x866f7bff, 0xfffff800,
0x8f6f8b6f, 0xb96fa2c3,
0xb973f801, 0xb8ee2a05,
0x806e816e, 0x8e6e8a6e,
0xb8ef1605, 0x806f816f,
0x8e6f866f, 0x806e6f6e,
0x806e746e, 0x826f8075,
0x866fff6f, 0x0000ffff,
0xc00b1c37, 0x00000050,
0xc00b1d37, 0x00000060,
0xc0031e77, 0x00000074,
0xbf8cc07f, 0x866fff6d,
0xf8000000, 0x8f6f9b6f,
0x8e6f906f, 0xbeee0080,
0x876e6f6e, 0x866fff6d,
0x04000000, 0x8f6f9a6f,
0x8e6f8f6f, 0x876e6f6e,
0x866fff7a, 0x00800000,
0x8f6f976f, 0xb96ef807,
0x866dff6d, 0x0000ffff,
0x86fe7e7e, 0x86ea6a6a,
0x8f6e837a, 0xb96ee0c2,
0xbf800002, 0xb97a0002,
0xbf8a0000, 0x95806f6c,
0xbf810000, 0x00000000,
};


@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
end
//check whether there is mem_viol
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_NO_PC_REWIND
//if so, need rewind PC assuming GDS operation gets NACKed
s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
L_NO_PC_REWIND:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit


@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
var s_save_tmp = ttmp4
var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
var s_save_tmp = ttmp14
var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
var s_save_xnack_mask_lo = ttmp6
var s_save_xnack_mask_hi = ttmp7
var s_save_buf_rsrc0 = ttmp8
@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
var s_save_buf_rsrc2 = ttmp10
var s_save_buf_rsrc3 = ttmp11
var s_save_status = ttmp12
var s_save_mem_offset = ttmp14
var s_save_mem_offset = ttmp4
var s_save_alloc_size = s_save_trapsts //conflict
var s_save_m0 = ttmp15
var s_save_m0 = ttmp5
var s_save_ttmps_lo = s_save_tmp //no conflict
var s_save_ttmps_hi = s_save_trapsts //no conflict
@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
var s_restore_pc_lo = ttmp0
var s_restore_pc_hi = ttmp1
var s_restore_exec_lo = ttmp14
var s_restore_exec_hi = ttmp15
var s_restore_status = ttmp4
var s_restore_trapsts = ttmp5
var s_restore_exec_lo = ttmp4
var s_restore_exec_hi = ttmp5
var s_restore_status = ttmp14
var s_restore_trapsts = ttmp15
var s_restore_xnack_mask_lo = xnack_mask_lo
var s_restore_xnack_mask_hi = xnack_mask_hi
var s_restore_buf_rsrc0 = ttmp8
@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
L_HALT_WAVE:
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
// We cannot prevent further faults so just terminate the wavefront.
// We cannot prevent further faults. Spin wait until context saved.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_ALREADY_HALTED
s_endpgm
L_WAIT_CTX_SAVE:
s_sleep 0x10
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc0 L_WAIT_CTX_SAVE
L_NOT_ALREADY_HALTED:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
@ -405,7 +411,7 @@ end
else
end
// Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_save_ttmps_lo)
get_sgpr_size_bytes(s_save_ttmps_hi)
@ -413,13 +419,11 @@ end
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
ack_sqc_store_workaround()
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
ack_sqc_store_workaround()
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
ack_sqc_store_workaround()
s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
ack_sqc_store_workaround()
/* setup Resource Constants */
@ -1093,7 +1097,7 @@ end
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo)
get_sgpr_size_bytes(s_restore_ttmps_hi)
@ -1101,10 +1105,9 @@ end
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
//reuse s_restore_m0 as a temp register


@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
else
return -ENOTSUPP;
@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process_device *pdd;
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
if (!dev)
return -EINVAL;
mutex_lock(&p->mutex);
@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE)
return -EINVAL;
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset)
return -ENOMEM;
}
mutex_lock(&p->mutex);
@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
args->mmap_offset = offset;
/* MMIO is mapped through the kfd device.
* Generate a kfd mmap offset.
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
args->mmap_offset <<= PAGE_SHIFT;
}
return 0;
err_free:
@ -1551,6 +1567,32 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return err;
}
static int kfd_ioctl_alloc_queue_gws(struct file *filep,
struct kfd_process *p, void *data)
{
int retval;
struct kfd_ioctl_alloc_queue_gws_args *args = data;
struct kfd_dev *dev;
if (!hws_gws_support)
return -ENODEV;
dev = kfd_device_by_id(args->gpu_id);
if (!dev) {
pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
return -ENODEV;
}
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return -ENODEV;
mutex_lock(&p->mutex);
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
mutex_unlock(&p->mutex);
args->first_gws = 0;
return retval;
}
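A minimal userspace sketch of the new ioctl, assuming the matching kfd_ioctl.h uapi additions (field names taken from the handler above; error handling trimmed):
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Request num_gws GWS entries for a queue; pass 0 to release them again. */
static int alloc_queue_gws(int kfd_fd, __u32 gpu_id, __u32 queue_id,
			   __u32 num_gws)
{
	struct kfd_ioctl_alloc_queue_gws_args args = {
		.gpu_id = gpu_id,
		.queue_id = queue_id,
		.num_gws = num_gws,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args))
		return -1;
	/* args.first_gws is the first allocated GWS index (always 0 here) */
	return 0;
}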
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
struct kfd_process *p, void *data)
{
@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
kfd_ioctl_import_dmabuf, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
kfd_ioctl_alloc_queue_gws, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@ -1845,6 +1889,39 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
return retcode;
}
static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
int ret;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pr_debug("Process %d mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
process->pasid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
ret = io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
PAGE_SIZE,
vma->vm_page_prot);
return ret;
}
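End to end, userspace would allocate the MMIO remap page and then mmap it through the kfd device file; a hedged sketch (uapi names from this patch, surrounding allocation details assumed):
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kfd_ioctl.h>

/* Map the per-GPU MMIO remap page: the alloc ioctl returns an mmap_offset
 * with KFD_MMAP_TYPE_MMIO encoded, which kfd_mmap() decodes to route the
 * request to kfd_mmio_mmap() above.
 */
static void *map_mmio_remap_page(int kfd_fd, uint32_t gpu_id)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args args = {
		.gpu_id = gpu_id,
		.size = 4096,	/* must equal PAGE_SIZE */
		.flags = KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &args))
		return NULL;
	return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		    kfd_fd, args.mmap_offset);
}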
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (!dev)
return -ENODEV;
return kfd_reserved_mem_mmap(dev, process, vma);
case KFD_MMAP_TYPE_MMIO:
if (!dev)
return -ENODEV;
return kfd_mmio_mmap(dev, process, vma);
}
return -EFAULT;


@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
props->weight = 15;
props->weight = 15 * iolink->num_hops_xgmi;
else
props->weight = node_distance(id_from, id_to);
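/* Illustrative arithmetic (hop counts assumed): a 1-hop XGMI link now
 * weighs 15 and a 2-hop link 15 * 2 = 30, so a direct PCIe link (20)
 * ranks between them instead of XGMI always winning regardless of hops.
 */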
@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris12_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
break;
case CHIP_VEGAM:
pcache_info = vegam_cache_info;
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
struct kfd_dev *kdev,
struct kfd_dev *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
sub_type_hdr->num_hops_xgmi =
amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
return 0;
}
@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
(char *)sub_type_hdr +
sizeof(struct crat_subtype_iolink));
ret = kfd_fill_gpu_xgmi_link_to_gpu(
&avail_size, kdev,
&avail_size, kdev, peer_dev->gpu,
(struct crat_subtype_iolink *)sub_type_hdr,
proximity_domain, nid);
if (ret < 0)


@ -274,7 +274,8 @@ struct crat_subtype_iolink {
uint32_t minimum_bandwidth_mbs;
uint32_t maximum_bandwidth_mbs;
uint32_t recommended_transfer_size;
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH];
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
uint8_t num_hops_xgmi;
};
/*


@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = true,
.num_sdma_engines = 1,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
#endif
@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info vegam_device_info = {
.asic_family = CHIP_VEGAM,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
};
@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
{ 0x694C, &vegam_device_info }, /* VegaM */
{ 0x694E, &vegam_device_info }, /* VegaM */
{ 0x694F, &vegam_device_info }, /* VegaM */
{ 0x6860, &vega10_device_info }, /* Vega10 */
{ 0x6861, &vega10_device_info }, /* Vega10 */
{ 0x6862, &vega10_device_info }, /* Vega10 */
@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} else
kfd->max_proc_per_quantum = hws_max_conc_proc;
/* Allocate global GWS that is shared by all KFD processes */
if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
dev_err(kfd_device, "Could not allocate %d gws\n",
amdgpu_amdkfd_get_num_gws(kfd->kgd));
goto out;
}
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
goto out;
goto alloc_gtt_mem_failure;
}
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@ -611,6 +653,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
if (hws_gws_support)
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
if (hws_gws_support)
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
}
kfree(kfd);


@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int sdma_queue_id);
struct queue *q);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
if (type == KFD_QUEUE_TYPE_SDMA)
if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
return KFD_MQD_TYPE_SDMA;
return KFD_MQD_TYPE_CP;
}
@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
return dqm->dev->device_info->num_sdma_engines;
}
static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_xgmi_sdma_engines;
}
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_sdma_engines
* dqm->dev->device_info->num_sdma_queues_per_engine;
}
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_xgmi_sdma_engines
* dqm->dev->device_info->num_sdma_queues_per_engine;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
* preserve the user mode ABI.
*/
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
* doorbell assignments based on the engine and queue id.
* The doorbell index distance between RLC (2*i) and (2*i+1)
@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
struct kfd_dev *dev = qpd->dqm->dev;
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
return;
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
retval = create_compute_queue_nocpsch(dqm, q, qpd);
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
else
retval = -EINVAL;
@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (!mqd_mgr)
return -ENOMEM;
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
retval = allocate_hqd(dqm, q);
if (retval)
@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
int retval;
struct mqd_manager *mqd_mgr;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr)
return -ENOMEM;
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
deallocate_hqd(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
deallocate_sdma_queue(dqm, q);
} else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = -ENODEV;
goto out_unlock;
}
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto out_unlock;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
}
} else if (prev_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
if (WARN(q->process->mm != current->mm,
"should only run in user thread"))
retval = -EFAULT;
@ -571,27 +584,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
return retval;
}
static struct mqd_manager *get_mqd_manager(
struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
struct mqd_manager *mqd_mgr;
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
pr_debug("mqd type %d\n", type);
mqd_mgr = dqm->mqd_mgrs[type];
if (!mqd_mgr) {
mqd_mgr = mqd_manager_init(type, dqm->dev);
if (!mqd_mgr)
pr_err("mqd manager is NULL");
dqm->mqd_mgrs[type] = mqd_mgr;
}
return mqd_mgr;
}
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_active)
continue;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) { /* should not be here */
pr_err("Cannot evict queue, mqd mgr is NULL\n");
retval = -ENOMEM;
goto out;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_evicted = true;
q->properties.is_active = false;
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_evicted)
continue;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) { /* should not be here */
pr_err("Cannot restore queue, mqd mgr is NULL\n");
retval = -ENOMEM;
goto out;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_evicted = false;
q->properties.is_active = true;
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++;
kfd_inc_compute_active(dqm->dev);
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
kfd_inc_compute_active(dqm->dev);
return retval;
}
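The same drop-the-lock-first discipline recurs in unregister_process and the process-termination paths below; schematically (function names real, body condensed to the pattern):
static int example_register(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	/* ...mutate DQM state: queue lists, processes_count... */
	dqm_unlock(dqm);
	/* May reclaim or take locks held by reclaimers: never call this
	 * while dqm->lock is held.
	 */
	kfd_inc_compute_active(dqm->dev);
	return 0;
}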
@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
kfd_dec_compute_active(dqm->dev);
goto out;
}
}
@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
retval = 1;
out:
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
if (!retval)
kfd_dec_compute_active(dqm->dev);
return retval;
}
@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
dqm->xgmi_sdma_queue_count = 0;
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
}
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
return 0;
}
@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int *sdma_queue_id)
struct queue *q)
{
int bit;
if (dqm->sdma_bitmap == 0)
return -ENOMEM;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (dqm->sdma_bitmap == 0)
return -ENOMEM;
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
q->properties.sdma_engine_id = q->sdma_id %
get_num_sdma_engines(dqm);
q->properties.sdma_queue_id = q->sdma_id /
get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (dqm->xgmi_sdma_bitmap == 0)
return -ENOMEM;
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
/* sdma_engine_id is an engine id counted
* across both PCIe-optimized and XGMI-
* optimized SDMAs. The calculation below
* assumes the first N engines are always
* the PCIe-optimized ones.
*/
q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
q->sdma_id % get_num_xgmi_sdma_engines(dqm);
q->properties.sdma_queue_id = q->sdma_id /
get_num_xgmi_sdma_engines(dqm);
}
bit = ffs(dqm->sdma_bitmap) - 1;
dqm->sdma_bitmap &= ~(1 << bit);
*sdma_queue_id = bit;
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
return 0;
}
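With the bitmaps widened to 64 bits (1ULL shifts), the id-to-engine mapping above is easiest to see with concrete numbers; an illustrative case assuming 2 PCIe-optimized and 2 XGMI-optimized engines:
/* XGMI queues land on engines [2..3], after the 2 PCIe-optimized ones:
 *   xgmi sdma_id 0 -> engine 2 + (0 % 2) = 2, queue 0 / 2 = 0
 *   xgmi sdma_id 1 -> engine 2 + (1 % 2) = 3, queue 1 / 2 = 0
 *   xgmi sdma_id 2 -> engine 2 + (2 % 2) = 2, queue 2 / 2 = 1
 *   xgmi sdma_id 3 -> engine 2 + (3 % 2) = 3, queue 3 / 2 = 1
 */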
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int sdma_queue_id)
struct queue *q)
{
if (sdma_queue_id >= get_num_sdma_queues(dqm))
return;
dqm->sdma_bitmap |= (1 << sdma_queue_id);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (q->sdma_id >= get_num_sdma_queues(dqm))
return;
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
return;
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
}
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
if (!mqd_mgr)
return -ENOMEM;
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
retval = allocate_sdma_queue(dqm, &q->sdma_id);
retval = allocate_sdma_queue(dqm, q);
if (retval)
return retval;
q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
retval = allocate_doorbell(qpd, q);
if (retval)
goto out_deallocate_sdma_queue;
pr_debug("SDMA id is: %d\n", q->sdma_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
@ -987,7 +1002,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
return retval;
}
@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->processes_count = 0;
dqm->sdma_queue_count = 0;
dqm->xgmi_sdma_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
int retval;
struct mqd_manager *mqd_mgr;
retval = 0;
dqm_lock(dqm);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
pr_warn("Can't create new usermode queue because %d queues were already created\n",
dqm->total_queue_count);
retval = -EPERM;
goto out_unlock;
goto out;
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
retval = allocate_sdma_queue(dqm, &q->sdma_id);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
retval = allocate_sdma_queue(dqm, q);
if (retval)
goto out_unlock;
q->properties.sdma_queue_id =
q->sdma_id / get_num_sdma_engines(dqm);
q->properties.sdma_engine_id =
q->sdma_id % get_num_sdma_engines(dqm);
goto out;
}
retval = allocate_doorbell(qpd, q);
if (retval)
goto out_deallocate_sdma_queue;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (retval)
goto out_deallocate_doorbell;
dqm_lock(dqm);
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
* type or whether the queue is active.
@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
dqm_unlock(dqm);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
out:
return retval;
}
@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
return 0;
}
static int unmap_sdma_queues(struct device_queue_manager *dqm,
unsigned int sdma_engine)
static int unmap_sdma_queues(struct device_queue_manager *dqm)
{
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
sdma_engine);
int i, retval = 0;
for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
if (retval)
return retval;
}
return retval;
}
/* dqm->lock mutex has to be locked before calling this function */
@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
pr_debug("Before destroying queues, sdma queue count is : %u\n",
dqm->sdma_queue_count);
pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
if (dqm->sdma_queue_count > 0) {
unmap_sdma_queues(dqm, 0);
unmap_sdma_queues(dqm, 1);
}
if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
unmap_sdma_queues(dqm);
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
filter, filter_param, false, 0);
@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
}
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto failed;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
deallocate_doorbell(qpd, q);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
deallocate_sdma_queue(dqm, q);
}
list_del(&q->list);
@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = true;
}
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
/*
* Unconditionally decrement this counter, regardless of the queue's
* type
@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
return retval;
failed:
failed_try_destroy_debugged_queue:
dqm_unlock(dqm);
@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct queue *q, *next;
struct device_process_node *cur, *next_dpn;
int retval = 0;
bool found = false;
dqm_lock(dqm);
@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
kfd_dec_compute_active(dqm->dev);
found = true;
break;
}
}
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
if (found)
kfd_dec_compute_active(dqm->dev);
return retval;
}
@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
goto dqm_unlock;
}
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (!mqd_mgr) {
r = -ENOMEM;
goto dqm_unlock;
}
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
if (!mqd_mgr->get_wave_state) {
r = -EINVAL;
@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
struct device_process_node *cur, *next_dpn;
enum kfd_unmap_queues_filter filter =
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
bool found = false;
retval = 0;
@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
deallocate_sdma_queue(dqm, q);
}
if (q->properties.is_active)
@@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
kfd_dec_compute_active(dqm->dev);
found = true;
break;
}
}
@@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = false;
}
/* lastly, free mqd resources */
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
if (found)
kfd_dec_compute_active(dqm->dev);
/* Lastly, free mqd resources.
* Do uninit_mqd() after dqm_unlock to avoid circular locking.
*/
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto out;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
list_del(&q->list);
qpd->queue_count--;
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
}
out:
dqm_unlock(dqm);
return retval;
}
static int init_mqd_managers(struct device_queue_manager *dqm)
{
int i, j;
struct mqd_manager *mqd_mgr;
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
if (!mqd_mgr) {
pr_err("mqd manager [%d] initialization failed\n", i);
goto out_free;
}
dqm->mqd_mgrs[i] = mqd_mgr;
}
return 0;
out_free:
for (j = 0; j < i; j++) {
kfree(dqm->mqd_mgrs[j]);
dqm->mqd_mgrs[j] = NULL;
}
return -ENOMEM;
}
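init_mqd_managers() eagerly fills the dqm->mqd_mgrs[] table that replaces the old get_mqd_manager() op, and on partial failure it rolls back only the entries created so far. A generic sketch of that init-with-rollback idiom, where make_obj(), destroy_obj(), and the table size are hypothetical stand-ins:

#include <stdlib.h>

#define NOBJ 4

static void *table[NOBJ];

/* Hypothetical constructor/destructor standing in for
 * asic_ops.mqd_manager_init() and kfree().
 */
static void *make_obj(void)
{
        return malloc(16);      /* may return NULL */
}

static void destroy_obj(void *o)
{
        free(o);
}

/* Fill the table front to back; on the first failure, tear down only
 * the 0..i-1 entries that already succeeded, as init_mqd_managers()
 * does, and report failure.
 */
static int init_table(void)
{
        int i, j;

        for (i = 0; i < NOBJ; i++) {
                table[i] = make_obj();
                if (!table[i])
                        goto out_free;
        }
        return 0;

out_free:
        for (j = 0; j < i; j++) {
                destroy_obj(table[j]);
                table[j] = NULL;
        }
        return -12;     /* -ENOMEM */
}

int main(void)
{
        return init_table() ? 1 : 0;
}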
/* Allocate one hiq mqd (HWS) and all SDMA mqds in a contiguous trunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
int retval;
struct kfd_dev *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
dev->device_info->num_sdma_engines *
dev->device_info->num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
(void *)&(mem_obj->cpu_ptr), true);
return retval;
}
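The trunk is sized as one HIQ MQD followed by one SDMA MQD per (engine, queue) slot, all carved from a single GTT allocation. A small sketch of the same size arithmetic, with made-up MQD sizes and engine counts standing in for the values from the MQD managers and device_info:

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only; the real ones come from the MQD managers
 * and device_info.
 */
#define SDMA_MQD_SIZE           512
#define HIQ_MQD_SIZE            512
#define NUM_SDMA_ENGINES          2
#define NUM_SDMA_QUEUES_PER_ENG   8

int main(void)
{
        /* Same formula as allocate_hiq_sdma_mqd(): one slot per SDMA
         * (engine, queue) pair plus a single HIQ MQD, as one GTT block.
         */
        uint32_t size = SDMA_MQD_SIZE * NUM_SDMA_ENGINES *
                        NUM_SDMA_QUEUES_PER_ENG + HIQ_MQD_SIZE;

        printf("trunk size: %u bytes\n", size);
        return 0;
}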
@@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.stop = stop_cpsch;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.get_mqd_manager = get_mqd_manager;
dqm->ops.register_process = register_process;
dqm->ops.unregister_process = unregister_process;
dqm->ops.uninitialize = uninitialize;
@@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.get_mqd_manager = get_mqd_manager;
dqm->ops.register_process = register_process;
dqm->ops.unregister_process = unregister_process;
dqm->ops.initialize = initialize_nocpsch;
@@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
@@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
goto out_free;
}
if (init_mqd_managers(dqm))
goto out_free;
if (allocate_hiq_sdma_mqd(dqm)) {
pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
goto out_free;
}
if (!dqm->ops.initialize(dqm))
return dqm;
@@ -1772,9 +1843,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
return NULL;
}
void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
dqm->ops.uninitialize(dqm);
deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
kfree(dqm);
}


@@ -48,8 +48,6 @@ struct device_process_node {
*
* @update_queue: Queue update routine.
*
* @get_mqd_manager: Returns the mqd manager according to the mqd type.
*
* @execute_queues: Dispatches the queues list to the H/W.
*
* @register_process: This routine associates a specific process with device.
@@ -97,10 +95,6 @@ struct device_queue_manager_ops {
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
struct mqd_manager * (*get_mqd_manager)
(struct device_queue_manager *dqm,
enum KFD_MQD_TYPE type);
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
};
/**
@@ -185,10 +181,12 @@ struct device_queue_manager {
unsigned int processes_count;
unsigned int queue_count;
unsigned int sdma_queue_count;
unsigned int xgmi_sdma_queue_count;
unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
unsigned int sdma_bitmap;
uint64_t sdma_bitmap;
uint64_t xgmi_sdma_bitmap;
unsigned int vmid_bitmap;
uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem;
@@ -201,6 +199,7 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
};
void device_queue_manager_init_cik(
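Two struct changes in the hunk above are worth calling out: sdma_bitmap is widened from unsigned int to uint64_t and gains an xgmi_sdma_bitmap sibling, so each bitmap can now track up to 64 queue ids. A sketch of free-slot allocation on such a bitmap, using __builtin_ffsll where the kernel would use its own find-bit helpers:

#include <stdint.h>
#include <stdio.h>

static uint64_t sdma_bitmap = ~0ULL;    /* bit set = queue id free */

/* Grab the lowest free queue id, or -1 if the bitmap is exhausted. */
static int alloc_sdma_queue_id(uint64_t *bitmap)
{
        int bit = __builtin_ffsll(*bitmap);     /* 1-based, 0 if empty */

        if (!bit)
                return -1;
        *bitmap &= ~(1ULL << (bit - 1));
        return bit - 1;
}

static void free_sdma_queue_id(uint64_t *bitmap, int id)
{
        *bitmap |= 1ULL << id;
}

int main(void)
{
        int id = alloc_sdma_queue_id(&sdma_bitmap);

        printf("allocated SDMA queue id %d\n", id);
        free_sdma_queue_id(&sdma_bitmap, id);
        return 0;
}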
@@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{


@@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik;
asic_ops->init_sdma_vm = init_sdma_vm;
asic_ops->mqd_manager_init = mqd_manager_init_cik;
}
void device_queue_manager_init_cik_hawaii(
@@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik_hawaii;
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)


@@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
{
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
}
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)


@@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
asic_ops->update_qpd = update_qpd_vi;
asic_ops->init_sdma_vm = init_sdma_vm;
asic_ops->mqd_manager_init = mqd_manager_init_vi;
}
void device_queue_manager_init_vi_tonga(
@@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
asic_ops->update_qpd = update_qpd_vi_tonga;
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)


@@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
return; /* Presumably process exited. */
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = 1;
memory_exception_data.failure.imprecise = true;
/* Set failure reason */
if (info) {
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;


@@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
break;
case CHIP_VEGA10:
@@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
return 0;
}


@@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->nop_packet = nop.u32all;
switch (type) {
case KFD_QUEUE_TYPE_DIQ:
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
break;
case KFD_QUEUE_TYPE_HIQ:
kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
KFD_MQD_TYPE_HIQ);
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
pr_err("Invalid queue type %d\n", type);
@@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
kernel_queue_init_vi(&kq->ops_asic_specific);
break;


@@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
packet->bitfields2.alloc_format =
alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_compute_vi;
@@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
@@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;


@@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
packet->bitfields2.alloc_format =
alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
@@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
@@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;


@@ -23,34 +23,54 @@
#include "kfd_mqd_manager.h"
#include "amdgpu_amdkfd.h"
#include "kfd_device_queue_manager.h"
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
{
switch (dev->device_info->asic_family) {
case CHIP_KAVERI:
return mqd_manager_init_cik(type, dev);
case CHIP_HAWAII:
return mqd_manager_init_cik_hawaii(type, dev);
case CHIP_CARRIZO:
return mqd_manager_init_vi(type, dev);
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
return mqd_manager_init_vi_tonga(type, dev);
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
return mqd_manager_init_v9(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
}
struct kfd_mem_obj *mqd_mem_obj = NULL;
return NULL;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
return mqd_mem_obj;
}
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj = NULL;
uint64_t offset;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
offset = (q->sdma_engine_id *
dev->device_info->num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset);
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
return mqd_mem_obj;
}
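allocate_sdma_mqd() no longer calls the GTT sub-allocator at all; it computes the queue's slot inside the preallocated trunk, skipping the HIQ MQD that sits at the front. A sketch of the same offset arithmetic, with hypothetical sizes in place of the mqd_size fields and device_info counts:

#include <stdint.h>
#include <stdio.h>

/* Illustrative layout constants; real values come from the MQD
 * managers and device_info.
 */
#define HIQ_MQD_SIZE             512
#define SDMA_MQD_SIZE            512
#define NUM_SDMA_QUEUES_PER_ENG    8

/* Byte offset of the (engine, queue) SDMA MQD slot inside the trunk:
 * the HIQ MQD sits first, then the SDMA slots in engine-major order.
 */
static uint64_t sdma_mqd_offset(uint32_t engine_id, uint32_t queue_id)
{
        return (uint64_t)(engine_id * NUM_SDMA_QUEUES_PER_ENG + queue_id) *
               SDMA_MQD_SIZE + HIQ_MQD_SIZE;
}

int main(void)
{
        printf("engine 1, queue 3 -> offset %llu\n",
               (unsigned long long)sdma_mqd_offset(1, 3));
        return 0;
}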
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
WARN_ON(!mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
}
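These helpers hand out kzalloc'd descriptors that merely alias the shared trunk, which is why uninit_mqd_hiq_sdma() frees only the wrapper and leaves the GTT memory for device_queue_manager_uninit() to release once. A sketch of that alias-descriptor lifecycle, with a hypothetical mem_obj type in place of struct kfd_mem_obj:

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct kfd_mem_obj. */
struct mem_obj {
        void *backing;          /* slot in the shared trunk, not owned */
        uint32_t *cpu_ptr;
};

static char trunk[4096];        /* stand-in for the GTT trunk */

/* Alias a slot of the trunk; the descriptor owns no backing memory. */
static struct mem_obj *alloc_alias(uint64_t offset)
{
        struct mem_obj *obj = calloc(1, sizeof(*obj));

        if (!obj)
                return NULL;
        obj->backing = trunk + offset;
        obj->cpu_ptr = (uint32_t *)(trunk + offset);
        return obj;
}

/* Mirror of uninit_mqd_hiq_sdma(): free the wrapper only. */
static void free_alias(struct mem_obj *obj)
{
        free(obj);              /* trunk itself is freed elsewhere, once */
}

int main(void)
{
        struct mem_obj *obj = alloc_alias(512);

        free_alias(obj);
        return 0;
}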
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,


@@ -99,8 +99,16 @@ struct mqd_manager {
struct mutex mqd_mutex;
struct kfd_dev *dev;
uint32_t mqd_size;
};
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
struct queue_properties *q);
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj);
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);


@@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
if (q->type == KFD_QUEUE_TYPE_HIQ)
return allocate_hiq_mqd(kfd);
if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
&mqd_mem_obj))
return NULL;
return mqd_mem_obj;
}
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
uint64_t addr;
struct cik_mqd *m;
int retval;
struct kfd_dev *kfd = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_mqd(kfd, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct cik_sdma_rlc_registers *m;
struct kfd_dev *dev = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct cik_sdma_rlc_registers),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
@@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, struct queue_properties *p,
@@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif


@@ -67,6 +67,43 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
int retval;
struct kfd_mem_obj *mqd_mem_obj = NULL;
if (q->type == KFD_QUEUE_TYPE_HIQ)
return allocate_hiq_mqd(kfd);
/* From v9 on, for CWSR, the control stack is located on the next page
* boundary after the mqd, so we use the GTT allocation function
* instead of the sub-allocator.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
if (!mqd_mem_obj)
return NULL;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&(mqd_mem_obj->gtt_mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
} else {
retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
&mqd_mem_obj);
}
if (retval) {
kfree(mqd_mem_obj);
return NULL;
}
return mqd_mem_obj;
}
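For CWSR queues on GFX9 the control stack must land on the next page boundary after the MQD, so allocate_mqd() page-aligns both pieces and takes them straight from GTT rather than the sub-allocator (note the GFP_NOIO wrapper allocation, since this path may run under memory pressure). A sketch of the alignment arithmetic, assuming 4 KiB pages and made-up sizes:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096
/* Same rounding as the kernel's ALIGN() macro, for power-of-two a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t mqd_size  = 680;       /* made-up sizeof(struct v9_mqd) */
        uint64_t ctl_stack = 12000;     /* made-up q->ctl_stack_size     */

        /* Each piece is rounded to whole pages, so the control stack
         * starts on the first page boundary after the MQD.
         */
        uint64_t total = ALIGN(ctl_stack, PAGE_SIZE) +
                         ALIGN(mqd_size, PAGE_SIZE);

        printf("GTT allocation: %llu bytes\n", (unsigned long long)total);
        return 0;
}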
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -76,24 +113,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
struct v9_mqd *m;
struct kfd_dev *kfd = mm->dev;
/* From v9 on, for CWSR, the control stack is located on the next page
* boundary after the mqd, so we use the GTT allocation function
* instead of the sub-allocator.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!*mqd_mem_obj)
return -ENOMEM;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&((*mqd_mem_obj)->gtt_mem),
&((*mqd_mem_obj)->gpu_addr),
(void *)&((*mqd_mem_obj)->cpu_ptr), true);
} else
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_mqd(kfd, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct v9_sdma_mqd *m;
struct kfd_dev *dev = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct v9_sdma_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
return retval;
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->get_wave_state = get_wave_state;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif


@@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
if (q->type == KFD_QUEUE_TYPE_HIQ)
return allocate_hiq_mqd(kfd);
if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
&mqd_mem_obj))
return NULL;
return mqd_mem_obj;
}
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
int retval;
uint64_t addr;
struct vi_mqd *m;
struct kfd_dev *kfd = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_mqd(kfd, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct vi_sdma_mqd *m;
struct kfd_dev *dev = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct vi_sdma_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
memset(m, 0, sizeof(struct vi_sdma_mqd));
*mqd = m;
if (gart_addr != NULL)
if (gart_addr)
*gart_addr = (*mqd_mem_obj)->gpu_addr;
retval = mm->update_mqd(mm, m, q);
@@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
return retval;
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->get_wave_state = get_wave_state;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
