diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 4ea21ae88b07..afde1b75eeee 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -163,6 +163,19 @@ struct kfd2kgd_calls { int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, unsigned int timeout); + int (*address_watch_disable)(struct kgd_dev *kgd); + int (*address_watch_execute)(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); + int (*wave_control_execute)(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); + uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); }; diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index 0ec5d53eb6fb..4e883fdc59d8 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -149,10 +149,30 @@ #define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 +#define SQ_IND_INDEX 0x8DE0 +#define SQ_CMD 0x8DEC +#define SQ_IND_DATA 0x8DE4 + +/* + * The TCP_WATCHx_xxxx addresses that are shown here are in dwords, + * and that's why they are multiplied by 4 + */ +#define TCP_WATCH0_ADDR_H (0x32A0*4) +#define TCP_WATCH1_ADDR_H (0x32A3*4) +#define TCP_WATCH2_ADDR_H (0x32A6*4) +#define TCP_WATCH3_ADDR_H (0x32A9*4) +#define TCP_WATCH0_ADDR_L (0x32A1*4) +#define TCP_WATCH1_ADDR_L (0x32A4*4) +#define TCP_WATCH2_ADDR_L (0x32A7*4) +#define TCP_WATCH3_ADDR_L (0x32AA*4) +#define TCP_WATCH0_CNTL (0x32A2*4) +#define TCP_WATCH1_CNTL (0x32A5*4) +#define TCP_WATCH2_CNTL (0x32A8*4) +#define TCP_WATCH3_CNTL (0x32AB*4) + #define CPC_INT_CNTL 0xC2D0 #define CP_HQD_IQ_RPTR 0xC970u -#define AQL_ENABLE (1U << 0) #define SDMA0_RLC0_RB_CNTL 0xD400u #define SDMA_RB_VMID(x) (x << 24) #define SDMA0_RLC0_RB_BASE 0xD404u @@ -186,4 +206,38 @@ #define SDMA0_CNTL 0xD010 #define SDMA1_CNTL 0xD810 +enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. */ + MAX_WATCH_ADDRESSES = 4 +}; + +enum { + ADDRESS_WATCH_REG_ADDR_HI = 0, + ADDRESS_WATCH_REG_ADDR_LO, + ADDRESS_WATCH_REG_CNTL, + ADDRESS_WATCH_REG_MAX +}; + +enum { /* not defined in the CI/KV reg file */ + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, + /* extend the mask to 26 bits in order to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF +}; + +union TCP_WATCH_CNTL_BITS { + struct { + uint32_t mask:24; + uint32_t vmid:4; + uint32_t atc:1; + uint32_t mode:2; + uint32_t valid:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + #endif diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 813a416db538..fd9590de2605 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -34,6 +34,13 @@ #define CIK_PIPE_PER_MEC (4) +static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { + TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, + TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, + TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, + TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL +}; + struct kgd_mem { struct radeon_bo *bo; uint64_t gpu_addr; @@ -79,6 +86,18 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int timeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -96,6 +115,10 @@ static const struct kfd2kgd_calls kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, .get_fw_version = get_fw_version }; @@ -665,6 +688,96 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + union TCP_WATCH_CNTL_BITS cntl; + unsigned int i; + + cntl.u32All = 0; + + cntl.bitfields.valid = 0; + cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + cntl.bitfields.atc = 1; + + /* Turning off this address until we set all the registers */ + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) + write_register(kgd, + watchRegs[i * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + union TCP_WATCH_CNTL_BITS cntl; + + cntl.u32All = cntl_val; + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_HI], + addr_hi); + + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_LO], + addr_lo); + + /* Enable the watch point */ + cntl.bitfields.valid = 1; + + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + uint32_t data; + + mutex_lock(&rdev->grbm_idx_mutex); + + write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); + write_register(kgd, SQ_CMD, sq_cmd); + + /* Restore the GRBM_GFX_INDEX register */ + + data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | + SE_BROADCAST_WRITES; + + write_register(kgd, GRBM_GFX_INDEX, data); + + mutex_unlock(&rdev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct radeon_device *rdev = (struct radeon_device *) kgd;