drm fixes for 6.6-rc2

connector:
 - doc fix
 
 exec:
 - workaround lockdep issue
 
 tests:
 - fix a UAF
 
 vkms:
 - revert hrtimer fix
 
 fbdev:
 - g364fb: fix build failure with mips
 
 i915:
 - Only check eDP HPD when AUX CH is shared.
 
 amdgpu:
 - GC 9.4.3 fixes
 - Fix white screen issues with S/G display on system with >= 64G of ram
 - Replay fixes
 - SMU 13.0.6 fixes
 - AUX backlight fix
 - NBIO 4.3 SR-IOV fixes for HDP
 - RAS fixes
 - DP MST resume fix
 - Fix segfault on systems with no vbios
 - DPIA fixes
 
 amdkfd:
 - CWSR grace period fix
 - Unaligned doorbell fix
 - CRIU fix for GFX11
 - Add missing TLB flush on gfx10 and newer
 
 radeon:
 - make fence wait in suballocator uninterrruptable
 
 gm12u320:
 - Fix the timeout usage for usb_bulk_msg()
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmUDxa8ACgkQDHTzWXnE
 hr5pUBAAih6IWIWlK/yOcCJIJ7h1PxUC8rGbK2LSYMBPWQGk/2UyPvsL26xIOJ2e
 bgoqwLOWFQuPZXkNMqTndDe8Az6NOh2OFDDQXumoMVnXdG3E3z0xOlByvsPxRgYd
 4RQi2JP7D9BPBk8aOKLjiftmN85ON33JoZIeSVm4k6pik1cE70p3pBQVkC8Na5vc
 +r4U7th/Sb2UxtLaUL4eclXqQPT8DfWeIiyJL2T/LMyXt3bQpnzf0hE8jsX+meQu
 uFQKZZ2ZQg21zOxaDSwNUAGasIHgwsOwNygrXG0OWUWngWwT2vZRPvjEcmOLtOr/
 ZLeWO7Y1RoGrP0TBo6Dx4iHUitt4evjF/tPfe+WzQIsaq2NjaVUU6UzHo0sui5rB
 YgWuvI8B7tnPQqfge8eU82Hts6f8lciP4ATdV2dT9EY7YLtjvfHqAGo8bCS7u0cw
 GXsViHbYqIUyb4UXNa1YNBF9RW+tJUebSYRg1o1QuURVP5ehp8ZP51o1z0np6/sA
 rHnKYjoioRE4DPb2T4iJ4C7jTaDXdZo5+/roYxJNQWZJssritzrPAezES//Cbiqm
 4FVlxz5P8ZdwufTySHQnUTS9J0CgiVql0cKC8qxivNEWHyo693G0O6/aH4qiLBtT
 E0ACUvPr7wGJ5a5am/wl8WZSY4clqLPvQ9s4vtCZeg/mOOgRNsI=
 =aU1D
 -----END PGP SIGNATURE-----

Merge tag 'drm-fixes-2023-09-15' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "Regular rc2 fixes pull, mostly made up of amdgpu stuff, one i915, and
  a bunch of others, one vkms locking violation is reverted.

  connector:
   - doc fix

  exec:
   - workaround lockdep issue

  tests:
   - fix a UAF

  vkms:
   - revert hrtimer fix

  fbdev:
   - g364fb: fix build failure with mips

  i915:
   - Only check eDP HPD when AUX CH is shared.

  amdgpu:
   - GC 9.4.3 fixes
   - Fix white screen issues with S/G display on system with >= 64G of ram
   - Replay fixes
   - SMU 13.0.6 fixes
   - AUX backlight fix
   - NBIO 4.3 SR-IOV fixes for HDP
   - RAS fixes
   - DP MST resume fix
   - Fix segfault on systems with no vbios
   - DPIA fixes

  amdkfd:
   - CWSR grace period fix
   - Unaligned doorbell fix
   - CRIU fix for GFX11
   - Add missing TLB flush on gfx10 and newer

  radeon:
   - make fence wait in suballocator uninterrruptable

  gm12u320:
   - Fix the timeout usage for usb_bulk_msg()"

* tag 'drm-fixes-2023-09-15' of git://anongit.freedesktop.org/drm/drm: (29 commits)
  drm/tests: helpers: Avoid a driver uaf
  Revert "drm/vkms: Fix race-condition between the hrtimer and the atomic commit"
  drm/amdkfd: Insert missing TLB flush on GFX10 and later
  drm/i915: Only check eDP HPD when AUX CH is shared
  drm/amd/display: Fix 2nd DPIA encoder Assignment
  drm/amd/display: Add DPIA Link Encoder Assignment Fix
  drm/amd/display: fix replay_mode kernel-doc warning
  drm/amdgpu: Handle null atom context in VBIOS info ioctl
  drm/amdkfd: Checkpoint and restore queues on GFX11
  drm/amd/display: Adjust the MST resume flow
  drm/amdgpu: fallback to old RAS error message for aqua_vanjaram
  drm/amdgpu/nbio4.3: set proper rmmio_remap.reg_offset for SR-IOV
  drm/amdgpu/soc21: don't remap HDP registers for SR-IOV
  drm/amd/display: Don't check registers, if using AUX BL control
  drm/amdgpu: fix retry loop test
  drm/amd/display: Add dirty rect support for Replay
  Revert "drm/amd: Disable S/G for APUs when 64GB or more host memory"
  drm/amd/display: fix the white screen issue when >= 64GB DRAM
  drm/amdkfd: Update CU masking for GFX 9.4.3
  drm/amdkfd: Update cache info reporting for GFX v9.4.3
  ...
This commit is contained in:
Linus Torvalds 2023-09-15 13:25:52 -07:00
commit 9608c7b729
57 changed files with 460 additions and 231 deletions

View File

@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);

View File

@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c
cu_info->cu_active_number = acu_info.number;
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
sizeof(acu_info.bitmap));
sizeof(cu_info->cu_bitmap));
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;

View File

@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst)
uint32_t *reg_data)
{
*reg_data = wait_times;

View File

@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst);
uint32_t *reg_data);

View File

@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst)
uint32_t *reg_data)
{
*reg_data = wait_times;
@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
SCH_WAVE,
grace_period);
*reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
mmCP_IQ_WAIT_TIME2);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,

View File

@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst);
uint32_t *reg_data);

View File

@ -1244,32 +1244,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
}
/*
* On APUs with >= 64GB white flickering has been observed w/ SG enabled.
* Disable S/G on such systems until we have a proper fix.
* https://gitlab.freedesktop.org/drm/amd/-/issues/2354
* https://gitlab.freedesktop.org/drm/amd/-/issues/2735
*/
bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
{
switch (amdgpu_sg_display) {
case -1:
break;
case 0:
return false;
case 1:
return true;
default:
return false;
}
if ((totalram_pages() << (PAGE_SHIFT - 10)) +
(adev->gmc.real_vram_size / 1024) >= 64000000) {
DRM_WARN("Disabling S/G due to >=64GB RAM\n");
return false;
}
return true;
}
/*
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
* speed switching. Until we have confirmation from Intel that a specific host

View File

@ -43,6 +43,7 @@
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
#define AMDGPU_MAX_GC_INSTANCES 8
#define KGD_MAX_QUEUES 128
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@ -257,7 +258,7 @@ struct amdgpu_cu_info {
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
uint32_t bitmap[4][4];
uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
struct amdgpu_gfx_ras {

View File

@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
sizeof(adev->gfx.cu_info.bitmap));
sizeof(dev_info->cu_bitmap));
dev_info->vram_type = adev->gmc.vram_type;
dev_info->vram_bit_width = adev->gmc.vram_width;
dev_info->vce_harvest_config = adev->vce.harvest_config;
@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct atom_context *atom_context;
atom_context = adev->mode_info.atom_context;
memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
vbios_info.version = atom_context->version;
memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
sizeof(atom_context->vbios_ver_str));
memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
if (atom_context) {
memcpy(vbios_info.name, atom_context->name,
sizeof(atom_context->name));
memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
sizeof(atom_context->vbios_pn));
vbios_info.version = atom_context->version;
memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
sizeof(atom_context->vbios_ver_str));
memcpy(vbios_info.date, atom_context->date,
sizeof(atom_context->date));
}
return copy_to_user(out, &vbios_info,
min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;

View File

@ -1052,7 +1052,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) {
if (adev->smuio.funcs &&
if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
@ -1072,7 +1073,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
}
}
if (err_data.ue_count) {
if (adev->smuio.funcs &&
if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "

View File

@ -81,7 +81,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
unsigned int size)
{
struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
GFP_KERNEL, true, 0);
GFP_KERNEL, false, 0);
if (IS_ERR(sa)) {
*sa_bo = NULL;

View File

@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {

View File

@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
* SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
* SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
*/
cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask)

View File

@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
gfx_v6_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v6_0_get_cu_enabled(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {

View File

@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {

View File

@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {

View File

@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (cu_info->bitmap[i][j] & mask) {
if (cu_info->bitmap[0][i][j] & mask) {
if (counter == pg_always_on_cu_num)
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
if (counter < always_on_cu_num)
@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {

View File

@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
}
static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
u32 bitmap)
u32 bitmap, int xcc_id)
{
u32 data;
@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
{
u32 data, mask;
data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info)
{
int i, j, k, counter, active_cu_number = 0;
int i, j, k, counter, xcc_id, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
unsigned disable_masks[4 * 4];
@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
adev->gfx.config.max_sh_per_se);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
gfx_v9_4_3_set_user_cu_inactive_bitmap(
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
gfx_v9_4_3_set_user_cu_inactive_bitmap(
adev,
disable_masks[i * adev->gfx.config.max_sh_per_se + j],
xcc_id);
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
/*
* The bitmap(and ao_cu_bitmap) in cu_info structure is
* 4x4 size array, and it's usually suitable for Vega
* ASICs which has 4*2 SE/SH layout.
* But for Arcturus, SE/SH layout is changed to 8*1.
* To mostly reduce the impact, we make it compatible
* with current bitmap array as below:
* SE4,SH0 --> bitmap[0][1]
* SE5,SH0 --> bitmap[1][1]
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
cu_info->bitmap[xcc_id][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
if (counter < adev->gfx.config.max_cu_per_sh)
ao_bitmap |= mask;
counter++;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
if (counter < adev->gfx.config.max_cu_per_sh)
ao_bitmap |= mask;
counter++;
}
mask <<= 1;
}
mask <<= 1;
active_cu_number += counter;
if (i < 2 && j < 2)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
active_cu_number += counter;
if (i < 2 && j < 2)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
}
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;

View File

@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
}
if (amdgpu_sriov_vf(adev))
adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)

View File

@ -766,7 +766,7 @@ static int soc21_common_hw_init(void *handle)
* for the purpose of expose those registers
* to process space
*/
if (adev->nbio.funcs->remap_hdp_registers)
if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
adev->nbio.funcs->enable_doorbell_aperture(adev, true);

View File

@ -2087,7 +2087,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
cu->num_simd_cores = cu_info.simd_per_cu *
(cu_info.cu_active_number / kdev->kfd->num_nodes);
cu->max_waves_simd = cu_info.max_waves_per_simd;
cu->wave_front_size = cu_info.wave_front_size;

View File

@ -79,6 +79,10 @@ struct crat_header {
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
#define CRAT_SUBTYPE_MAX 6
/*
* Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
* as it breaks the ABI.
*/
#define CRAT_SIBLINGMAP_SIZE 32
/*

View File

@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->dev->kfd2kgd->build_grace_period_packet_info(
dqm->dev->adev, dqm->wait_times,
grace_period, &reg_offset,
&dqm->wait_times,
ffs(dqm->dev->xcc_mask) - 1);
&dqm->wait_times);
}
dqm_unlock(dqm);

View File

@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
return NULL;
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
inx *= 2;
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
inx /= 2;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);

View File

@ -97,18 +97,22 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask)
uint32_t *se_mask, uint32_t inst)
{
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
uint32_t cu_active_per_node;
int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
cu_mask_count = cu_info.cu_active_number;
cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
if (cu_mask_count > cu_active_per_node)
cu_mask_count = cu_active_per_node;
/* Exceeding these bounds corrupts the stack and indicates a coding error.
* Returning with no CU's enabled will hang the queue, which should be
@ -141,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
for (se = 0; se < cu_info.num_shader_engines; se++)
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
cu_per_sh[se][sh] = hweight32(
cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs:
* se_mask programs up to 2 SH in the upper and lower 16 bits.
@ -164,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
* ...
*
* For GFX 9.4.3, the following code only looks at a
* subset of the cu_mask corresponding to the inst parameter.
* If we have n XCCs under one GPU node
* cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
* cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
* ..
* cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0)
* cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
*
* For example, if there are 6 XCCs under 1 KFD node, this code
* running for each inst, will look at the bits as:
* inst, inst + 6, inst + 12...
*
* First ensure all CUs are disabled, then enable user specified CUs.
*/
for (i = 0; i < cu_info.num_shader_engines; i++)
se_mask[i] = 0;
i = 0;
for (cu = 0; cu < 16; cu += inc) {
i = inst;
for (cu = 0; cu < 16; cu += cu_inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
if (cu_mask[i / 32] & (en_mask << (i % 32)))
se_mask[se] |= en_mask << (cu + sh * 16);
i += inc;
if (i == cu_mask_count)
if (i >= cu_mask_count)
return;
}
}

View File

@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
uint32_t *se_mask, uint32_t inst);
int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,

View File

@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];

View File

@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];

View File

@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
}
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
struct v11_compute_mqd *m;
m = get_mqd(mqd);
memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
}
static void restore_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *qp,
const void *mqd_src,
const void *ctl_stack_src, const u32 ctl_stack_size)
{
uint64_t addr;
struct v11_compute_mqd *m;
m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
addr = mqd_mem_obj->gpu_addr;
memcpy(m, mqd_src, sizeof(*m));
*mqd = m;
if (gart_addr)
*gart_addr = addr;
m->cp_hqd_pq_doorbell_control =
qp->doorbell_off <<
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
m->cp_hqd_pq_doorbell_control);
qp->is_active = 0;
}
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@ -458,6 +495,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
mqd->mqd_stride = kfd_mqd_stride;
mqd->checkpoint_mqd = checkpoint_mqd;
mqd->restore_mqd = restore_mqd;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@ -502,6 +541,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
mqd->checkpoint_mqd = checkpoint_mqd;
mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)

View File

@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
struct mqd_update_info *minfo)
struct mqd_update_info *minfo, uint32_t inst)
{
struct v9_mqd *m;
uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
m->compute_static_thread_mgmt_se7 = se_mask[7];
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
m->compute_static_thread_mgmt_se7 = se_mask[7];
pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
m->compute_static_thread_mgmt_se3,
m->compute_static_thread_mgmt_se4,
m->compute_static_thread_mgmt_se5,
m->compute_static_thread_mgmt_se6,
m->compute_static_thread_mgmt_se7);
pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
m->compute_static_thread_mgmt_se3,
m->compute_static_thread_mgmt_se4,
m->compute_static_thread_mgmt_se5,
m->compute_static_thread_mgmt_se6,
m->compute_static_thread_mgmt_se7);
} else {
pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
inst, m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
m->compute_static_thread_mgmt_se3);
}
}
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, minfo);
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
@ -676,6 +686,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
update_cu_mask(mm, mqd, minfo, xcc);
if (q->format == KFD_QUEUE_FORMAT_AQL) {
switch (xcc) {
case 0:

View File

@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];

View File

@ -299,8 +299,7 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
pm->dqm->wait_times,
grace_period,
&reg_offset,
&reg_data,
0);
&reg_data);
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
reg_data = pm->dqm->wait_times;

View File

@ -1466,8 +1466,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}

View File

@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
dev->gpu ? (dev->node_props.simd_count *
NUM_XCC(dev->gpu->xcc_mask)) : 0);
dev->gpu ? dev->node_props.simd_count : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@ -1597,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int cache_type, unsigned int cu_processor_id)
int cache_type, unsigned int cu_processor_id,
struct kfd_node *knode)
{
unsigned int cu_sibling_map_mask;
int first_active_cu;
int i, j, k;
int i, j, k, xcc, start, end;
struct kfd_cache_properties *pcache = NULL;
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
start = ffs(knode->xcc_mask) - 1;
end = start + NUM_XCC(knode->xcc_mask);
cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@ -1639,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
k = 0;
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
for (xcc = start; xcc < end; xcc++) {
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
}
}
}
pcache->sibling_map_size = k;
@ -1666,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
{
struct kfd_gpu_cache_info *pcache_info = NULL;
int i, j, k;
int i, j, k, xcc, start, end;
int ct = 0;
unsigned int cu_processor_id;
int ret;
@ -1700,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
* then it will consider only one CU from
* the shared unit
*/
start = ffs(kdev->xcc_mask) - 1;
end = start + NUM_XCC(kdev->xcc_mask);
for (ct = 0; ct < num_of_cache_types; ct++) {
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
for (i = 0; i < pcu_info->num_shader_engines; i++) {
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
for (xcc = start; xcc < end; xcc++) {
for (i = 0; i < pcu_info->num_shader_engines; i++) {
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
cu_processor_id, k);
if (ret < 0)
break;
if (ret < 0)
break;
if (!ret) {
num_of_entries++;
list_add_tail(&props_ext->list, &dev->cache_props);
if (!ret) {
num_of_entries++;
list_add_tail(&props_ext->list, &dev->cache_props);
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
pcu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(pcu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
pcu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(pcu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
}
}
} else {
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
pcu_info, ct, cu_processor_id);
pcu_info, ct, cu_processor_id, kdev);
if (ret < 0)
break;

View File

@ -89,7 +89,7 @@ struct kfd_mem_properties {
struct attribute attr;
};
#define CACHE_SIBLINGMAP_SIZE 64
#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties {
struct list_head list;

View File

@ -1274,11 +1274,15 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF;
page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12);
page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF;
page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12);
page_table_base.high_part = upper_32_bits(pt_base) & 0xF;
page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >>
AMDGPU_GPU_PAGE_SHIFT);
page_table_base.high_part = upper_32_bits(pt_base);
page_table_base.low_part = lower_32_bits(pt_base);
pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
@ -1640,8 +1644,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
break;
}
if (init_data.flags.gpu_vm_support)
init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
if (init_data.flags.gpu_vm_support &&
(amdgpu_sg_display == 0))
init_data.flags.gpu_vm_support = false;
if (init_data.flags.gpu_vm_support)
adev->mode_info.gpu_vm_support = true;
@ -2335,14 +2340,62 @@ static int dm_late_init(void *handle)
return detect_mst_link_for_all_connectors(adev_to_drm(adev));
}
static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
{
int ret;
u8 guid[16];
u64 tmp64;
mutex_lock(&mgr->lock);
if (!mgr->mst_primary)
goto out_fail;
if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
goto out_fail;
}
ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
DP_MST_EN |
DP_UP_REQ_EN |
DP_UPSTREAM_IS_SRC);
if (ret < 0) {
drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n");
goto out_fail;
}
/* Some hubs forget their guids after they resume */
ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
if (ret != 16) {
drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
goto out_fail;
}
if (memchr_inv(guid, 0, 16) == NULL) {
tmp64 = get_jiffies_64();
memcpy(&guid[0], &tmp64, sizeof(u64));
memcpy(&guid[8], &tmp64, sizeof(u64));
ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16);
if (ret != 16) {
drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
goto out_fail;
}
}
memcpy(mgr->mst_primary->guid, guid, 16);
out_fail:
mutex_unlock(&mgr->lock);
}
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct drm_dp_mst_topology_mgr *mgr;
int ret;
bool need_hotplug = false;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@ -2364,18 +2417,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
if (!dp_is_lttpr_present(aconnector->dc_link))
try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
ret = drm_dp_mst_topology_mgr_resume(mgr, true);
if (ret < 0) {
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
aconnector->dc_link);
need_hotplug = true;
}
/* TODO: move resume_mst_branch_status() into drm mst resume again
* once topology probing work is pulled out from mst resume into mst
* resume 2nd step. mst resume 2nd step should be called after old
* state getting restored (i.e. drm_atomic_helper_resume()).
*/
resume_mst_branch_status(mgr);
}
}
drm_connector_list_iter_end(&iter);
if (need_hotplug)
drm_kms_helper_hotplug_event(dev);
}
static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
@ -2769,7 +2819,8 @@ static int dm_resume(void *handle)
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
struct dc_state *dc_state;
int i, r, j;
int i, r, j, ret;
bool need_hotplug = false;
if (amdgpu_in_reset(adev)) {
dc_state = dm->cached_dc_state;
@ -2867,7 +2918,7 @@ static int dm_resume(void *handle)
continue;
/*
* this is the case when traversing through already created
* this is the case when traversing through already created end sink
* MST connectors, should be skipped
*/
if (aconnector && aconnector->mst_root)
@ -2927,6 +2978,27 @@ static int dm_resume(void *handle)
dm->cached_state = NULL;
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
continue;
ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true);
if (ret < 0) {
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
aconnector->dc_link);
need_hotplug = true;
}
}
drm_connector_list_iter_end(&iter);
if (need_hotplug)
drm_kms_helper_hotplug_event(ddev);
amdgpu_dm_irq_resume_late(adev);
amdgpu_dm_smu_write_watermarks_table(adev);
@ -8073,7 +8145,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].plane_info =
&bundle->plane_infos[planes_count];
if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) {
if (acrtc_state->stream->link->psr_settings.psr_feature_enabled ||
acrtc_state->stream->link->replay_settings.replay_feature_enabled) {
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],

View File

@ -620,7 +620,7 @@ struct amdgpu_hdmi_vsdb_info {
unsigned int max_refresh_rate_hz;
/**
* @replay mode: Replay supported
* @replay_mode: Replay supported
*/
bool replay_mode;
};

View File

@ -169,11 +169,23 @@ static void add_link_enc_assignment(
/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
const struct dc_context *ctx,
const struct dc_state *state)
const struct dc_state *state,
enum engine_id eng_id_requested)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
int i;
if (eng_id_requested != ENGINE_ID_UNKNOWN) {
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
if (eng_id == eng_id_requested)
return eng_id;
}
}
eng_id = ENGINE_ID_UNKNOWN;
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
if (eng_id != ENGINE_ID_UNKNOWN)
@ -287,7 +299,7 @@ void link_enc_cfg_link_encs_assign(
struct dc_stream_state *streams[],
uint8_t stream_count)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN;
int i;
int j;
@ -377,8 +389,14 @@ void link_enc_cfg_link_encs_assign(
* assigned to that endpoint.
*/
link_enc = get_link_enc_used_by_link(state, stream->link);
if (link_enc == NULL)
eng_id = find_first_avail_link_enc(stream->ctx, state);
if (link_enc == NULL) {
if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN)
eng_id_req = stream->link->dpia_preferred_eng_id;
eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req);
}
else
eng_id = link_enc->preferred_engine;
@ -402,7 +420,9 @@ void link_enc_cfg_link_encs_assign(
DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n",
__func__,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
assignment.ep_id.link_id.enum_id - 1,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
assignment.ep_id.link_id.enum_id :
assignment.ep_id.link_id.enum_id - 1,
assignment.eng_id);
}
for (i = 0; i < MAX_PIPES; i++) {
@ -413,7 +433,9 @@ void link_enc_cfg_link_encs_assign(
DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n",
__func__,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
assignment.ep_id.link_id.enum_id - 1,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
assignment.ep_id.link_id.enum_id :
assignment.ep_id.link_id.enum_id - 1,
assignment.eng_id);
}
@ -478,7 +500,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
if (stream)
link = stream->link;
// dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id);
return link;
}

View File

@ -1496,6 +1496,7 @@ struct dc_link {
* object creation.
*/
enum engine_id eng_id;
enum engine_id dpia_preferred_eng_id;
bool test_pattern_enabled;
enum dp_test_pattern current_test_pattern;

View File

@ -964,7 +964,9 @@ void dce110_edp_backlight_control(
return;
}
if (link->panel_cntl) {
if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled ||
link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl);
if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) {

View File

@ -1032,6 +1032,28 @@ static const struct dce_i2c_mask i2c_masks = {
I2C_COMMON_MASK_SH_LIST_DCN30(_MASK)
};
/* ========================================================== */
/*
* DPIA index | Preferred Encoder | Host Router
* 0 | C | 0
* 1 | First Available | 0
* 2 | D | 1
* 3 | First Available | 1
*/
/* ========================================================== */
static const enum engine_id dpia_to_preferred_enc_id_table[] = {
ENGINE_ID_DIGC,
ENGINE_ID_DIGC,
ENGINE_ID_DIGD,
ENGINE_ID_DIGD
};
static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index)
{
return dpia_to_preferred_enc_id_table[dpia_index];
}
static struct dce_i2c_hw *dcn31_i2c_hw_create(
struct dc_context *ctx,
uint32_t inst)
@ -1785,6 +1807,7 @@ static struct resource_funcs dcn314_res_pool_funcs = {
.update_bw_bounding_box = dcn314_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn314_get_panel_config_defaults,
.get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia,
};
static struct clock_source *dcn30_clock_source_create(

View File

@ -65,6 +65,7 @@ struct resource_context;
struct clk_bw_params;
struct resource_funcs {
enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index);
void (*destroy)(struct resource_pool **pool);
void (*link_init)(struct dc_link *link);
struct panel_cntl*(*panel_cntl_create)(

View File

@ -791,6 +791,10 @@ static bool construct_dpia(struct dc_link *link,
/* Set dpia port index : 0 to number of dpia ports */
link->ddc_hw_inst = init_params->connector_index;
// Assign Dpia preferred eng_id
if (link->dc->res_pool->funcs->get_preferred_eng_id_dpia)
link->dpia_preferred_eng_id = link->dc->res_pool->funcs->get_preferred_eng_id_dpia(link->ddc_hw_inst);
/* TODO: Create link encoder */
link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;

View File

@ -31,12 +31,12 @@
#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/dma-fence.h>
#include "amdgpu_irq.h"
#include "amdgpu_gfx.h"
struct pci_dev;
struct amdgpu_device;
#define KGD_MAX_QUEUES 128
struct kfd_dev;
struct kgd_mem;
@ -68,7 +68,7 @@ struct kfd_cu_info {
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
uint32_t cu_bitmap[4][4];
uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
/* For getting GPU local memory information from KGD */
@ -326,8 +326,7 @@ struct kfd2kgd_calls {
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data,
uint32_t inst);
uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
void (*program_trap_handler_settings)(struct amdgpu_device *adev,

View File

@ -336,7 +336,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
/* Store one-time values in driver PPTable */
if (!pptable->Init) {
while (retry--) {
while (--retry) {
ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
if (ret)
return ret;

View File

@ -2203,6 +2203,7 @@ static int drm_mode_create_colorspace_property(struct drm_connector *connector,
/**
* drm_mode_create_hdmi_colorspace_property - create hdmi colorspace property
* @connector: connector to create the Colorspace property on.
* @supported_colorspaces: bitmap of supported color spaces
*
* Called by a driver the first time it's needed, must be attached to desired
* HDMI connectors.
@ -2227,6 +2228,7 @@ EXPORT_SYMBOL(drm_mode_create_hdmi_colorspace_property);
/**
* drm_mode_create_dp_colorspace_property - create dp colorspace property
* @connector: connector to create the Colorspace property on.
* @supported_colorspaces: bitmap of supported color spaces
*
* Called by a driver the first time it's needed, must be attached to desired
* DP connectors.

View File

@ -56,7 +56,7 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
struct drm_gem_object *obj;
unsigned long index;
drm_exec_for_each_locked_object(exec, index, obj) {
drm_exec_for_each_locked_object_reverse(exec, index, obj) {
dma_resv_unlock(obj->resv);
drm_gem_object_put(obj);
}

View File

@ -3540,6 +3540,27 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata)
return map_aux_ch(devdata->i915, devdata->child.aux_channel);
}
bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata)
{
struct drm_i915_private *i915;
u8 aux_channel;
int count = 0;
if (!devdata || !devdata->child.aux_channel)
return false;
i915 = devdata->i915;
aux_channel = devdata->child.aux_channel;
list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) {
if (intel_bios_encoder_supports_dp(devdata) &&
aux_channel == devdata->child.aux_channel)
count++;
}
return count > 1;
}
int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata)
{
if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost)

View File

@ -273,6 +273,7 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata);
bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata);

View File

@ -5512,8 +5512,13 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
/*
* VBT and straps are liars. Also check HPD as that seems
* to be the most reliable piece of information available.
*
* ... expect on devices that forgot to hook HPD up for eDP
* (eg. Acer Chromebook C710), so we'll check it only if multiple
* ports are attempting to use the same AUX CH, according to VBT.
*/
if (!intel_digital_port_connected(encoder)) {
if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) &&
!intel_digital_port_connected(encoder)) {
/*
* If this fails, presume the DPCD answer came
* from some other port using the same AUX CH.

View File

@ -123,7 +123,7 @@ int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager,
unsigned int size, unsigned int align)
{
struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
GFP_KERNEL, true, align);
GFP_KERNEL, false, align);
if (IS_ERR(sa)) {
*sa_bo = NULL;

View File

@ -70,10 +70,10 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)");
#define READ_STATUS_SIZE 13
#define MISC_VALUE_SIZE 4
#define CMD_TIMEOUT msecs_to_jiffies(200)
#define DATA_TIMEOUT msecs_to_jiffies(1000)
#define IDLE_TIMEOUT msecs_to_jiffies(2000)
#define FIRST_FRAME_TIMEOUT msecs_to_jiffies(2000)
#define CMD_TIMEOUT 200
#define DATA_TIMEOUT 1000
#define IDLE_TIMEOUT 2000
#define FIRST_FRAME_TIMEOUT 2000
#define MISC_REQ_GET_SET_ECO_A 0xff
#define MISC_REQ_GET_SET_ECO_B 0x35
@ -389,7 +389,7 @@ static void gm12u320_fb_update_work(struct work_struct *work)
* switches back to showing its logo.
*/
queue_delayed_work(system_long_wq, &gm12u320->fb_update.work,
IDLE_TIMEOUT);
msecs_to_jiffies(IDLE_TIMEOUT));
return;
err:

View File

@ -408,15 +408,10 @@ void vkms_set_composer(struct vkms_output *out, bool enabled)
if (enabled)
drm_crtc_vblank_get(&out->crtc);
mutex_lock(&out->enabled_lock);
spin_lock_irq(&out->lock);
old_enabled = out->composer_enabled;
out->composer_enabled = enabled;
/* the composition wasn't enabled, so unlock the lock to make sure the lock
* will be balanced even if we have a failed commit
*/
if (!out->composer_enabled)
mutex_unlock(&out->enabled_lock);
spin_unlock_irq(&out->lock);
if (old_enabled)
drm_crtc_vblank_put(&out->crtc);

View File

@ -16,7 +16,7 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
struct drm_crtc *crtc = &output->crtc;
struct vkms_crtc_state *state;
u64 ret_overrun;
bool ret, fence_cookie, composer_enabled;
bool ret, fence_cookie;
fence_cookie = dma_fence_begin_signalling();
@ -25,15 +25,15 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
if (ret_overrun != 1)
pr_warn("%s: vblank timer overrun\n", __func__);
spin_lock(&output->lock);
ret = drm_crtc_handle_vblank(crtc);
if (!ret)
DRM_ERROR("vkms failure on handling vblank");
state = output->composer_state;
composer_enabled = output->composer_enabled;
mutex_unlock(&output->enabled_lock);
spin_unlock(&output->lock);
if (state && composer_enabled) {
if (state && output->composer_enabled) {
u64 frame = drm_crtc_accurate_vblank_count(crtc);
/* update frame_start only if a queued vkms_composer_worker()
@ -295,7 +295,6 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
spin_lock_init(&vkms_out->lock);
spin_lock_init(&vkms_out->composer_lock);
mutex_init(&vkms_out->enabled_lock);
vkms_out->composer_workq = alloc_ordered_workqueue("vkms_composer", 0);
if (!vkms_out->composer_workq)

View File

@ -108,10 +108,8 @@ struct vkms_output {
struct workqueue_struct *composer_workq;
/* protects concurrent access to composer */
spinlock_t lock;
/* guarantees that if the composer is enabled, a job will be queued */
struct mutex enabled_lock;
/* protected by @enabled_lock */
/* protected by @lock */
bool composer_enabled;
struct vkms_crtc_state *composer_state;

View File

@ -51,6 +51,20 @@ struct drm_exec {
struct drm_gem_object *prelocked;
};
/**
* drm_exec_obj() - Return the object for a give drm_exec index
* @exec: Pointer to the drm_exec context
* @index: The index.
*
* Return: Pointer to the locked object corresponding to @index if
* index is within the number of locked objects. NULL otherwise.
*/
static inline struct drm_gem_object *
drm_exec_obj(struct drm_exec *exec, unsigned long index)
{
return index < exec->num_objects ? exec->objects[index] : NULL;
}
/**
* drm_exec_for_each_locked_object - iterate over all the locked objects
* @exec: drm_exec object
@ -59,10 +73,23 @@ struct drm_exec {
*
* Iterate over all the locked GEM objects inside the drm_exec object.
*/
#define drm_exec_for_each_locked_object(exec, index, obj) \
for (index = 0, obj = (exec)->objects[0]; \
index < (exec)->num_objects; \
++index, obj = (exec)->objects[index])
#define drm_exec_for_each_locked_object(exec, index, obj) \
for ((index) = 0; ((obj) = drm_exec_obj(exec, index)); ++(index))
/**
* drm_exec_for_each_locked_object_reverse - iterate over all the locked
* objects in reverse locking order
* @exec: drm_exec object
* @index: unsigned long index for the iteration
* @obj: the current GEM object
*
* Iterate over all the locked GEM objects inside the drm_exec object in
* reverse locking order. Note that @index may go below zero and wrap,
* but that will be caught by drm_exec_obj(), returning a NULL object.
*/
#define drm_exec_for_each_locked_object_reverse(exec, index, obj) \
for ((index) = (exec)->num_objects - 1; \
((obj) = drm_exec_obj(exec, index)); --(index))
/**
* drm_exec_until_all_locked - loop until all GEM objects are locked

View File

@ -3,6 +3,8 @@
#ifndef DRM_KUNIT_HELPERS_H_
#define DRM_KUNIT_HELPERS_H_
#include <linux/device.h>
#include <kunit/test.h>
struct drm_device;
@ -51,7 +53,7 @@ __drm_kunit_helper_alloc_drm_device(struct kunit *test,
{
struct drm_driver *driver;
driver = kunit_kzalloc(test, sizeof(*driver), GFP_KERNEL);
driver = devm_kzalloc(dev, sizeof(*driver), GFP_KERNEL);
KUNIT_ASSERT_NOT_NULL(test, driver);
driver->driver_features = features;