drm/msm/a6xx: Improve gpu recovery sequence

We can do a few more things to improve our chance at a successful gpu
recovery, especially during a hangcheck timeout:
1. Halt CP and GMU core
2. Do RBBM GBIF HALT sequence
3. Do a soft reset of GPU core

Signed-off-by: Akhil P Oommen <quic_akhilpo@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/498400/
Link: https://lore.kernel.org/r/20220819015030.v5.6.Idf2ba51078e87ae7ceb75cc77a5bd4ff2bd31eab@changeid
Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
Akhil P Oommen 2022-08-19 01:52:14 +05:30 committed by Rob Clark
parent 1f6cca4049
commit 3a9dd708b9
3 changed files with 58 additions and 30 deletions

View File

@ -1413,6 +1413,10 @@ static inline uint32_t REG_A6XX_RBBM_PERFCTR_RBBM_SEL(uint32_t i0) { return 0x00
#define REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL 0x00000011
#define REG_A6XX_RBBM_GBIF_HALT 0x00000016
#define REG_A6XX_RBBM_GBIF_HALT_ACK 0x00000017
#define REG_A6XX_RBBM_WAIT_FOR_GPU_IDLE_CMD 0x0000001c
#define A6XX_RBBM_WAIT_FOR_GPU_IDLE_CMD_WAIT_GPU_IDLE 0x00000001

View File

@ -873,9 +873,47 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu)
(val & 1), 100, 1000);
}
#define GBIF_CLIENT_HALT_MASK BIT(0)
#define GBIF_ARB_HALT_MASK BIT(1)
static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
{
struct msm_gpu *gpu = &adreno_gpu->base;
if (!a6xx_has_gbif(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
0xf) == 0xf);
gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
return;
}
/* Halt the gx side of GBIF */
gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
/* Halt new client requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
/* Halt all AXI requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
/* The GBIF halt needs to be explicitly cleared */
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}
/* Force the GMU off in case it isn't responsive */
static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
{
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
struct msm_gpu *gpu = &adreno_gpu->base;
/* Flush all the queues */
a6xx_hfi_stop(gmu);
@ -887,6 +925,15 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Make sure there are no outstanding RPMh votes */
a6xx_gmu_rpmh_off(gmu);
/* Halt the gmu cm3 core */
gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1);
a6xx_bus_clear_pending_transactions(adreno_gpu);
/* Reset GPU core blocks */
gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, 1);
udelay(100);
}
static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
@ -1014,36 +1061,6 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu)
return true;
}
#define GBIF_CLIENT_HALT_MASK BIT(0)
#define GBIF_ARB_HALT_MASK BIT(1)
static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
{
struct msm_gpu *gpu = &adreno_gpu->base;
if (!a6xx_has_gbif(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
0xf) == 0xf);
gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
return;
}
/* Halt new client requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
/* Halt all AXI requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
/* The GBIF halt needs to be explicitly cleared */
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}
/* Gracefully try to shut down the GMU and by extension the GPU */
static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
{

View File

@ -988,6 +988,10 @@ static int hw_init(struct msm_gpu *gpu)
/* Make sure the GMU keeps the GPU on while we set it up */
a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
/* Clear GBIF halt in case GX domain was not collapsed */
if (a6xx_has_gbif(adreno_gpu))
gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
/*
@ -1273,6 +1277,9 @@ static void a6xx_recover(struct msm_gpu *gpu)
if (hang_debug)
a6xx_dump(gpu);
/* Halt SQE first */
gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
/*
* Turn off keep alive that might have been enabled by the hang
* interrupt