diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 7c773e003663..261fa79d456d 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -11,6 +11,9 @@ msm-y := \ adreno/a5xx_gpu.o \ adreno/a5xx_power.o \ adreno/a5xx_preempt.o \ + adreno/a6xx_gpu.o \ + adreno/a6xx_gmu.o \ + adreno/a6xx_hfi.o \ hdmi/hdmi.o \ hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c new file mode 100644 index 000000000000..fbb501986720 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -0,0 +1,1207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ + +#include +#include +#include +#include + +#include "a6xx_gpu.h" +#include "a6xx_gmu.xml.h" + +static irqreturn_t a6xx_gmu_irq(int irq, void *data) +{ + struct a6xx_gmu *gmu = data; + u32 status; + + status = gmu_read(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_STATUS); + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, status); + + if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE) { + dev_err_ratelimited(gmu->dev, "GMU watchdog expired\n"); + + /* Temporary until we can recover safely */ + BUG(); + } + + if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR) + dev_err_ratelimited(gmu->dev, "GMU AHB bus error\n"); + + if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR) + dev_err_ratelimited(gmu->dev, "GMU fence error: 0x%x\n", + gmu_read(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS)); + + return IRQ_HANDLED; +} + +static irqreturn_t a6xx_hfi_irq(int irq, void *data) +{ + struct a6xx_gmu *gmu = data; + u32 status; + + status = gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, status); + + if (status & A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ) + tasklet_schedule(&gmu->hfi_tasklet); + + if (status & A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT) { + dev_err_ratelimited(gmu->dev, "GMU firmware fault\n"); + + /* Temporary until we can recover safely */ + BUG(); + } + + return IRQ_HANDLED; +} + +/* Check to see if the GX rail is still powered */ +static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) +{ + u32 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + + return !(val & + (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | + A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); +} + +static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) +{ + gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0); + + gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING, + ((index << 24) & 0xff) | (3 & 0xf)); + + /* + * Send an invalid index as a vote for the bus bandwidth and let the + * firmware decide on the right vote + */ + gmu_write(gmu, REG_A6XX_GMU_DCVS_BW_SETTING, 0xff); + + /* Set and clear the OOB for DCVS to trigger the GMU */ + a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET); + a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET); + + return gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN); +} + +static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu) +{ + u32 val; + int local = gmu->idle_level; + + /* SPTP and IFPC both report as IFPC */ + if (gmu->idle_level == GMU_IDLE_STATE_SPTP) + local = GMU_IDLE_STATE_IFPC; + + val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE); + + if (val == local) { + if (gmu->idle_level != GMU_IDLE_STATE_IFPC || + !a6xx_gmu_gx_is_on(gmu)) + return true; + } + + return false; +} + +/* Wait for the GMU to get to its most idle state */ +int a6xx_gmu_wait_for_idle(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = 
&a6xx_gpu->gmu; + + return spin_until(a6xx_gmu_check_idle_level(gmu)); +} + +static int a6xx_gmu_start(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1); + gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 0); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, val, + val == 0xbabeface, 100, 10000); + + if (ret) + dev_err(gmu->dev, "GMU firmware initialization timed out\n"); + + return ret; +} + +static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu) +{ + u32 val; + int ret; + + gmu_rmw(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, + A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ, 0); + + gmu_write(gmu, REG_A6XX_GMU_HFI_CTRL_INIT, 1); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_HFI_CTRL_STATUS, val, + val & 1, 100, 10000); + if (ret) + dev_err(gmu->dev, "Unable to start the HFI queues\n"); + + return ret; +} + +/* Trigger an OOB (out of band) request to the GMU */ +int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) +{ + int ret; + u32 val; + int request, ack; + const char *name; + + switch (state) { + case GMU_OOB_GPU_SET: + request = GMU_OOB_GPU_SET_REQUEST; + ack = GMU_OOB_GPU_SET_ACK; + name = "GPU_SET"; + break; + case GMU_OOB_BOOT_SLUMBER: + request = GMU_OOB_BOOT_SLUMBER_REQUEST; + ack = GMU_OOB_BOOT_SLUMBER_ACK; + name = "BOOT_SLUMBER"; + break; + case GMU_OOB_DCVS_SET: + request = GMU_OOB_DCVS_REQUEST; + ack = GMU_OOB_DCVS_ACK; + name = "GPU_DCVS"; + break; + default: + return -EINVAL; + } + + /* Trigger the requested OOB operation */ + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << request); + + /* Wait for the acknowledge interrupt */ + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO, val, + val & (1 << ack), 100, 10000); + + if (ret) + dev_err(gmu->dev, + "Timeout waiting for GMU OOB set %s: 0x%x\n", + name, + gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO)); + + /* Clear the acknowledge interrupt */ + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, 1 << ack); + + return ret; +} + +/* Clear a pending OOB state in the GMU */ +void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) +{ + switch (state) { + case GMU_OOB_GPU_SET: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_GPU_SET_CLEAR); + break; + case GMU_OOB_BOOT_SLUMBER: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_BOOT_SLUMBER_CLEAR); + break; + case GMU_OOB_DCVS_SET: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_DCVS_CLEAR); + break; + } +} + +/* Enable CPU control of SPTP power collapse */ +static int a6xx_sptprac_enable(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, val, + (val & 0x38) == 0x28, 1, 100); + + if (ret) { + dev_err(gmu->dev, "Unable to power on SPTPRAC: 0x%x\n", + gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS)); + } + + return 0; +} + +/* Disable CPU control of SPTP power collapse */ +static void a6xx_sptprac_disable(struct a6xx_gmu *gmu) +{ + u32 val; + int ret; + + /* Make sure retention is on */ + gmu_rmw(gmu, REG_A6XX_GPU_CC_GX_GDSCR, 0, (1 << 11)); + + gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778001); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, val, + (val & 0x04), 100, 10000); + + if (ret) + dev_err(gmu->dev, "failed to power off SPTPRAC: 0x%x\n", + gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS)); +} + +/* Let the GMU know we are starting a boot sequence */ +static int 
a6xx_gmu_gfx_rail_on(struct a6xx_gmu *gmu) +{ + u32 vote; + + /* Let the GMU know we are getting ready for boot */ + gmu_write(gmu, REG_A6XX_GMU_BOOT_SLUMBER_OPTION, 0); + + /* Choose the "default" power level as the highest available */ + vote = gmu->gx_arc_votes[gmu->nr_gpu_freqs - 1]; + + gmu_write(gmu, REG_A6XX_GMU_GX_VOTE_IDX, vote & 0xff); + gmu_write(gmu, REG_A6XX_GMU_MX_VOTE_IDX, (vote >> 8) & 0xff); + + /* Let the GMU know the boot sequence has started */ + return a6xx_gmu_set_oob(gmu, GMU_OOB_BOOT_SLUMBER); +} + +/* Let the GMU know that we are about to go into slumber */ +static int a6xx_gmu_notify_slumber(struct a6xx_gmu *gmu) +{ + int ret; + + /* Disable the power counter so the GMU isn't busy */ + gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); + + /* Disable SPTP_PC if the CPU is responsible for it */ + if (gmu->idle_level < GMU_IDLE_STATE_SPTP) + a6xx_sptprac_disable(gmu); + + /* Tell the GMU to get ready to slumber */ + gmu_write(gmu, REG_A6XX_GMU_BOOT_SLUMBER_OPTION, 1); + + ret = a6xx_gmu_set_oob(gmu, GMU_OOB_BOOT_SLUMBER); + a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + + if (!ret) { + /* Check to see if the GMU really did slumber */ + if (gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE) + != 0x0f) { + dev_err(gmu->dev, "The GMU did not go into slumber\n"); + ret = -ETIMEDOUT; + } + } + + /* Put fence into allow mode */ + gmu_write(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); + return ret; +} + +static int a6xx_rpmh_start(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1 << 1); + /* Wait for the register to finish posting */ + wmb(); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val, + val & (1 << 1), 100, 10000); + if (ret) { + dev_err(gmu->dev, "Unable to power on the GPU RSC\n"); + return ret; + } + + ret = gmu_poll_timeout(gmu, REG_A6XX_RSCC_SEQ_BUSY_DRV0, val, + !val, 100, 10000); + + if (!ret) { + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); + + /* Re-enable the power counter */ + gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); + return 0; + } + + dev_err(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n"); + return ret; +} + +static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, + val, val & (1 << 16), 100, 10000); + if (ret) + dev_err(gmu->dev, "Unable to power off the GPU RSC\n"); + + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); +} + +static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) +{ + /* Disable SDE clock gating */ + gmu_write(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24)); + + /* Setup RSC PDC handshake for sleep and wakeup */ + gmu_write(gmu, REG_A6XX_RSCC_PDC_SLAVE_ID_DRV0, 1); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + 2, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + 2, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + 4, 0x80000000); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + 4, 0); + gmu_write(gmu, REG_A6XX_RSCC_OVERRIDE_START_ADDR, 0); + gmu_write(gmu, REG_A6XX_RSCC_PDC_SEQ_START_ADDR, 0x4520); + gmu_write(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_LO, 0x4510); + gmu_write(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514); + + /* Load RSC sequencer uCode for sleep and wakeup */ + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0, 0xa7a506a0); + 
gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xa1e6a6e7); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e081e1); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 3, 0xe9a982e2); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020e8a8); + + /* Load PDC sequencer uCode for power up and power down sequence */ + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0, 0xfebea1e1); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 1, 0xa5a4a3a2); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 2, 0x8382a6e0); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 3, 0xbce3e284); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 4, 0x002081fc); + + /* Set TCS commands used by PDC sequence for low power modes */ + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK, 7); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CONTROL, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR, 0x30010); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA, 1); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 4, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 4, 0x30000); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 4, 0x0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 8, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30080); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 8, 0x0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK, 7); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CONTROL, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR, 0x30010); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA, 2); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 4, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 4, 0x30000); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 8, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30080); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 8, 0x3); + + /* Setup GPU PDC */ + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_START_ADDR, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_ENABLE_PDC, 0x80000001); + + /* ensure no writes happen before the uCode is fully written */ + wmb(); +} + +/* + * The lowest 16 bits of this value are the number of XO clock cycles for main + * hysteresis which is set at 0x1680 cycles (300 us). 
The higher 16 bits are + * for the shorter hysteresis that happens after main - this is 0xa (.5 us) + */ + +#define GMU_PWR_COL_HYST 0x000a1680 + +/* Set up the idle state for the GMU */ +static void a6xx_gmu_power_config(struct a6xx_gmu *gmu) +{ + /* Disable GMU WB/RB buffer */ + gmu_write(gmu, REG_A6XX_GMU_SYS_BUS_CONFIG, 0x1); + + gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0x9c40400); + + switch (gmu->idle_level) { + case GMU_IDLE_STATE_IFPC: + gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_HYST, + GMU_PWR_COL_HYST); + gmu_rmw(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0, + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE | + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_HM_POWER_COLLAPSE_ENABLE); + /* Fall through */ + case GMU_IDLE_STATE_SPTP: + gmu_write(gmu, REG_A6XX_GMU_PWR_COL_SPTPRAC_HYST, + GMU_PWR_COL_HYST); + gmu_rmw(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0, + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE | + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_SPTPRAC_POWER_CONTROL_ENABLE); + } + + /* Enable RPMh GPU client */ + gmu_rmw(gmu, REG_A6XX_GMU_RPMH_CTRL, 0, + A6XX_GMU_RPMH_CTRL_RPMH_INTERFACE_ENABLE | + A6XX_GMU_RPMH_CTRL_LLC_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_DDR_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_MX_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_CX_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_GFX_VOTE_ENABLE); +} + +static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) +{ + static bool rpmh_init; + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + int i, ret; + u32 chipid; + u32 *image; + + if (state == GMU_WARM_BOOT) { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + } else { + if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU], + "GMU firmware is not loaded\n")) + return -ENOENT; + + /* Sanity check the size of the firmware that was loaded */ + if (adreno_gpu->fw[ADRENO_FW_GMU]->size > 0x8000) { + dev_err(gmu->dev, + "GMU firmware is bigger than the available region\n"); + return -EINVAL; + } + + /* Turn on register retention */ + gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1); + + /* We only need to load the RPMh microcode once */ + if (!rpmh_init) { + a6xx_gmu_rpmh_init(gmu); + rpmh_init = true; + } else if (state != GMU_RESET) { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + } + + image = (u32 *) adreno_gpu->fw[ADRENO_FW_GMU]->data; + + for (i = 0; i < adreno_gpu->fw[ADRENO_FW_GMU]->size >> 2; i++) + gmu_write(gmu, REG_A6XX_GMU_CM3_ITCM_START + i, + image[i]); + } + + gmu_write(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, 0); + gmu_write(gmu, REG_A6XX_GMU_CM3_BOOT_CONFIG, 0x02); + + /* Write the iova of the HFI table */ + gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_ADDR, gmu->hfi->iova); + gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_INFO, 1); + + gmu_write(gmu, REG_A6XX_GMU_AHB_FENCE_RANGE_0, + (1 << 31) | (0xa << 18) | (0xa0)); + + chipid = adreno_gpu->rev.core << 24; + chipid |= adreno_gpu->rev.major << 16; + chipid |= adreno_gpu->rev.minor << 12; + chipid |= adreno_gpu->rev.patchid << 8; + + gmu_write(gmu, REG_A6XX_GMU_HFI_SFR_ADDR, chipid); + + /* Set up the lowest idle level on the GMU */ + a6xx_gmu_power_config(gmu); + + ret = a6xx_gmu_start(gmu); + if (ret) + return ret; + + ret = a6xx_gmu_gfx_rail_on(gmu); + if (ret) + return ret; + + /* Enable SPTP_PC if the CPU is responsible for it */ + if (gmu->idle_level < GMU_IDLE_STATE_SPTP) { + ret = a6xx_sptprac_enable(gmu); + if (ret) + return ret; + } + + ret = a6xx_gmu_hfi_start(gmu); + if (ret) + return ret; + + /* FIXME: Do we need this wmb() here? 
*/ + wmb(); + + return 0; +} + +#define A6XX_HFI_IRQ_MASK \ + (A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ | \ + A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT) + +#define A6XX_GMU_IRQ_MASK \ + (A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE | \ + A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR | \ + A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR) + +static void a6xx_gmu_irq_enable(struct a6xx_gmu *gmu) +{ + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0); + + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, + ~A6XX_GMU_IRQ_MASK); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, + ~A6XX_HFI_IRQ_MASK); + + enable_irq(gmu->gmu_irq); + enable_irq(gmu->hfi_irq); +} + +static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu) +{ + disable_irq(gmu->gmu_irq); + disable_irq(gmu->hfi_irq); + + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, ~0); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~0); +} + +int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + int ret; + u32 val; + + /* Flush all the queues */ + a6xx_hfi_stop(gmu); + + /* Stop the interrupts */ + a6xx_gmu_irq_disable(gmu); + + /* Force off SPTP in case the GMU is managing it */ + a6xx_sptprac_disable(gmu); + + /* Make sure there are no outstanding RPMh votes */ + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val, + (val & 1), 100, 1000); + + /* Force off the GX GSDC */ + regulator_force_disable(gmu->gx); + + /* Disable the resources */ + clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); + pm_runtime_put_sync(gmu->dev); + + /* Re-enable the resources */ + pm_runtime_get_sync(gmu->dev); + + /* Use a known rate to bring up the GMU */ + clk_set_rate(gmu->core_clk, 200000000); + ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); + if (ret) + goto out; + + a6xx_gmu_irq_enable(gmu); + + ret = a6xx_gmu_fw_start(gmu, GMU_RESET); + if (!ret) + ret = a6xx_hfi_start(gmu, GMU_COLD_BOOT); + + /* Set the GPU back to the highest power frequency */ + a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1); + +out: + if (ret) + a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + + return ret; +} + +int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + int status, ret; + + if (WARN(!gmu->mmio, "The GMU is not set up yet\n")) + return 0; + + /* Turn on the resources */ + pm_runtime_get_sync(gmu->dev); + + /* Use a known rate to bring up the GMU */ + clk_set_rate(gmu->core_clk, 200000000); + ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); + if (ret) + goto out; + + a6xx_gmu_irq_enable(gmu); + + /* Check to see if we are doing a cold or warm boot */ + status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ? 
+ GMU_WARM_BOOT : GMU_COLD_BOOT; + + ret = a6xx_gmu_fw_start(gmu, status); + if (ret) + goto out; + + ret = a6xx_hfi_start(gmu, status); + + /* Set the GPU to the highest power frequency */ + a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1); + +out: + /* Make sure to turn off the boot OOB request on error */ + if (ret) + a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + + return ret; +} + +bool a6xx_gmu_isidle(struct a6xx_gmu *gmu) +{ + u32 reg; + + if (!gmu->mmio) + return true; + + reg = gmu_read(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS); + + if (reg & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB) + return false; + + return true; +} + +int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 val; + + /* + * The GMU may still be in slumber unless the GPU started so check and + * skip putting it back into slumber if so + */ + val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE); + + if (val != 0xf) { + int ret = a6xx_gmu_wait_for_idle(a6xx_gpu); + + /* Temporary until we can recover safely */ + BUG_ON(ret); + + /* tell the GMU we want to slumber */ + a6xx_gmu_notify_slumber(gmu); + + ret = gmu_poll_timeout(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val, + !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB), + 100, 10000); + + /* + * Let the user know we failed to slumber but don't worry too + * much because we are powering down anyway + */ + + if (ret) + dev_err(gmu->dev, + "Unable to slumber GMU: status = 0%x/0%x\n", + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS), + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2)); + } + + /* Turn off HFI */ + a6xx_hfi_stop(gmu); + + /* Stop the interrupts and mask the hardware */ + a6xx_gmu_irq_disable(gmu); + + /* Tell RPMh to power off the GPU */ + a6xx_rpmh_stop(gmu); + + clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); + + pm_runtime_put_sync(gmu->dev); + + return 0; +} + +static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo) +{ + int count, i; + u64 iova; + + if (IS_ERR_OR_NULL(bo)) + return; + + count = bo->size >> PAGE_SHIFT; + iova = bo->iova; + + for (i = 0; i < count; i++, iova += PAGE_SIZE) { + iommu_unmap(gmu->domain, iova, PAGE_SIZE); + __free_pages(bo->pages[i], 0); + } + + kfree(bo->pages); + kfree(bo); +} + +static struct a6xx_gmu_bo *a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, + size_t size) +{ + struct a6xx_gmu_bo *bo; + int ret, count, i; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + bo->size = PAGE_ALIGN(size); + + count = bo->size >> PAGE_SHIFT; + + bo->pages = kcalloc(count, sizeof(struct page *), GFP_KERNEL); + if (!bo->pages) { + kfree(bo); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < count; i++) { + bo->pages[i] = alloc_page(GFP_KERNEL); + if (!bo->pages[i]) + goto err; + } + + bo->iova = gmu->uncached_iova_base; + + for (i = 0; i < count; i++) { + ret = iommu_map(gmu->domain, + bo->iova + (PAGE_SIZE * i), + page_to_phys(bo->pages[i]), PAGE_SIZE, + IOMMU_READ | IOMMU_WRITE); + + if (ret) { + dev_err(gmu->dev, "Unable to map GMU buffer object\n"); + + for (i = i - 1 ; i >= 0; i--) + iommu_unmap(gmu->domain, + bo->iova + (PAGE_SIZE * i), + PAGE_SIZE); + + goto err; + } + } + + bo->virt = vmap(bo->pages, count, VM_IOREMAP, + pgprot_writecombine(PAGE_KERNEL)); + if (!bo->virt) + goto err; + + /* Align future IOVA addresses on 1MB boundaries */ + gmu->uncached_iova_base += ALIGN(size, SZ_1M); + + return bo; + +err: + for (i = 0; i < count; i++) { + if (bo->pages[i]) + 
__free_pages(bo->pages[i], 0); + } + + kfree(bo->pages); + kfree(bo); + + return ERR_PTR(-ENOMEM); +} + +static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) +{ + int ret; + + /* + * The GMU address space is hardcoded to treat the range + * 0x60000000 - 0x80000000 as un-cached memory. All buffers shared + * between the GMU and the CPU will live in this space + */ + gmu->uncached_iova_base = 0x60000000; + + + gmu->domain = iommu_domain_alloc(&platform_bus_type); + if (!gmu->domain) + return -ENODEV; + + ret = iommu_attach_device(gmu->domain, gmu->dev); + + if (ret) { + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + } + + return ret; +} + +/* Get the list of RPMh voltage levels from cmd-db */ +static int a6xx_gmu_rpmh_arc_cmds(const char *id, void *vals, int size) +{ + u32 len = cmd_db_read_aux_data_len(id); + + if (!len) + return 0; + + if (WARN_ON(len > size)) + return -EINVAL; + + cmd_db_read_aux_data(id, vals, len); + + /* + * The data comes back as an array of unsigned shorts so adjust the + * count accordingly + */ + return len >> 1; +} + +/* Return the 'arc-level' for the given frequency */ +static u32 a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) +{ + struct dev_pm_opp *opp; + struct device_node *np; + u32 val = 0; + + if (!freq) + return 0; + + opp = dev_pm_opp_find_freq_exact(dev, freq, true); + if (IS_ERR(opp)) + return 0; + + np = dev_pm_opp_get_of_node(opp); + + if (np) { + of_property_read_u32(np, "qcom,level", &val); + of_node_put(np); + } + + dev_pm_opp_put(opp); + + return val; +} + +static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes, + unsigned long *freqs, int freqs_count, + u16 *pri, int pri_count, + u16 *sec, int sec_count) +{ + int i, j; + + /* Construct a vote for each frequency */ + for (i = 0; i < freqs_count; i++) { + u8 pindex = 0, sindex = 0; + u32 level = a6xx_gmu_get_arc_level(dev, freqs[i]); + + /* Get the primary index that matches the arc level */ + for (j = 0; j < pri_count; j++) { + if (pri[j] >= level) { + pindex = j; + break; + } + } + + if (j == pri_count) { + dev_err(dev, + "Level %u not found in the RPMh list\n", + level); + dev_err(dev, "Available levels:\n"); + for (j = 0; j < pri_count; j++) + dev_err(dev, " %u\n", pri[j]); + + return -EINVAL; + } + + /* + * Look for a level in the secondary list that matches. If + * nothing fits, use the maximum non-zero vote + */ + + for (j = 0; j < sec_count; j++) { + if (sec[j] >= level) { + sindex = j; + break; + } else if (sec[j]) { + sindex = j; + } + } + + /* Construct the vote */ + votes[i] = ((pri[pindex] & 0xffff) << 16) | + (sindex << 8) | pindex; + } + + return 0; +} + +/* + * The GMU votes with the RPMh for itself and on behalf of the GPU but we need + * to construct the list of votes on the CPU and send it over. 
Query the RPMh + * voltage levels and build the votes + */ + +static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + + u16 gx[16], cx[16], mx[16]; + u32 gxcount, cxcount, mxcount; + int ret; + + /* Get the list of available voltage levels for each component */ + gxcount = a6xx_gmu_rpmh_arc_cmds("gfx.lvl", gx, sizeof(gx)); + cxcount = a6xx_gmu_rpmh_arc_cmds("cx.lvl", cx, sizeof(cx)); + mxcount = a6xx_gmu_rpmh_arc_cmds("mx.lvl", mx, sizeof(mx)); + + /* Build the GX votes */ + ret = a6xx_gmu_rpmh_arc_votes_init(&gpu->pdev->dev, gmu->gx_arc_votes, + gmu->gpu_freqs, gmu->nr_gpu_freqs, + gx, gxcount, mx, mxcount); + + /* Build the CX votes */ + ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes, + gmu->gmu_freqs, gmu->nr_gmu_freqs, + cx, cxcount, mx, mxcount); + + return ret; +} + +static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs, + u32 size) +{ + int count = dev_pm_opp_get_opp_count(dev); + struct dev_pm_opp *opp; + int i, index = 0; + unsigned long freq = 1; + + /* + * The OPP table doesn't contain the "off" frequency level so we need to + * add 1 to the table size to account for it + */ + + if (WARN(count + 1 > size, + "The GMU frequency table is being truncated\n")) + count = size - 1; + + /* Set the "off" frequency */ + freqs[index++] = 0; + + for (i = 0; i < count; i++) { + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + freqs[index++] = freq++; + } + + return index; +} + +static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + + int ret = 0; + + /* + * The GMU handles its own frequency switching so build a list of + * available frequencies to send during initalization + */ + ret = dev_pm_opp_of_add_table(gmu->dev); + if (ret) { + dev_err(gmu->dev, "Unable to set the OPP table for the GMU\n"); + return ret; + } + + gmu->nr_gmu_freqs = a6xx_gmu_build_freq_table(gmu->dev, + gmu->gmu_freqs, ARRAY_SIZE(gmu->gmu_freqs)); + + /* + * The GMU also handles GPU frequency switching so build a list + * from the GPU OPP table + */ + gmu->nr_gpu_freqs = a6xx_gmu_build_freq_table(&gpu->pdev->dev, + gmu->gpu_freqs, ARRAY_SIZE(gmu->gpu_freqs)); + + /* Build the list of RPMh votes that we'll send to the GMU */ + return a6xx_gmu_rpmh_votes_init(gmu); +} + +static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu) +{ + int ret = msm_clk_bulk_get(gmu->dev, &gmu->clocks); + + if (ret < 1) + return ret; + + gmu->nr_clocks = ret; + + gmu->core_clk = msm_clk_bulk_get_clock(gmu->clocks, + gmu->nr_clocks, "gmu"); + + return 0; +} + +static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev, + const char *name) +{ + void __iomem *ret; + struct resource *res = platform_get_resource_byname(pdev, + IORESOURCE_MEM, name); + + if (!res) { + dev_err(&pdev->dev, "Unable to find the %s registers\n", name); + return ERR_PTR(-EINVAL); + } + + ret = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!ret) { + dev_err(&pdev->dev, "Unable to map the %s registers\n", name); + return ERR_PTR(-EINVAL); + } + + return ret; +} + +static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, + const char *name, irq_handler_t handler) +{ + int irq, ret; + + irq = 
platform_get_irq_byname(pdev, name); + + ret = devm_request_irq(&pdev->dev, irq, handler, IRQF_TRIGGER_HIGH, + name, gmu); + if (ret) { + dev_err(&pdev->dev, "Unable to get interrupt %s\n", name); + return ret; + } + + disable_irq(irq); + + return irq; +} + +void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + + if (IS_ERR_OR_NULL(gmu->mmio)) + return; + + pm_runtime_disable(gmu->dev); + a6xx_gmu_stop(a6xx_gpu); + + a6xx_gmu_irq_disable(gmu); + a6xx_gmu_memory_free(gmu, gmu->hfi); + + iommu_detach_device(gmu->domain, gmu->dev); + + iommu_domain_free(gmu->domain); +} + +int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + struct platform_device *pdev = of_find_device_by_node(node); + int ret; + + if (!pdev) + return -ENODEV; + + gmu->dev = &pdev->dev; + + of_dma_configure(gmu->dev, node, false); + + /* For now, don't do anything fancy until we get our feet under us */ + gmu->idle_level = GMU_IDLE_STATE_ACTIVE; + + pm_runtime_enable(gmu->dev); + gmu->gx = devm_regulator_get(gmu->dev, "vdd"); + + /* Get the list of clocks */ + ret = a6xx_gmu_clocks_probe(gmu); + if (ret) + return ret; + + /* Set up the IOMMU context bank */ + ret = a6xx_gmu_memory_probe(gmu); + if (ret) + return ret; + + /* Allocate memory for the HFI queues */ + gmu->hfi = a6xx_gmu_memory_alloc(gmu, SZ_16K); + if (IS_ERR(gmu->hfi)) + goto err; + + /* Allocate memory for the GMU debug region */ + gmu->debug = a6xx_gmu_memory_alloc(gmu, SZ_16K); + if (IS_ERR(gmu->debug)) + goto err; + + /* Map the GMU registers */ + gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu"); + + /* Map the GPU power domain controller registers */ + gmu->pdc_mmio = a6xx_gmu_get_mmio(pdev, "gmu_pdc"); + + if (IS_ERR(gmu->mmio) || IS_ERR(gmu->pdc_mmio)) + goto err; + + /* Get the HFI and GMU interrupts */ + gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq); + gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq); + + if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) + goto err; + + /* Set up a tasklet to handle GMU HFI responses */ + tasklet_init(&gmu->hfi_tasklet, a6xx_hfi_task, (unsigned long) gmu); + + /* Get the power levels for the GMU and GPU */ + a6xx_gmu_pwrlevels_probe(gmu); + + /* Set up the HFI queues */ + a6xx_hfi_init(gmu); + + return 0; +err: + a6xx_gmu_memory_free(gmu, gmu->hfi); + + if (gmu->domain) { + iommu_detach_device(gmu->domain, gmu->dev); + + iommu_domain_free(gmu->domain); + } + + return -ENODEV; +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h new file mode 100644 index 000000000000..d9a386c18799 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. 
 */ + +#ifndef _A6XX_GMU_H_ +#define _A6XX_GMU_H_ + +#include +#include "msm_drv.h" +#include "a6xx_hfi.h" + +struct a6xx_gmu_bo { + void *virt; + size_t size; + u64 iova; + struct page **pages; +}; + +/* + * These define the different GMU wake up options - these define how both the + * CPU and the GMU bring up the hardware + */ + +/* The GMU has already been booted and the retention registers are active */ +#define GMU_WARM_BOOT 0 + +/* The GMU is coming up for the first time or back from a power collapse */ +#define GMU_COLD_BOOT 1 + +/* The GMU is being soft reset after a fault */ +#define GMU_RESET 2 + +/* + * These define the level of control that the GMU has - the higher the number + * the more things that the GMU hardware controls on its own. + */ + +/* The GMU does not do any idle state management */ +#define GMU_IDLE_STATE_ACTIVE 0 + +/* The GMU manages SPTP power collapse */ +#define GMU_IDLE_STATE_SPTP 2 + +/* The GMU does automatic IFPC (intra-frame power collapse) */ +#define GMU_IDLE_STATE_IFPC 3 + +struct a6xx_gmu { + struct device *dev; + + void * __iomem mmio; + void * __iomem pdc_mmio; + + int hfi_irq; + int gmu_irq; + + struct regulator *gx; + + struct iommu_domain *domain; + u64 uncached_iova_base; + + int idle_level; + + struct a6xx_gmu_bo *hfi; + struct a6xx_gmu_bo *debug; + + int nr_clocks; + struct clk_bulk_data *clocks; + struct clk *core_clk; + + int nr_gpu_freqs; + unsigned long gpu_freqs[16]; + u32 gx_arc_votes[16]; + + int nr_gmu_freqs; + unsigned long gmu_freqs[4]; + u32 cx_arc_votes[4]; + + struct a6xx_hfi_queue queues[2]; + + struct tasklet_struct hfi_tasklet; +}; + +static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset) +{ + return msm_readl(gmu->mmio + (offset << 2)); +} + +static inline void gmu_write(struct a6xx_gmu *gmu, u32 offset, u32 value) +{ + return msm_writel(value, gmu->mmio + (offset << 2)); +} + +static inline void pdc_write(struct a6xx_gmu *gmu, u32 offset, u32 value) +{ + return msm_writel(value, gmu->pdc_mmio + (offset << 2)); +} + +static inline void gmu_rmw(struct a6xx_gmu *gmu, u32 reg, u32 mask, u32 or) +{ + u32 val = gmu_read(gmu, reg); + + val &= ~mask; + + gmu_write(gmu, reg, val | or); +} + +#define gmu_poll_timeout(gmu, addr, val, cond, interval, timeout) \ + readl_poll_timeout((gmu)->mmio + ((addr) << 2), val, cond, \ + interval, timeout) + +/* + * These are the available OOB (out of band requests) to the GMU where "out of + * band" means that the CPU talks to the GMU directly and not through HFI. + * Normally this works by writing an ITCM/DTCM register and then triggering an + * interrupt (the "request" bit) and waiting for an acknowledgment (the "ack" + * bit). The state is cleared by writing the "clear" bit to the GMU interrupt. + * + * These are used to force the GMU/GPU to stay on during a critical sequence or + * for hardware workarounds. + */ + +enum a6xx_gmu_oob_state { + GMU_OOB_BOOT_SLUMBER = 0, + GMU_OOB_GPU_SET, + GMU_OOB_DCVS_SET, +}; + +/* These are the interrupt / ack bits for each OOB request that are set + * in a6xx_gmu_set_oob and a6xx_gmu_clear_oob + */ + +/* + * Let the GMU know that a boot or slumber operation has started. 
The value in + * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are + * doing + */ +#define GMU_OOB_BOOT_SLUMBER_REQUEST 22 +#define GMU_OOB_BOOT_SLUMBER_ACK 30 +#define GMU_OOB_BOOT_SLUMBER_CLEAR 30 + +/* + * Set a new power level for the GPU when the CPU is doing frequency scaling + */ +#define GMU_OOB_DCVS_REQUEST 23 +#define GMU_OOB_DCVS_ACK 31 +#define GMU_OOB_DCVS_CLEAR 31 + +/* + * Let the GMU know to not turn off any GPU registers while the CPU is in a + * critical section + */ +#define GMU_OOB_GPU_SET_REQUEST 16 +#define GMU_OOB_GPU_SET_ACK 24 +#define GMU_OOB_GPU_SET_CLEAR 24 + + +void a6xx_hfi_init(struct a6xx_gmu *gmu); +int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); +void a6xx_hfi_stop(struct a6xx_gmu *gmu); + +void a6xx_hfi_task(unsigned long data); + +#endif diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c new file mode 100644 index 000000000000..c629f742a1d1 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -0,0 +1,818 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ + + +#include "msm_gem.h" +#include "msm_mmu.h" +#include "a6xx_gpu.h" +#include "a6xx_gmu.xml.h" + +static inline bool _a6xx_check_idle(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + /* Check that the GMU is idle */ + if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) + return false; + + /* Check that the CX master is idle */ + if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) & + ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER) + return false; + + return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) & + A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT); +} + +bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + /* wait for CP to drain ringbuffer: */ + if (!adreno_idle(gpu, ring)) + return false; + + if (spin_until(_a6xx_check_idle(gpu))) { + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", + gpu->name, __builtin_return_address(0), + gpu_read(gpu, REG_A6XX_RBBM_STATUS), + gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A6XX_CP_RB_RPTR), + gpu_read(gpu, REG_A6XX_CP_RB_WPTR)); + return false; + } + + return true; +} + +static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + uint32_t wptr; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + + spin_unlock_irqrestore(&ring->lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); + + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); +} + +static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx) +{ + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; + unsigned int i; + + /* Invalidate CCU depth and color */ + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PC_CCU_INVALIDATE_DEPTH); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PC_CCU_INVALIDATE_COLOR); + + /* Submit the commands */ + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + if (priv->lastctx == ctx) + break; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_RING(ring, 
lower_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, submit->cmd[i].size); + break; + } + } + + /* Write the fence to the scratch register */ + OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1); + OUT_RING(ring, submit->seqno); + + /* + * Execute a CACHE_FLUSH_TS event. This will ensure that the + * timestamp is written to the memory and then triggers the interrupt + */ + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); + + a6xx_flush(gpu, ring); +} + +static const struct { + u32 offset; + u32 value; +} a6xx_hwcg[] = { + {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 
0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555} +}; + +static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + unsigned int i; + u32 val; + + val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL); + + /* Don't re-program the registers if they are already correct */ + if ((!state && !val) || (state && (val == 0x8aa8aa02))) + return; + + /* Disable SP clock before programming HWCG registers */ + gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); + + for (i = 0; i < ARRAY_SIZE(a6xx_hwcg); i++) + gpu_write(gpu, a6xx_hwcg[i].offset, + state ? a6xx_hwcg[i].value : 0); + + /* Enable SP clock */ + gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); + + gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? 
0x8aa8aa02 : 0); +} + +static int a6xx_cp_init(struct msm_gpu *gpu) +{ + struct msm_ringbuffer *ring = gpu->rb[0]; + + OUT_PKT7(ring, CP_ME_INIT, 8); + + OUT_RING(ring, 0x0000002f); + + /* Enable multiple hardware contexts */ + OUT_RING(ring, 0x00000003); + + /* Enable error detection */ + OUT_RING(ring, 0x20000000); + + /* Don't enable header dump */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + /* No workarounds enabled */ + OUT_RING(ring, 0x00000000); + + /* Pad rest of the cmds with 0's */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + a6xx_flush(gpu, ring); + return a6xx_idle(gpu, ring) ? 0 : -EINVAL; +} + +static int a6xx_ucode_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (!a6xx_gpu->sqe_bo) { + a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu, + adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova); + + if (IS_ERR(a6xx_gpu->sqe_bo)) { + int ret = PTR_ERR(a6xx_gpu->sqe_bo); + + a6xx_gpu->sqe_bo = NULL; + DRM_DEV_ERROR(&gpu->pdev->dev, + "Could not allocate SQE ucode: %d\n", ret); + + return ret; + } + } + + gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO, + REG_A6XX_CP_SQE_INSTR_BASE_HI, a6xx_gpu->sqe_iova); + + return 0; +} + +#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ + A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ + A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ + A6XX_RBBM_INT_0_MASK_CP_IB2 | \ + A6XX_RBBM_INT_0_MASK_CP_IB1 | \ + A6XX_RBBM_INT_0_MASK_CP_RB | \ + A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ + A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ + A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ + A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ + A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) + +static int a6xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int ret; + + /* Make sure the GMU keeps the GPU on while we set it up */ + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); + + /* + * Disable the trusted memory range - we don't actually support secure + * memory rendering at this point in time and we don't want to block off + * part of the virtual memory space. 
+ */ + gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, + REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); + + /* enable hardware clockgating */ + a6xx_set_hwcg(gpu, true); + + /* VBIF start */ + gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); + gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); + + /* Disable L2 bypass in the UCHE */ + gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0); + gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff); + gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000); + gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000); + gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, + REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000); + + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO, + REG_A6XX_UCHE_GMEM_RANGE_MAX_HI, + 0x00100000 + adreno_gpu->gmem - 1); + + gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); + gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); + + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); + + /* Setting the mem pool size */ + gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); + + /* Setting the primFifo thresholds default values */ + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, (0x300 << 11)); + + /* Set the AHB default slave response to "ERROR" */ + gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1); + + /* Turn on performance counters */ + gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1); + + /* Select CP0 to always count cycles */ + gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); + + /* FIXME: not sure if this should live here or in a6xx_gmu.c */ + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, + 0xff000000); + gmu_rmw(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, + 0xff, 0x20); + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, + 0x01); + + gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21); + + /* Enable fault detection */ + gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | 0x1fffff); + + gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1); + + /* Protect registers from the CP */ + gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 0x00000003); + + gpu_write(gpu, REG_A6XX_CP_PROTECT(0), + A6XX_PROTECT_RDONLY(0x600, 0x51)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(1), A6XX_PROTECT_RW(0xae50, 0x2)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(2), A6XX_PROTECT_RW(0x9624, 0x13)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(3), A6XX_PROTECT_RW(0x8630, 0x8)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(4), A6XX_PROTECT_RW(0x9e70, 0x1)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(5), A6XX_PROTECT_RW(0x9e78, 0x187)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(6), A6XX_PROTECT_RW(0xf000, 0x810)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(7), + A6XX_PROTECT_RDONLY(0xfc00, 0x3)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(8), A6XX_PROTECT_RW(0x50e, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(9), A6XX_PROTECT_RDONLY(0x50f, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(10), 
A6XX_PROTECT_RW(0x510, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(11), + A6XX_PROTECT_RDONLY(0x0, 0x4f9)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(12), + A6XX_PROTECT_RDONLY(0x501, 0xa)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(13), + A6XX_PROTECT_RDONLY(0x511, 0x44)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(14), A6XX_PROTECT_RW(0xe00, 0xe)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(15), A6XX_PROTECT_RW(0x8e00, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(16), A6XX_PROTECT_RW(0x8e50, 0xf)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(17), A6XX_PROTECT_RW(0xbe02, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(18), + A6XX_PROTECT_RW(0xbe20, 0x11f3)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(19), A6XX_PROTECT_RW(0x800, 0x82)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(20), A6XX_PROTECT_RW(0x8a0, 0x8)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(21), A6XX_PROTECT_RW(0x8ab, 0x19)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(22), A6XX_PROTECT_RW(0x900, 0x4d)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(23), A6XX_PROTECT_RW(0x98d, 0x76)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(24), + A6XX_PROTECT_RDONLY(0x8d0, 0x23)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(25), + A6XX_PROTECT_RDONLY(0x980, 0x4)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(26), A6XX_PROTECT_RW(0xa630, 0x0)); + + /* Enable interrupts */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK); + + ret = adreno_hw_init(gpu); + if (ret) + goto out; + + ret = a6xx_ucode_init(gpu); + if (ret) + goto out; + + /* Always come up on rb 0 */ + a6xx_gpu->cur_ring = gpu->rb[0]; + + /* Enable the SQE_to start the CP engine */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); + + ret = a6xx_cp_init(gpu); + if (ret) + goto out; + + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); + +out: + /* + * Tell the GMU that we are done touching the GPU and it can start power + * management + */ + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + + /* Take the GMU out of its special boot mode */ + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER); + + return ret; +} + +static void a6xx_dump(struct msm_gpu *gpu) +{ + dev_info(&gpu->pdev->dev, "status: %08x\n", + gpu_read(gpu, REG_A6XX_RBBM_STATUS)); + adreno_dump(gpu); +} + +#define VBIF_RESET_ACK_TIMEOUT 100 +#define VBIF_RESET_ACK_MASK 0x00f0 + +static void a6xx_recover(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int i; + + adreno_dump_info(gpu); + + for (i = 0; i < 8; i++) + dev_info(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + + if (hang_debug) + a6xx_dump(gpu); + + /* + * Turn off keep alive that might have been enabled by the hang + * interrupt + */ + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0); + + gpu->funcs->pm_suspend(gpu); + gpu->funcs->pm_resume(gpu); + + msm_gpu_hw_init(gpu); +} + +static int a6xx_fault_handler(void *arg, unsigned long iova, int flags) +{ + struct msm_gpu *gpu = arg; + + pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n", + iova, flags, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7))); + + return -EFAULT; +} + +static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) +{ + u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS); + + if (status & A6XX_CP_INT_CP_OPCODE_ERROR) { + u32 val; + + gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1); + val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA); + 
dev_err_ratelimited(&gpu->pdev->dev, + "CP | opcode error | possible opcode=0x%8.8X\n", + val); + } + + if (status & A6XX_CP_INT_CP_UCODE_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, + "CP ucode error interrupt\n"); + + if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n", + gpu_read(gpu, REG_A6XX_CP_HW_FAULT)); + + if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { + u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", + val & (1 << 20) ? "READ" : "WRITE", + (val & 0x3ffff), val); + } + + if (status & A6XX_CP_INT_CP_AHB_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n"); + + if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n"); + + if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n"); + +} + +static void a6xx_fault_detect_irq(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + + /* + * Force the GPU to stay on until after we finish + * collecting information + */ + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); + + DRM_DEV_ERROR(&gpu->pdev->dev, + "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? ring->seqno : 0, + gpu_read(gpu, REG_A6XX_RBBM_STATUS), + gpu_read(gpu, REG_A6XX_CP_RB_RPTR), + gpu_read(gpu, REG_A6XX_CP_RB_WPTR), + gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI), + gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), + gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI), + gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); + + /* Turn off the hangcheck timer to keep it from bothering us */ + del_timer(&gpu->hangcheck_timer); + + queue_work(priv->wq, &gpu->recover_work); +} + +static irqreturn_t a6xx_irq(struct msm_gpu *gpu) +{ + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); + + gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); + + if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) + a6xx_fault_detect_irq(gpu); + + if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n"); + + if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) + a6xx_cp_hw_err_irq(gpu); + + if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) + dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); + + if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) + dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); + + if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) + dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); + + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + msm_gpu_retire(gpu); + + return IRQ_HANDLED; +} + +static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, + REG_A6XX_CP_RB_RPTR_ADDR_LO), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, + REG_A6XX_CP_RB_RPTR_ADDR_HI), + 
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), +}; + +static const u32 a6xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b, + 0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044, + 0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb, + 0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d, + 0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, + 0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813, + 0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, + 0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, + 0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911, + 0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, + 0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, + 0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06, + 0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19, + 0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601, + 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637, + 0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c, + 0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f, + 0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77, + 0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e, + 0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23, + 0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79, + 0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, + 0xa610, 0xa617, 0xa630, 0xa630, + ~0 +}; + +static int a6xx_pm_resume(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int ret; + + ret = a6xx_gmu_resume(a6xx_gpu); + + gpu->needs_hw_init = true; + + return ret; +} + +static int a6xx_pm_suspend(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + /* + * Make sure the GMU is idle before continuing (because some transitions + * may use VBIF) + */ + a6xx_gmu_wait_for_idle(a6xx_gpu); + + /* Clear the VBIF pipe before shutting down */ + /* FIXME: This accesses the GPU - do we need to make sure it is on?
*/ + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); + spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf); + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); + + return a6xx_gmu_stop(a6xx_gpu); +} + +static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + /* Force the GPU power on so we can read this register */ + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + + *value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO, + REG_A6XX_RBBM_PERFCTR_CP_0_HI); + + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + return 0; +} + +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) +static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p) +{ + adreno_show(gpu, state, p); +} +#endif + +static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + return a6xx_gpu->cur_ring; +} + +static void a6xx_destroy(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (a6xx_gpu->sqe_bo) { + if (a6xx_gpu->sqe_iova) + msm_gem_put_iova(a6xx_gpu->sqe_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(a6xx_gpu->sqe_bo); + } + + a6xx_gmu_remove(a6xx_gpu); + + adreno_gpu_cleanup(adreno_gpu); + kfree(a6xx_gpu); +} + +static const struct adreno_gpu_funcs funcs = { + .base = { + .get_param = adreno_get_param, + .hw_init = a6xx_hw_init, + .pm_suspend = a6xx_pm_suspend, + .pm_resume = a6xx_pm_resume, + .recover = a6xx_recover, + .submit = a6xx_submit, + .flush = a6xx_flush, + .active_ring = a6xx_active_ring, + .irq = a6xx_irq, + .destroy = a6xx_destroy, +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) + .show = a6xx_show, +#endif + }, + .get_timestamp = a6xx_get_timestamp, +}; + +struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; + struct device_node *node; + struct a6xx_gpu *a6xx_gpu; + struct adreno_gpu *adreno_gpu; + struct msm_gpu *gpu; + int ret; + + a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); + if (!a6xx_gpu) + return ERR_PTR(-ENOMEM); + + adreno_gpu = &a6xx_gpu->base; + gpu = &adreno_gpu->base; + + adreno_gpu->registers = a6xx_registers; + adreno_gpu->reg_offsets = a6xx_register_offsets; + + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + /* Check if there is a GMU phandle and set it up */ + node = of_parse_phandle(pdev->dev.of_node, "gmu", 0); + + /* FIXME: How do we gracefully handle this? */ + BUG_ON(!node); + + ret = a6xx_gmu_probe(a6xx_gpu, node); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + if (gpu->aspace) + msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, + a6xx_fault_handler); + + return gpu; +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h new file mode 100644 index 000000000000..dd69e5b0e692 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. 
*/ + +#ifndef __A6XX_GPU_H__ +#define __A6XX_GPU_H__ + + +#include "adreno_gpu.h" +#include "a6xx.xml.h" + +#include "a6xx_gmu.h" + +extern bool hang_debug; + +struct a6xx_gpu { + struct adreno_gpu base; + + struct drm_gem_object *sqe_bo; + uint64_t sqe_iova; + + struct msm_ringbuffer *cur_ring; + + struct a6xx_gmu gmu; +}; + +#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) + +/* + * Given a register and a count, return a value to program into + * REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len + * registers starting at _reg. + */ +#define A6XX_PROTECT_RW(_reg, _len) \ + ((1 << 31) | \ + (((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF)) + +/* + * Same as above, but allow reads over the range. For areas of mixed use (such + * as performance counters) this allows us to protect a much larger range with a + * single register + */ +#define A6XX_PROTECT_RDONLY(_reg, _len) \ + ((((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF)) + + +int a6xx_gmu_resume(struct a6xx_gpu *gpu); +int a6xx_gmu_stop(struct a6xx_gpu *gpu); + +int a6xx_gmu_wait_for_idle(struct a6xx_gpu *gpu); + +int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu); +bool a6xx_gmu_isidle(struct a6xx_gmu *gmu); + +int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); +void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); + +int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node); +void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); + +#endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c new file mode 100644 index 000000000000..f19ef4cb6ea4 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ + +#include <linux/completion.h> +#include <linux/circ_buf.h> +#include <linux/list.h> + +#include "a6xx_gmu.h" +#include "a6xx_gmu.xml.h" + +#define HFI_MSG_ID(val) [val] = #val + +static const char * const a6xx_hfi_msg_id[] = { + HFI_MSG_ID(HFI_H2F_MSG_INIT), + HFI_MSG_ID(HFI_H2F_MSG_FW_VERSION), + HFI_MSG_ID(HFI_H2F_MSG_BW_TABLE), + HFI_MSG_ID(HFI_H2F_MSG_PERF_TABLE), + HFI_MSG_ID(HFI_H2F_MSG_TEST), +}; + +static int a6xx_hfi_queue_read(struct a6xx_hfi_queue *queue, u32 *data, + u32 dwords) +{ + struct a6xx_hfi_queue_header *header = queue->header; + u32 i, hdr, index = header->read_index; + + if (header->read_index == header->write_index) { + header->rx_request = 1; + return 0; + } + + hdr = queue->data[index]; + + /* + * If we are to assume that the GMU firmware is in fact a rational actor + * and is programmed to not send us a larger response than we expect + * then we can also assume that if the header size is unexpectedly large + * that it is due to memory corruption and/or hardware failure. In this + * case the only reasonable course of action is to BUG() so that the + * failure is caught immediately.
+ */ + + BUG_ON(HFI_HEADER_SIZE(hdr) > dwords); + + for (i = 0; i < HFI_HEADER_SIZE(hdr); i++) { + data[i] = queue->data[index]; + index = (index + 1) % header->size; + } + + header->read_index = index; + return HFI_HEADER_SIZE(hdr); +} + +static int a6xx_hfi_queue_write(struct a6xx_gmu *gmu, + struct a6xx_hfi_queue *queue, u32 *data, u32 dwords) +{ + struct a6xx_hfi_queue_header *header = queue->header; + u32 i, space, index = header->write_index; + + spin_lock(&queue->lock); + + space = CIRC_SPACE(header->write_index, header->read_index, + header->size); + if (space < dwords) { + header->dropped++; + spin_unlock(&queue->lock); + return -ENOSPC; + } + + for (i = 0; i < dwords; i++) { + queue->data[index] = data[i]; + index = (index + 1) % header->size; + } + + header->write_index = index; + spin_unlock(&queue->lock); + + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 0x01); + return 0; +} + +struct a6xx_hfi_response { + u32 id; + u32 seqnum; + struct list_head node; + struct completion complete; + + u32 error; + u32 payload[16]; +}; + +/* + * Incoming HFI ack messages can come in out of order so we need to store all + * the pending messages on a list until they are handled. + */ +static spinlock_t hfi_ack_lock = __SPIN_LOCK_UNLOCKED(message_lock); +static LIST_HEAD(hfi_ack_list); + +static void a6xx_hfi_handle_ack(struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_response *msg) +{ + struct a6xx_hfi_response *resp; + u32 id, seqnum; + + /* msg->ret_header contains the header of the message being acked */ + id = HFI_HEADER_ID(msg->ret_header); + seqnum = HFI_HEADER_SEQNUM(msg->ret_header); + + spin_lock(&hfi_ack_lock); + list_for_each_entry(resp, &hfi_ack_list, node) { + if (resp->id == id && resp->seqnum == seqnum) { + resp->error = msg->error; + memcpy(resp->payload, msg->payload, + sizeof(resp->payload)); + + complete(&resp->complete); + spin_unlock(&hfi_ack_lock); + return; + } + } + spin_unlock(&hfi_ack_lock); + + dev_err(gmu->dev, "Nobody was waiting for HFI message %d\n", seqnum); +} + +static void a6xx_hfi_handle_error(struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_response *msg) +{ + struct a6xx_hfi_msg_error *error = (struct a6xx_hfi_msg_error *) msg; + + dev_err(gmu->dev, "GMU firmware error %d\n", error->code); +} + +void a6xx_hfi_task(unsigned long data) +{ + struct a6xx_gmu *gmu = (struct a6xx_gmu *) data; + struct a6xx_hfi_queue *queue = &gmu->queues[HFI_RESPONSE_QUEUE]; + struct a6xx_hfi_msg_response resp; + + for (;;) { + u32 id; + int ret = a6xx_hfi_queue_read(queue, (u32 *) &resp, + sizeof(resp) >> 2); + + /* Returns the number of dwords copied or negative on error */ + if (ret <= 0) { + if (ret < 0) + dev_err(gmu->dev, + "Unable to read the HFI message queue\n"); + break; + } + + id = HFI_HEADER_ID(resp.header); + + if (id == HFI_F2H_MSG_ACK) + a6xx_hfi_handle_ack(gmu, &resp); + else if (id == HFI_F2H_MSG_ERROR) + a6xx_hfi_handle_error(gmu, &resp); + } +} + +static int a6xx_hfi_send_msg(struct a6xx_gmu *gmu, int id, + void *data, u32 size, u32 *payload, u32 payload_size) +{ + struct a6xx_hfi_queue *queue = &gmu->queues[HFI_COMMAND_QUEUE]; + struct a6xx_hfi_response resp = { 0 }; + int ret, dwords = size >> 2; + u32 seqnum; + + seqnum = atomic_inc_return(&queue->seqnum) % 0xfff; + + /* First dword of the message is the message header - fill it in */ + *((u32 *) data) = (seqnum << 20) | (HFI_MSG_CMD << 16) | + (dwords << 8) | id; + + init_completion(&resp.complete); + resp.id = id; + resp.seqnum = seqnum; + + spin_lock_bh(&hfi_ack_lock); + list_add_tail(&resp.node, &hfi_ack_list); +
spin_unlock_bh(&hfi_ack_lock); + + ret = a6xx_hfi_queue_write(gmu, queue, data, dwords); + if (ret) { + dev_err(gmu->dev, "Unable to send message %s id %d\n", + a6xx_hfi_msg_id[id], seqnum); + goto out; + } + + /* Wait up to 5 seconds for the response */ + ret = wait_for_completion_timeout(&resp.complete, + msecs_to_jiffies(5000)); + if (!ret) { + dev_err(gmu->dev, + "Message %s id %d timed out waiting for response\n", + a6xx_hfi_msg_id[id], seqnum); + ret = -ETIMEDOUT; + } else + ret = 0; + +out: + spin_lock_bh(&hfi_ack_lock); + list_del(&resp.node); + spin_unlock_bh(&hfi_ack_lock); + + if (ret) + return ret; + + if (resp.error) { + dev_err(gmu->dev, "Message %s id %d returned error %d\n", + a6xx_hfi_msg_id[id], seqnum, resp.error); + return -EINVAL; + } + + if (payload && payload_size) { + int copy = min_t(u32, payload_size, sizeof(resp.payload)); + + memcpy(payload, resp.payload, copy); + } + + return 0; +} + +static int a6xx_hfi_send_gmu_init(struct a6xx_gmu *gmu, int boot_state) +{ + struct a6xx_hfi_msg_gmu_init_cmd msg = { 0 }; + + msg.dbg_buffer_addr = (u32) gmu->debug->iova; + msg.dbg_buffer_size = (u32) gmu->debug->size; + msg.boot_state = boot_state; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_INIT, &msg, sizeof(msg), + NULL, 0); +} + +static int a6xx_hfi_get_fw_version(struct a6xx_gmu *gmu, u32 *version) +{ + struct a6xx_hfi_msg_fw_version msg = { 0 }; + + /* Currently supporting version 1.1 */ + msg.supported_version = (1 << 28) | (1 << 16); + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_FW_VERSION, &msg, sizeof(msg), + version, sizeof(*version)); +} + +static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_perf_table msg = { 0 }; + int i; + + msg.num_gpu_levels = gmu->nr_gpu_freqs; + msg.num_gmu_levels = gmu->nr_gmu_freqs; + + for (i = 0; i < gmu->nr_gpu_freqs; i++) { + msg.gx_votes[i].vote = gmu->gx_arc_votes[i]; + msg.gx_votes[i].freq = gmu->gpu_freqs[i] / 1000; + } + + for (i = 0; i < gmu->nr_gmu_freqs; i++) { + msg.cx_votes[i].vote = gmu->cx_arc_votes[i]; + msg.cx_votes[i].freq = gmu->gmu_freqs[i] / 1000; + } + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_PERF_TABLE, &msg, sizeof(msg), + NULL, 0); +} + +static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_bw_table msg = { 0 }; + + /* + * The sdm845 GMU doesn't do bus frequency scaling on its own but it + * does need at least one entry in the list because it might be accessed + * when the GMU is shutting down. Send a single "off" entry. + */ + + msg.bw_level_num = 1; + + msg.ddr_cmds_num = 3; + msg.ddr_wait_bitmask = 0x07; + + msg.ddr_cmds_addrs[0] = 0x50000; + msg.ddr_cmds_addrs[1] = 0x5005c; + msg.ddr_cmds_addrs[2] = 0x5000c; + + msg.ddr_cmds_data[0][0] = 0x40000000; + msg.ddr_cmds_data[0][1] = 0x40000000; + msg.ddr_cmds_data[0][2] = 0x40000000; + + /* + * These are the CX (CNOC) votes. They are used, but the values for the + * sdm845 GMU are known and fixed, so we can hard code them.
+ */ + + msg.cnoc_cmds_num = 3; + msg.cnoc_wait_bitmask = 0x05; + + msg.cnoc_cmds_addrs[0] = 0x50034; + msg.cnoc_cmds_addrs[1] = 0x5007c; + msg.cnoc_cmds_addrs[2] = 0x5004c; + + msg.cnoc_cmds_data[0][0] = 0x40000000; + msg.cnoc_cmds_data[0][1] = 0x00000000; + msg.cnoc_cmds_data[0][2] = 0x40000000; + + msg.cnoc_cmds_data[1][0] = 0x60000001; + msg.cnoc_cmds_data[1][1] = 0x20000001; + msg.cnoc_cmds_data[1][2] = 0x60000001; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg), + NULL, 0); +} + +static int a6xx_hfi_send_test(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_test msg = { 0 }; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_TEST, &msg, sizeof(msg), + NULL, 0); +} + +int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state) +{ + int ret; + + ret = a6xx_hfi_send_gmu_init(gmu, boot_state); + if (ret) + return ret; + + ret = a6xx_hfi_get_fw_version(gmu, NULL); + if (ret) + return ret; + + /* + * We have to exchange version numbers per the sequence but at this + * point the kernel driver doesn't need to know the exact version of + * the GMU firmware + */ + + ret = a6xx_hfi_send_perf_table(gmu); + if (ret) + return ret; + + ret = a6xx_hfi_send_bw_table(gmu); + if (ret) + return ret; + + /* + * Let the GMU know that there won't be any more HFI messages until next + * boot + */ + a6xx_hfi_send_test(gmu); + + return 0; +} + +void a6xx_hfi_stop(struct a6xx_gmu *gmu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) { + struct a6xx_hfi_queue *queue = &gmu->queues[i]; + + if (!queue->header) + continue; + + if (queue->header->read_index != queue->header->write_index) + dev_err(gmu->dev, "HFI queue %d is not empty\n", i); + + queue->header->read_index = 0; + queue->header->write_index = 0; + } +} + +static void a6xx_hfi_queue_init(struct a6xx_hfi_queue *queue, + struct a6xx_hfi_queue_header *header, void *virt, u64 iova, + u32 id) +{ + spin_lock_init(&queue->lock); + queue->header = header; + queue->data = virt; + atomic_set(&queue->seqnum, 0); + + /* Set up the shared memory header */ + header->iova = iova; + header->type = 10 << 8 | id; + header->status = 1; + header->size = SZ_4K >> 2; + header->msg_size = 0; + header->dropped = 0; + header->rx_watermark = 1; + header->tx_watermark = 1; + header->rx_request = 1; + header->tx_request = 0; + header->read_index = 0; + header->write_index = 0; +} + +void a6xx_hfi_init(struct a6xx_gmu *gmu) +{ + struct a6xx_gmu_bo *hfi = gmu->hfi; + struct a6xx_hfi_queue_table_header *table = hfi->virt; + struct a6xx_hfi_queue_header *headers = hfi->virt + sizeof(*table); + u64 offset; + int table_size; + + /* + * The table size is the size of the table header plus all of the queue + * headers + */ + table_size = sizeof(*table); + table_size += (ARRAY_SIZE(gmu->queues) * + sizeof(struct a6xx_hfi_queue_header)); + + table->version = 0; + table->size = table_size; + /* First queue header is located immediately after the table header */ + table->qhdr0_offset = sizeof(*table) >> 2; + table->qhdr_size = sizeof(struct a6xx_hfi_queue_header) >> 2; + table->num_queues = ARRAY_SIZE(gmu->queues); + table->active_queues = ARRAY_SIZE(gmu->queues); + + /* Command queue */ + offset = SZ_4K; + a6xx_hfi_queue_init(&gmu->queues[0], &headers[0], hfi->virt + offset, + hfi->iova + offset, 0); + + /* GMU response queue */ + offset += SZ_4K; + a6xx_hfi_queue_init(&gmu->queues[1], &headers[1], hfi->virt + offset, + hfi->iova + offset, 4); +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h new file mode 100644
index 000000000000..60d1319fa44f --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. */ + +#ifndef _A6XX_HFI_H_ +#define _A6XX_HFI_H_ + +struct a6xx_hfi_queue_table_header { + u32 version; + u32 size; /* Size of the queue table in dwords */ + u32 qhdr0_offset; /* Offset of the first queue header */ + u32 qhdr_size; /* Size of the queue headers */ + u32 num_queues; /* Number of total queues */ + u32 active_queues; /* Number of active queues */ +}; + +struct a6xx_hfi_queue_header { + u32 status; + u32 iova; + u32 type; + u32 size; + u32 msg_size; + u32 dropped; + u32 rx_watermark; + u32 tx_watermark; + u32 rx_request; + u32 tx_request; + u32 read_index; + u32 write_index; +}; + +struct a6xx_hfi_queue { + struct a6xx_hfi_queue_header *header; + spinlock_t lock; + u32 *data; + atomic_t seqnum; +}; + +/* This is the outgoing queue to the GMU */ +#define HFI_COMMAND_QUEUE 0 + +/* This is the incoming response queue from the GMU */ +#define HFI_RESPONSE_QUEUE 1 + +#define HFI_HEADER_ID(msg) ((msg) & 0xff) +#define HFI_HEADER_SIZE(msg) (((msg) >> 8) & 0xff) +#define HFI_HEADER_SEQNUM(msg) (((msg) >> 20) & 0xfff) + +/* FIXME: Do we need this or can we use ARRAY_SIZE? */ +#define HFI_RESPONSE_PAYLOAD_SIZE 16 + +/* HFI message types */ + +#define HFI_MSG_CMD 0 +#define HFI_MSG_ACK 2 + +#define HFI_F2H_MSG_ACK 126 + +struct a6xx_hfi_msg_response { + u32 header; + u32 ret_header; + u32 error; + u32 payload[HFI_RESPONSE_PAYLOAD_SIZE]; +}; + +#define HFI_F2H_MSG_ERROR 100 + +struct a6xx_hfi_msg_error { + u32 header; + u32 code; + u32 payload[2]; +}; + +#define HFI_H2F_MSG_INIT 0 + +struct a6xx_hfi_msg_gmu_init_cmd { + u32 header; + u32 seg_id; + u32 dbg_buffer_addr; + u32 dbg_buffer_size; + u32 boot_state; +}; + +#define HFI_H2F_MSG_FW_VERSION 1 + +struct a6xx_hfi_msg_fw_version { + u32 header; + u32 supported_version; +}; + +#define HFI_H2F_MSG_PERF_TABLE 4 + +struct perf_level { + u32 vote; + u32 freq; +}; + +struct a6xx_hfi_msg_perf_table { + u32 header; + u32 num_gpu_levels; + u32 num_gmu_levels; + + struct perf_level gx_votes[16]; + struct perf_level cx_votes[4]; +}; + +#define HFI_H2F_MSG_BW_TABLE 3 + +struct a6xx_hfi_msg_bw_table { + u32 header; + u32 bw_level_num; + u32 cnoc_cmds_num; + u32 ddr_cmds_num; + u32 cnoc_wait_bitmask; + u32 ddr_wait_bitmask; + u32 cnoc_cmds_addrs[6]; + u32 cnoc_cmds_data[2][6]; + u32 ddr_cmds_addrs[8]; + u32 ddr_cmds_data[16][8]; +}; + +#define HFI_H2F_MSG_TEST 5 + +struct a6xx_hfi_msg_test { + u32 header; +}; + +#endif diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 37746f1d54cf..7d3e9a129ac7 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -111,6 +111,16 @@ static const struct adreno_info gpulist[] = { ADRENO_QUIRK_FAULT_DETECT_MASK, .init = a5xx_gpu_init, .zapfw = "a530_zap.mdt", + }, { + .rev = ADRENO_REV(6, 3, 0, ANY_ID), + .revn = 630, + .name = "A630", + .fw = { + [ADRENO_FW_SQE] = "a630_sqe.fw", + [ADRENO_FW_GMU] = "a630_gmu.bin", + }, + .gmem = SZ_1M, + .init = a6xx_gpu_init, }, }; @@ -127,6 +137,8 @@ MODULE_FIRMWARE("qcom/a530_zap.mdt"); MODULE_FIRMWARE("qcom/a530_zap.b00"); MODULE_FIRMWARE("qcom/a530_zap.b01"); MODULE_FIRMWARE("qcom/a530_zap.b02"); +MODULE_FIRMWARE("qcom/a630_sqe.fw"); +MODULE_FIRMWARE("qcom/a630_gmu.bin"); static inline bool _rev_match(uint8_t entry, uint8_t id) { diff --git
a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index d391ff377612..de6e6ee42fba 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -50,7 +50,9 @@ enum adreno_regs { enum { ADRENO_FW_PM4 = 0, + ADRENO_FW_SQE = 0, /* a6xx */ ADRENO_FW_PFP = 1, + ADRENO_FW_GMU = 1, /* a6xx */ ADRENO_FW_GPMU = 2, ADRENO_FW_MAX, }; @@ -335,6 +337,7 @@ static inline void adreno_gpu_write(struct adreno_gpu *gpu, struct msm_gpu *a3xx_gpu_init(struct drm_device *dev); struct msm_gpu *a4xx_gpu_init(struct drm_device *dev); struct msm_gpu *a5xx_gpu_init(struct drm_device *dev); +struct msm_gpu *a6xx_gpu_init(struct drm_device *dev); static inline void adreno_gpu_write64(struct adreno_gpu *gpu, enum adreno_regs lo, enum adreno_regs hi, u64 data) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ca368490b3ee..5e808cfec345 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -88,7 +88,7 @@ static struct devfreq_dev_profile msm_devfreq_profile = { static void msm_devfreq_init(struct msm_gpu *gpu) { /* We need target support to do devfreq */ - if (!gpu->funcs->gpu_busy) + if (!gpu->funcs->gpu_busy || !gpu->core_clk) return; msm_devfreq_profile.initial_freq = gpu->fast_rate;
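A note on the HFI message header used throughout a6xx_hfi.c: the first dword packed in a6xx_hfi_send_msg() and the HFI_HEADER_* accessors in a6xx_hfi.h describe the same 32-bit layout (message id in bits 0-7, size in dwords in bits 8-15, message type at bit 16, sequence number in bits 20-31). The standalone C sketch below is illustrative only and is not part of the patch; it round-trips a header through that layout, and the sample id/size/seqnum values are purely hypothetical.

#include <assert.h>
#include <stdint.h>

/* Field layout mirrored from a6xx_hfi.h / a6xx_hfi_send_msg() in the patch above */
#define HFI_MSG_CMD            0
#define HFI_HEADER_ID(msg)     ((msg) & 0xff)
#define HFI_HEADER_SIZE(msg)   (((msg) >> 8) & 0xff)
#define HFI_HEADER_SEQNUM(msg) (((msg) >> 20) & 0xfff)

/* Pack a command header the same way a6xx_hfi_send_msg() fills its first dword */
static uint32_t hfi_pack_header(uint32_t seqnum, uint32_t dwords, uint32_t id)
{
	return (seqnum << 20) | (HFI_MSG_CMD << 16) | (dwords << 8) | id;
}

int main(void)
{
	/* Hypothetical values: message id 4, 5 dwords, sequence number 42 */
	uint32_t hdr = hfi_pack_header(42, 5, 4);

	assert(HFI_HEADER_ID(hdr) == 4);
	assert(HFI_HEADER_SIZE(hdr) == 5);
	assert(HFI_HEADER_SEQNUM(hdr) == 42);

	return 0;
}

The 8-bit size field is also what lets a6xx_hfi_queue_read() sanity-check HFI_HEADER_SIZE(hdr) against the caller's buffer before copying a response out of the queue.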