From 90b8ad13536e80b1b4d9ed1c9d527e64ee757c26 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Mon, 14 Aug 2023 11:24:49 -0700 Subject: [PATCH 01/41] drm/i915: Fix TLB-Invalidation seqno store When getting the next gt's seqno to be stored into an objects mm.tlb[gt_id] array, fix the retrieval code to get it from the correct gt instead of the same one. Fixes: d6c531ab4820 ("drm/i915: Invalidate the TLBs on each GT") Signed-off-by: Alan Previn Reviewed-by: Andi Shyti Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230814182449.1060747-1-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e52089564d79..6f180ee13853 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1356,7 +1356,7 @@ void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) */ for_each_gt(gt, vm->i915, id) WRITE_ONCE(tlb[id], - intel_gt_next_invalidate_tlb_full(vm->gt)); + intel_gt_next_invalidate_tlb_full(gt)); } static void __vma_put_pages(struct i915_vma *vma, unsigned int count) From d3b0466adfd991e39a0550920898bebff870b7d1 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 14 Aug 2023 20:32:15 +0530 Subject: [PATCH 02/41] drm/i915/dg2: Remove Wa_15010599737 Since this Wa is specific to DirectX, this is not required on Linux. Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230814150215.873941-1-shekhar.chauhan@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 2cdfb2f713d0..0e4c638fcbbf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -412,9 +412,6 @@ #define XEHP_CULLBIT1 MCR_REG(0x6100) -#define CHICKEN_RASTER_1 MCR_REG(0x6204) -#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) - #define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 589d009032fc..500d38ddce0c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -798,9 +798,6 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); - /* Wa_15010599737:dg2 */ - wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); - /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } From 2eb23c9409e07a27cedfbcb2a886e62228ac4355 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 15 Aug 2023 09:53:44 -0700 Subject: [PATCH 03/41] drm/i915/selftests: Align igt_spinner_create_request with hangcheck Align igt_spinner_create_request with the hang_create_request implementation in selftest_hangcheck.c. Signed-off-by: Jonathan Cavitt Reviewed-by: Matt Roper Acked-by: Andi Shyti Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230815165345.964068-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/selftests/igt_spinner.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0f064930ef11..8c3e1f20e5a1 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -179,6 +179,9 @@ igt_spinner_create_request(struct igt_spinner *spin, *batch++ = arbitration_command; + memset32(batch, MI_NOOP, 128); + batch += 128; + if (GRAPHICS_VER(rq->i915) >= 8) *batch++ = MI_BATCH_BUFFER_START | BIT(8) | 1; else if (IS_HASWELL(rq->i915)) From eaeb4b3614529bfa8a7edfdd7ecf6977b27f18b2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 16 Aug 2023 14:42:05 -0700 Subject: [PATCH 04/41] drm/i915/dg2: Drop pre-production GT workarounds DG2 first production steppings were C0 (for DG2-G10), B1 (for DG2-G11), and A1 (for DG2-G12). Several workarounds that apply onto to pre-production hardware can be dropped. Furthermore, several workarounds that apply to all production steppings can have their conditions simplified to no longer check the GT stepping. v2: - Keep Wa_16011777198 in place for now; it will be removed separately in a follow-up patch to keep review easier. Bspec: 44477 Signed-off-by: Matt Roper Acked-by: Jani Nikula Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20230816214201.534095-10-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 34 +--- drivers/gpu/drm/i915/gt/intel_mocs.c | 21 +- drivers/gpu/drm/i915/gt/intel_rc6.c | 6 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 211 +------------------- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 20 +- drivers/gpu/drm/i915/intel_clock_gating.c | 8 - 6 files changed, 21 insertions(+), 279 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 957d0aeb0c02..bc7ce2c2b959 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1315,29 +1315,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) return cs; } -/* - * On DG2 during context restore of a preempted context in GPGPU mode, - * RCS restore hang is detected. This is extremely timing dependent. - * To address this below sw wabb is implemented for DG2 A steppings. - */ -static u32 * -dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); - *cs++ = 0x21; - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); - - return cs; -} - /* * The bspec's tuning guide asks us to program a vertical watermark value of * 0x3FF. However this register is not saved/restored properly by the @@ -1362,14 +1339,8 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); - /* Wa_22011450934:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) - cs = dg2_emit_rcs_hang_wabb(ce, cs); - /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); cs = gen12_emit_aux_table_inv(ce->engine, cs); @@ -1390,8 +1361,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_restore_scratch(ce, cs); /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) if (ce->engine->class == COMPUTE_CLASS) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 2c014407225c..bf8b42d2d327 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct drm_i915_mocs_entry pvc_mocs_table[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->wb_index = 2; table->unused_entries_index = 2; } else if (IS_DG2(i915)) { - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); - table->table = dg2_mocs_table_g10_ax; - } else { - table->size = ARRAY_SIZE(dg2_mocs_table); - table->table = dg2_mocs_table; - } + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->unused_entries_index = 3; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 58bb1c55294c..90933fb8cb97 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_EI_MODE(1); /* - * Wa_16011777198 and BSpec 52698 - Render powergating must be off. + * BSpec 52698 - Render powergating must be off. * FIXME BSpec is outdated, disabling powergating for MTL is just * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915) || - IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + if (IS_METEORLAKE(gt->i915)) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 500d38ddce0c..9084ee46ea07 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -764,39 +764,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, { dg2_ctx_gt_tuning_init(engine, wal); - /* Wa_16011186671:dg2_g11 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); - } - - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010469329:dg2_g10 */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); - - /* - * Wa_22010465075:dg2_g10 - * Wa_22010613112:dg2_g10 - * Wa_14010698770:dg2_g10 - */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - } - /* Wa_16013271637:dg2 */ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_18018764978:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); @@ -1602,31 +1578,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - struct intel_engine_cs *engine; - int id; - xehp_init_mcr(gt, wal); /* Wa_14011060649:dg2 */ wa_14011060649(gt, wal); - /* - * Although there are per-engine instances of these registers, - * they technically exist outside the engine itself and are not - * impacted by engine resets. Furthermore, they're part of the - * GuC blacklist so trying to treat them as engine workarounds - * will result in GuC initialization failure and a wedged GPU. - */ - for_each_engine(engine, gt, id) { - if (engine->class != VIDEO_DECODE_CLASS) - continue; - - /* Wa_16010515920:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), - ALNUNIT_CLKGATE_DIS); - } - if (IS_DG2_G10(gt->i915)) { /* Wa_22010523718:dg2 */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1637,65 +1593,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) DSS_ROUTER_CLKGATE_DIS); } - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012362059:dg2 */ - wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); - } - - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010948348:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); - - /* Wa_14011037102:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); - - /* Wa_14011371254:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); - - /* Wa_14011431319:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | - GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | - GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | - GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | - GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | - GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS); - wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | - GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | - GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | - GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | - GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | - GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | - GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | - GAMTLBVEBOX0_CLKGATE_DIS); - - /* Wa_14010569222:dg2_g10 */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - GAMEDIA_CLKGATE_DIS); - - /* Wa_14011028019:dg2_g10 */ - wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); - - /* Wa_14010680813:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS); - } - /* Wa_14014830051:dg2 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -2238,29 +2135,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: - /* - * Wa_1507100340:dg2_g10 - * - * This covers 4 registers which are next to one another : - * - PS_INVOCATION_COUNT - * - PS_INVOCATION_COUNT_UDW - * - PS_DEPTH_COUNT - * - PS_DEPTH_COUNT_UDW - */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4); - /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); break; - case COMPUTE_CLASS: - /* Wa_16011157294:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg(w, GEN9_CTX_PREEMPT_REG); - break; default: break; } @@ -2411,12 +2289,6 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } -static bool needs_wa_1308578152(struct intel_engine_cs *engine) -{ - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= - GEN_DSS_PER_GSLICE; -} - static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2431,42 +2303,20 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915) || - IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION); } - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14013392000:dg2_g11 */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012419201:dg2 */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); - } - - /* Wa_1308578152:dg2_g10 when first gslice is fused off */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && - needs_wa_1308578152(engine)) { - wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, - GEN12_REPLAY_MODE_GRANULARITY); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_DG2(i915)) { /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 @@ -2475,34 +2325,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_1608949956:dg2_g10 - * Wa_14010198302:dg2_g10 - */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - /* Wa_22010430635:dg2 */ - wa_mcr_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); - - /* Wa_14013202645:dg2 */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); - - /* Wa_22012532006:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); - - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || - IS_DG2_G10(i915)) { + if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), @@ -3046,8 +2869,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); @@ -3068,8 +2890,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(i915)) { + if (IS_DG2_G11(i915)) { /* * Wa_22012826095:dg2 * Wa_22013059131:dg2 @@ -3083,18 +2904,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li FORCE_1_SUB_MESSAGE_PER_FRAGMENT); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_14010918519:dg2_g10 - * - * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, - * so ignoring verification. - */ - wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); - } - if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_mcr_masked_en(wal, @@ -3127,7 +2936,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) + if (IS_DG2_G11(i915)) /* * Wa_22012654132 * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 569b5fe94c41..82a2ecc12b21 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; - /* Wa_16011759253:dg2_g10:a0 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959 */ if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* - * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 - * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 + * Wa_14012197797 + * Wa_22011391025 * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. @@ -297,17 +293,11 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) flags |= GUC_WA_PRE_PARSER; - /* Wa_16011777198:dg2 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - /* - * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) - * Wa_22012727685:dg2_g11[a0..) + * Wa_22012727170 + * Wa_22012727685 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) + if (IS_DG2_G11(gt->i915)) flags |= GUC_WA_CONTEXT_ISOLATION; /* Wa_16015675438 */ diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c index d9600cd1ab06..139e40434d8b 100644 --- a/drivers/gpu/drm/i915/intel_clock_gating.c +++ b/drivers/gpu/drm/i915/intel_clock_gating.c @@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) /* Wa_22010954014:dg2 */ intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); - - /* - * Wa_14010733611:dg2_g10 - * Wa_22010146351:dg2_g10 - */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, - SGR_DIS | SGGI_DIS); } static void pvc_init_clock_gating(struct drm_i915_private *i915) From f1c805716516f9e648e13f0108cea8096e0c7023 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 16 Aug 2023 14:42:06 -0700 Subject: [PATCH 05/41] drm/i915: Tidy workaround definitions Removal of the DG2 pre-production workarounds has left duplicate condition blocks in a couple places, as well as some inconsistent platform ordering. Reshuffle and consolidate some of the workarounds to reduce the number of condition blocks and to more consistently follow the "newest platform first" convention. Code movement only; no functional change. Signed-off-by: Matt Roper Acked-by: Jani Nikula Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20230816214201.534095-11-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 100 +++++++++----------- 1 file changed, 46 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 9084ee46ea07..afe6769101f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2333,6 +2333,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) true); } + if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p + * Wa_18019627453:dg2 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ @@ -2346,19 +2359,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - } - if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* - * Wa_1606700617:tgl,dg1,adl-p - * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p - * Wa_14010826681:tgl,dg1,rkl,adl-p - * Wa_18019627453:dg2 - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); + /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || @@ -2385,14 +2390,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || - IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { - /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_mcr_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2873,6 +2870,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_18017747507 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || @@ -2883,11 +2883,20 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2(i915)) { - /* Wa_18017747507 */ - wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); + } + + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } if (IS_DG2_G11(i915)) { @@ -2902,6 +2911,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013059131:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + + /* + * Wa_22012654132 + * + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. + */ + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } if (IS_XEHPSDV(i915)) { @@ -2919,35 +2940,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); } - - if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { - /* Wa_14015227452:dg2,pvc */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - - /* Wa_16015675438:dg2,pvc */ - wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - } - - if (IS_DG2(i915)) { - /* - * Wa_16011620976:dg2_g11 - * Wa_22015475538:dg2 - */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); - } - - if (IS_DG2_G11(i915)) - /* - * Wa_22012654132 - * - * Note that register 0xE420 is write-only and cannot be read - * back for verification on DG2 (due to Wa_14012342262), so - * we need to explicitly skip the readback. - */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); } static void From c9517783060ad09968cef884e712907c70d7a658 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 16 Aug 2023 14:48:25 -0700 Subject: [PATCH 06/41] drm/i915/dg2: Drop Wa_16011777198 Wa_16011777198 only applies to pre-production steppings of DG2, which we're no longer supporting. Remove the workaround and override_gucrc handling, which is no longer needed. Since this was the final use of IS_DG2_GRAPHICS_STEP, that macro can also be removed now. v2: - Include the promised removal of override_gucrc handling. Cc: Ashutosh Dixit Cc: Vinay Belgaumkar Cc: Umesh Nerlige Ramappa Signed-off-by: Matt Roper Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230816214824.548575-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 63 --------------------- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 - drivers/gpu/drm/i915/i915_drv.h | 19 ------- drivers/gpu/drm/i915/i915_perf.c | 32 ----------- drivers/gpu/drm/i915/i915_perf_types.h | 6 -- 5 files changed, 122 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 477df260ae3a..2dfb07cc4b33 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -138,17 +138,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } -static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) -{ - u32 request[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), - id, - }; - - return intel_guc_send(guc, request, ARRAY_SIZE(request)); -} - static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -199,15 +188,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } -static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - - GEM_BUG_ON(id >= SLPC_MAX_PARAM); - - return guc_action_slpc_unset_param(guc, id); -} - static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -672,49 +652,6 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) slpc->boost_freq = slpc->rp0_freq; } -/** - * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode - * @slpc: pointer to intel_guc_slpc. - * @mode: new value of the mode. - * - * This function will override the GUCRC mode. - * - * Return: 0 on success, non-zero error code on failure. - */ -int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode) -{ - int ret; - struct drm_i915_private *i915 = slpc_to_i915(slpc); - intel_wakeref_t wakeref; - - if (mode >= SLPC_GUCRC_MODE_MAX) - return -EINVAL; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode); - if (ret) - guc_err(slpc_to_guc(slpc), "Override RC mode %d failed: %pe\n", - mode, ERR_PTR(ret)); - } - - return ret; -} - -int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc) -{ - struct drm_i915_private *i915 = slpc_to_i915(slpc); - intel_wakeref_t wakeref; - int ret = 0; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE); - if (ret) - guc_err(slpc_to_guc(slpc), "Unsetting RC mode failed: %pe\n", ERR_PTR(ret)); - } - - return ret; -} - /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 597eb5413ddf..6ac6503c39d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -44,8 +44,6 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); -int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc); -int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); #endif diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a1a2fe31f434..f3710b9ee001 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -698,25 +698,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_METEORLAKE(__i915) && \ IS_MEDIA_STEP(__i915, since, until)) -/* - * DG2 hardware steppings are a bit unusual. The hardware design was forked to - * create three variants (G10, G11, and G12) which each have distinct - * workaround sets. The G11 and G12 forks of the DG2 design reset the GT - * stepping back to "A0" for their first iterations, even though they're more - * similar to a G10 B0 stepping and G10 C0 stepping respectively in terms of - * functionality and workarounds. However the display stepping does not reset - * in the same manner --- a specific stepping like "B0" has a consistent - * meaning regardless of whether it belongs to a G10, G11, or G12 DG2. - * - * TLDR: All GT workarounds and stepping-specific logic must be applied in - * relation to a specific subplatform (G10/G11/G12), whereas display workarounds - * and stepping-specific logic will be applied with a general DG2-wide stepping - * number. - */ -#define IS_DG2_GRAPHICS_STEP(__i915, variant, since, until) \ - (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \ - IS_GRAPHICS_STEP(__i915, since, until)) - #define IS_DG2_DISPLAY_STEP(__i915, since, until) \ (IS_DG2(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 04bc1f4a1115..dfe7eff7d1a1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1675,13 +1675,6 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_oa_buffer(stream); - /* - * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6. - */ - if (stream->override_gucrc) - drm_WARN_ON(>->i915->drm, - intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc)); - intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); intel_engine_pm_put(stream->engine); @@ -3272,7 +3265,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, struct drm_i915_private *i915 = stream->perf->i915; struct i915_perf *perf = stream->perf; struct i915_perf_group *g; - struct intel_gt *gt; int ret; if (!props->engine) { @@ -3280,7 +3272,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, "OA engine not specified\n"); return -EINVAL; } - gt = props->engine->gt; g = props->engine->oa_group; /* @@ -3381,25 +3372,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, intel_engine_pm_get(stream->engine); intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); - /* - * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes - * OA to lose the configuration state. Prevent this by overriding GUCRC - * mode. - */ - if (intel_uc_uses_guc_rc(>->uc) && - (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) { - ret = intel_guc_slpc_override_gucrc_mode(>->uc.guc.slpc, - SLPC_GUCRC_MODE_GUCRC_NO_RC6); - if (ret) { - drm_dbg(&stream->perf->i915->drm, - "Unable to override gucrc mode\n"); - goto err_gucrc; - } - - stream->override_gucrc = true; - } - ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; @@ -3436,10 +3408,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(stream); err_oa_buf_alloc: - if (stream->override_gucrc) - intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc); - -err_gucrc: intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); intel_engine_pm_put(stream->engine); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index fe3a5dae8c22..13b1ae9b96c7 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -338,12 +338,6 @@ struct i915_perf_stream { * buffer should be checked for available data. */ u64 poll_oa_period; - - /** - * @override_gucrc: GuC RC has been overridden for the perf stream, - * and we need to restore the default configuration on release. - */ - bool override_gucrc; }; /** From 28c46feec7f8760683ef08f12746630a3598173e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:21 -0700 Subject: [PATCH 07/41] drm/i915: Consolidate condition for Wa_22011802037 The workaround bounds for Wa_22011802037 are somewhat complex and are replicated in several places throughout the code. Pull the condition out to a helper function to prevent mistakes if this condition needs to change again in the future. Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-12-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +--- .../drm/i915/gt/intel_execlists_submission.c | 4 +--- drivers/gpu/drm/i915/gt/intel_reset.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_reset.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 +--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +--- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ee15486fed0d..dfb69fc977a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1617,9 +1617,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, * Wa_22011802037: Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 8a641bcf777c..4d05321dc5b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_engine_wait_for_pending_mi_fw(engine); engine->execlists.reset_ccid = active_ccid(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index cc6bd21a3e51..1ff7b42521c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1632,6 +1632,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) w->gt = NULL; } +/* + * Wa_22011802037 requires that we (or the GuC) ensure that no command + * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. + */ +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) < 11) + return false; + + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0)) + return true; + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" #include "selftest_hangcheck.c" diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 25c975b6e8fc..f615b30b81c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); bool intel_has_gpu_reset(const struct intel_gt *gt); bool intel_has_reset_engine(const struct intel_gt *gt); +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); + #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 82a2ecc12b21..da967938fea5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -288,9 +288,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(gt->i915) >= 11 && - GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(gt)) flags |= GUC_WA_PRE_PARSER; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a0e3ef1c65d2..1bd5d8f7c40b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1658,9 +1658,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { intel_engine_stop_cs(engine); intel_engine_wait_for_pending_mi_fw(engine); } From ea2f15565d7a082bf21f131225a656793fdca359 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:22 -0700 Subject: [PATCH 08/41] drm/i915/xelpmp: Don't assume workarounds extend to future platforms The currently implemented Xe_LPM+ workarounds are specific to media version 13.00. When new IP versions show up in the future, they'll need their own workaround lists. Signed-off-by: Matt Roper Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-13-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index afe6769101f9..b33615cd84d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1711,10 +1711,10 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) gt_tuning_settings(gt, wal); if (gt->type == GT_MEDIA) { - if (MEDIA_VER(i915) >= 13) + if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) xelpmp_gt_workarounds_init(gt, wal); else - MISSING_CASE(MEDIA_VER(i915)); + MISSING_CASE(MEDIA_VER_FULL(i915)); return; } From f7696ded7c9e358670dae1801660f442f059c7db Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:23 -0700 Subject: [PATCH 09/41] drm/i915/xelpg: Call Xe_LPG workaround functions based on IP version Although some of our Xe_LPG workarounds were already being applied based on IP version correctly, others were matching on MTL as a base platform, which is incorrect. Although MTL is the only platform right now that uses Xe_LPG IP, this may not always be the case. If a future platform re-uses this graphics IP, the same workarounds should be applied, even if it isn't a "MTL" platform. We were also incorrectly applying Xe_LPG workarounds/tuning to the Xe_LPM+ media IP in one or two places; we should make sure that we don't try to apply graphics workarounds to the media GT and vice versa where they don't belong. A new helper macro IS_GT_IP_RANGE() is added to help ensure this is handled properly -- it checks that the GT matches the IP type being tested as well as the IP version falling in the proper range. Note that many of the stepping-based workarounds are still incorrectly checking for a MTL base platform; that will be remedied in a later patch. v2: - Rework macro into a slightly more generic IS_GT_IP_RANGE() that can be used for either GFX or MEDIA checks. v3: - Switch back to separate macros for gfx and media. (Jani) - Move macro to intel_gt.h. (Andi) Cc: Gustavo Sousa Cc: Tvrtko Ursulin Cc: Jani Nikula Cc: Andi Shyti Signed-off-by: Matt Roper Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-14-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.h | 11 ++++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 38 +++++++++++---------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 6c34547b58b5..15c25980411d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -14,6 +14,17 @@ struct drm_i915_private; struct drm_printer; +/* + * Check that the GT is a graphics GT and has an IP version within the + * specified range (inclusive). + */ +#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt)->type != GT_MEDIA && \ + GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ + GRAPHICS_VER_FULL((gt)->i915) <= (until))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b33615cd84d9..bdd020e81639 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -778,8 +778,8 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } -static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; @@ -790,12 +790,12 @@ static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } -static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - mtl_ctx_gt_tuning_init(engine, wal); + xelpg_ctx_gt_tuning_init(engine, wal); if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { @@ -904,8 +904,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_METEORLAKE(i915)) - mtl_ctx_workarounds_init(engine, wal); + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ else if (IS_DG2(i915)) @@ -1684,10 +1684,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(gt->i915)) { - if (gt->type != GT_MEDIA) - wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1719,7 +1717,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) return; } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -2168,7 +2166,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) blacklist_trtt(engine); } -static void mtl_whitelist_build(struct intel_engine_cs *engine) +static void xelpg_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -2190,8 +2188,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, engine->gt, "whitelist", engine->name); - if (IS_METEORLAKE(i915)) - mtl_whitelist_build(engine); + if (engine->gt->type == GT_MEDIA) + ; /* none yet */ + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); else if (IS_DG2(i915)) @@ -2791,10 +2791,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * function invoked by __intel_engine_init_ctx_wa(). */ static void -add_render_compute_tuning_settings(struct drm_i915_private *i915, +add_render_compute_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(i915) || IS_DG2(i915)) + struct drm_i915_private *i915 = gt->i915; + + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -2824,7 +2826,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; - add_render_compute_tuning_settings(i915, wal); + add_render_compute_tuning_settings(engine->gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as From 5a213086a025349361b5cf75c8fd4591d96a7a99 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:24 -0700 Subject: [PATCH 10/41] drm/i915: Eliminate IS_MTL_GRAPHICS_STEP Several workarounds are guarded by IS_MTL_GRAPHICS_STEP. However none of these workarounds are actually tied to MTL as a platform; they only relate to the Xe_LPG graphics IP, regardless of what platform it appears in. At the moment MTL is the only platform that uses Xe_LPG with IP versions 12.70 and 12.71, but we can't count on this being true in the future. Switch these to use a new IS_GFX_GT_IP_STEP() macro instead that is purely based on IP version. IS_GFX_GT_IP_STEP() is also GT-based rather than device-based, which will help prevent mistakes where we accidentally try to apply Xe_LPG graphics workarounds to the Xe_LPM+ media GT and vice-versa. v2: - Switch to a more generic and shorter IS_GT_IP_STEP macro that can be used for both graphics and media IP (and any other kind of GTs that show up in the future). v3: - Switch back to long-form IS_GFX_GT_IP_STEP macro. (Jani) - Move macro to intel_gt.h. (Andi) v4: - Build IS_GFX_GT_IP_STEP on top of IS_GFX_GT_IP_RANGE and IS_GRAPHICS_STEP building blocks and name the parameters from/until rather than begin/fixed. (Jani) - Fix usage examples in comment. v5: - Tweak comment on macro. (Gustavo) Cc: Gustavo Sousa Cc: Tvrtko Ursulin Cc: Andi Shyti Cc: Jani Nikula Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-15-matthew.d.roper@intel.com --- .../drm/i915/display/skl_universal_plane.c | 5 +- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 11 +++-- drivers/gpu/drm/i915/gt/intel_gt.h | 20 ++++++++ drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 7 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 48 ++++++++++--------- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 -- 10 files changed, 62 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 6b01a0b68b97..c3b8a5a378f8 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -20,6 +20,7 @@ #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" +#include "gt/intel_gt.h" #include "pxp/intel_pxp.h" static const u32 skl_plane_formats[] = { @@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915, enum pipe pipe, enum plane_id plane_id) { /* Wa_14017240301 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0)) return false; /* Wa_22011186057 */ diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 8a3cbb01bcbe..67a6a13bf39b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -4,9 +4,9 @@ */ #include "gen8_engine_cs.h" -#include "i915_drv.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" +#include "intel_gt.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -799,6 +799,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; + struct intel_gt *gt = rq->engine->gt; u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | @@ -809,8 +810,8 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 15c25980411d..6e63b46682f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -25,6 +25,26 @@ struct drm_printer; GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ GRAPHICS_VER_FULL((gt)->i915) <= (until))) +/* + * Check that the GT is a graphics GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. E.g., + * + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) + * + * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper + * stepping bound for the specified IP version. + */ +#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 0b414eae1683..11d181b1cc7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -3,8 +3,7 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_drv.h" - +#include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { /* Wa_14016747170 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, intel_uncore_read(gt->uncore, MTL_GT_ACTIVITY_FACTOR)); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bc7ce2c2b959..967fe4d77a87 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1346,8 +1346,8 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_aux_table_inv(ce->engine, cs); /* Wa_16014892111 */ - if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 1ff7b42521c9..fd6c22aeb670 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1641,7 +1641,7 @@ bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) if (GRAPHICS_VER(gt->i915) < 11) return false; - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) return true; if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index bdd020e81639..12ac218afb2a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -781,24 +781,24 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; dg2_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; xelpg_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014947963 */ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); @@ -1640,8 +1640,8 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014830051 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -2293,23 +2293,24 @@ static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_22014600077 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, @@ -2825,8 +2826,9 @@ static void general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - add_render_compute_tuning_settings(engine->gt, wal); + add_render_compute_tuning_settings(gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as @@ -2847,13 +2849,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* * Wa_14017066071 * Wa_14017654203 @@ -2861,13 +2863,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, MTL_DISABLE_SAMPLER_SC_OOO); - if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* Wa_22015279794 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_PREFETCH_INTO_IC); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, @@ -2877,8 +2879,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { /* Wa_22014226127 */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index da967938fea5..861d0c58388c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -273,7 +273,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_POLLCS; /* Wa_14014475959 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1bd5d8f7c40b..b2150a962f69 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4265,7 +4265,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) /* Wa_14014475959:dg2 */ if (engine->class == COMPUTE_CLASS) - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(engine->i915)) engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f3710b9ee001..b9af8b933d7e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -686,10 +686,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \ - (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \ - IS_GRAPHICS_STEP(__i915, since, until)) - #define IS_MTL_DISPLAY_STEP(__i915, since, until) \ (IS_METEORLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) From 81af8abe65134f1b1872e6dfc4ba520c06ac363a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:25 -0700 Subject: [PATCH 11/41] drm/i915: Eliminate IS_MTL_MEDIA_STEP Stepping-specific media behavior shouldn't be tied to MTL as a platform, but rather specifically to the Xe_LPM+ IP. Future non-MTL platforms may re-use this IP and will need to follow the exact same logic and apply the same workarounds. IS_MTL_MEDIA_STEP() is dropped in favor of IS_MEDIA_GT_IP_STEP, which checks the media IP version associated with a specific IP and also ensures that we're operating on the media GT, not the primary GT. v2: - Switch to the IS_GT_IP_STEP macro. v3: - Switch back to long-form IS_MEDIA_GT_IP_STEP. (Jani) v4: - Build IS_MEDIA_GT_IP_STEP on top of IS_MEDIA_GT_IP_RANGE and IS_MEDIA_STEP building blocks and name the parameters from/until rather than begin/fixed.. (Jani) v5: - Tweak macro comment wording. (Gustavo) - Add a check to catch NULL gt in IS_MEDIA_GT_IP_RANGE; this allows it to be used safely on i915->media_gt, which may be NULL on some platforms. (Gustavo) Cc: Jani Nikula Cc: Gustavo Sousa Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-16-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.h | 32 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rc6.c | 3 +-- drivers/gpu/drm/i915/i915_drv.h | 4 ---- drivers/gpu/drm/i915/i915_perf.c | 15 ++++---------- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 6e63b46682f7..239848bcb2a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -25,6 +25,20 @@ struct drm_printer; GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ GRAPHICS_VER_FULL((gt)->i915) <= (until))) +/* + * Check that the GT is a media GT and has an IP version within the + * specified range (inclusive). + * + * Only usable on platforms with a standalone media design (i.e., IP version 13 + * and higher). + */ +#define IS_MEDIA_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(13, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt) && (gt)->type == GT_MEDIA && \ + MEDIA_VER_FULL((gt)->i915) >= (from) && \ + MEDIA_VER_FULL((gt)->i915) <= (until))) + /* * Check that the GT is a graphics GT with a specific IP version and has * a stepping in the range [from, until). The lower stepping bound is @@ -45,6 +59,24 @@ struct drm_printer; (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) +/* + * Check that the GT is a media GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. "STEP_FOREVER" can be passed as "until" for + * workarounds that have no upper stepping bound for the specified IP version. + * + * This macro may only be used to match on platforms that have a standalone + * media design (i.e., media version 13 or higher). + */ +#define IS_MEDIA_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_MEDIA_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_MEDIA_STEP((gt)->i915, (from), (until)))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 90933fb8cb97..86df42cb5823 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -524,8 +524,7 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } - if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) { + if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) { drm_notice(&i915->drm, "Media RC6 disabled on A step\n"); return false; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b9af8b933d7e..75c5fb57935c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -690,10 +690,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_METEORLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_MTL_MEDIA_STEP(__i915, since, until) \ - (IS_METEORLAKE(__i915) && \ - IS_MEDIA_STEP(__i915, since, until)) - #define IS_DG2_DISPLAY_STEP(__i915, since, until) \ (IS_DG2(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index dfe7eff7d1a1..605e6e4fedf1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4191,7 +4191,7 @@ static int read_properties_unlocked(struct i915_perf *perf, * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM * does not work as expected. */ - if (IS_MTL_MEDIA_STEP(props->engine->i915, STEP_A0, STEP_C0) && + if (IS_MEDIA_GT_IP_STEP(props->engine->gt, IP_VER(13, 0), STEP_A0, STEP_C0) && props->engine->oa_group->type == TYPE_OAM && intel_check_bios_c6_setup(&props->engine->gt->rc6)) { drm_dbg(&perf->i915->drm, @@ -5300,16 +5300,9 @@ int i915_perf_ioctl_version(struct drm_i915_private *i915) * C6 disable in BIOS. If Media C6 is enabled in BIOS, return version 6 * to indicate that OA media is not supported. */ - if (IS_MTL_MEDIA_STEP(i915, STEP_A0, STEP_C0)) { - struct intel_gt *gt; - int i; - - for_each_gt(gt, i915, i) { - if (gt->type == GT_MEDIA && - intel_check_bios_c6_setup(>->rc6)) - return 6; - } - } + if (IS_MEDIA_GT_IP_STEP(i915->media_gt, IP_VER(13, 0), STEP_A0, STEP_C0) && + intel_check_bios_c6_setup(&i915->media_gt->rc6)) + return 6; return 7; } From 2e3c369f23a77c404fd6b364a120a546f30e651c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:27 -0700 Subject: [PATCH 12/41] drm/i915/mtl: Eliminate subplatforms Now that we properly match the Xe_LPG IP versions associated with various workarounds, there's no longer any need to define separate MTL subplatform in the driver. Nothing in the code is conditional on MTL-M or MTL-P base platforms. Furthermore, I'm not sure the "M" and "P" designations are even an accurate representation of which specific platforms would have which IP versions; those were mostly just placeholders from a long time ago. The reality is that the IP version present on a platform gets read from a fuse register at driver init; we shouldn't be trying to guess which IP is present based on PCI ID anymore. Signed-off-by: Matt Roper Reviewed-by: Nemesa Garg Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-18-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ---- drivers/gpu/drm/i915/intel_device_info.c | 14 -------------- drivers/gpu/drm/i915/intel_device_info.h | 4 ---- include/drm/i915_pciids.h | 13 ++++--------- 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 75c5fb57935c..b08026cd4f40 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -573,10 +573,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_PONTEVECCHIO(i915) IS_PLATFORM(i915, INTEL_PONTEVECCHIO) #define IS_METEORLAKE(i915) IS_PLATFORM(i915, INTEL_METEORLAKE) -#define IS_METEORLAKE_M(i915) \ - IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_M) -#define IS_METEORLAKE_P(i915) \ - IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_P) #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(i915) \ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index ea0ec6174ce5..9dfa680a4c62 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -206,14 +206,6 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(0), }; -static const u16 subplatform_m_ids[] = { - INTEL_MTL_M_IDS(0), -}; - -static const u16 subplatform_p_ids[] = { - INTEL_MTL_P_IDS(0), -}; - static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -275,12 +267,6 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); - } else if (find_devid(devid, subplatform_m_ids, - ARRAY_SIZE(subplatform_m_ids))) { - mask = BIT(INTEL_SUBPLATFORM_M); - } else if (find_devid(devid, subplatform_p_ids, - ARRAY_SIZE(subplatform_p_ids))) { - mask = BIT(INTEL_SUBPLATFORM_P); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index dbfe6443457b..2ca54417d19b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -129,10 +129,6 @@ enum intel_platform { #define INTEL_SUBPLATFORM_N 1 #define INTEL_SUBPLATFORM_RPLU 2 -/* MTL */ -#define INTEL_SUBPLATFORM_M 0 -#define INTEL_SUBPLATFORM_P 1 - enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index e1e10dfbb661..38dae757d1a8 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -738,18 +738,13 @@ #define INTEL_ATS_M_IDS(info) \ INTEL_ATS_M150_IDS(info), \ INTEL_ATS_M75_IDS(info) -/* MTL */ -#define INTEL_MTL_M_IDS(info) \ - INTEL_VGA_DEVICE(0x7D40, info), \ - INTEL_VGA_DEVICE(0x7D60, info) -#define INTEL_MTL_P_IDS(info) \ +/* MTL */ +#define INTEL_MTL_IDS(info) \ + INTEL_VGA_DEVICE(0x7D40, info), \ INTEL_VGA_DEVICE(0x7D45, info), \ INTEL_VGA_DEVICE(0x7D55, info), \ + INTEL_VGA_DEVICE(0x7D60, info), \ INTEL_VGA_DEVICE(0x7DD5, info) -#define INTEL_MTL_IDS(info) \ - INTEL_MTL_M_IDS(info), \ - INTEL_MTL_P_IDS(info) - #endif /* _I915_PCIIDS_H */ From 14128d64090fa88445376cb8ccf91c50c08bd410 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 21 Aug 2023 11:06:29 -0700 Subject: [PATCH 13/41] drm/i915: Replace several IS_METEORLAKE with proper IP version checks Many of the IS_METEORLAKE conditions throughout the driver are supposed to be checks for Xe_LPG and/or Xe_LPM+ IP, not for the MTL platform specifically. Update those checks to ensure that the code will still operate properly if/when these IP versions show up on future platforms. v2: - Update two more conditions (one for pg_enable, one for MTL HuC compatibility). v3: - Don't change GuC/HuC compatibility check, which sounds like it truly is specific to the MTL platform. (Gustavo) - Drop a non-lineage workaround number for the OA timestamp frequency workaround. (Gustavo) Cc: Gustavo Sousa Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230821180619.650007-20-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +- drivers/gpu/drm/i915/gt/intel_rc6.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 11 +++++------ 8 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index d24c0ce8805c..19156ba4b9ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data) BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); - /* Limiting the extension only to Meteor Lake */ - if (!IS_METEORLAKE(i915)) + /* Limiting the extension only to Xe_LPG and beyond */ + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) return -ENODEV; if (copy_from_user(&ext, base, sizeof(ext))) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b538b5c04948..e91fc881dbf1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (IS_METEORLAKE(i915) && engine->id == GSC0) { + if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { intel_uncore_write(engine->gt->uncore, RC_PSMI_CTRL_GSCCS, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index bf8b42d2d327..07269ff3be13 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -495,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) { table->size = ARRAY_SIZE(mtl_mocs_table); table->table = mtl_mocs_table; table->n_entries = MTL_NUM_MOCS_ENTRIES; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 86df42cb5823..8b67abd720be 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -123,7 +123,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index fd6c22aeb670..98575d79c446 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt) static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) { - if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) + if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) return false; if (!__HAS_ENGINE(engine_mask, GSC0)) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 092542f53aad..4feef874e6d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c { struct drm_i915_private *i915 = rps_to_i915(rps); - if (IS_METEORLAKE(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) return mtl_get_freq_caps(rps, caps); else return __gen6_rps_get_freq_caps(rps, caps); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4de44cf1026d..7a90a2e32c9f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { switch (obj->pat_index) { case 0: return " WB"; case 1: return " WT"; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 605e6e4fedf1..018f42fff4cc 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3220,11 +3220,10 @@ get_sseu_config(struct intel_sseu *out_sseu, */ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) { - /* - * Wa_18013179988:dg2 - * Wa_14015846243:mtl - */ - if (IS_DG2(i915) || IS_METEORLAKE(i915)) { + struct intel_gt *gt = to_gt(i915); + + /* Wa_18013179988 */ + if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { intel_wakeref_t wakeref; u32 reg, shift; @@ -4507,7 +4506,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - if (IS_METEORLAKE(perf->i915)) + if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) return reg_in_range_table(addr, mtl_oa_mux_regs); else return reg_in_range_table(addr, gen12_oa_mux_regs); From b2edc4148ad90c906275329711810721719a0c2d Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 15 Aug 2023 17:39:57 -0700 Subject: [PATCH 14/41] drm/i915/guc: Force a reset on internal GuC error If GuC hits an internal error (and survives long enough to report it to the KMD), it is basically toast and will stop until a GT reset and subsequent GuC reload is performed. Previously, the KMD just printed an error message and then waited for the heartbeat to eventually kick in and trigger a reset (assuming the heartbeat had not been disabled). Instead, force the reset immediately to guarantee that it happens and to eliminate the very long heartbeat delay. The captured error state is also more likely to be useful if captured at the time of the error rather than many seconds later. Note that it is not possible to trigger a reset from with the G2H handler itself. The reset prepare process involves flushing outstanding G2H contents. So a deadlock could result. Instead, the G2H handler queues a worker thread to do the reset asynchronously. v2: Flush the worker on suspend and shutdown. Add rate limiting to prevent spam from a totally dead system (review feedback from Daniele). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230816003957.3572654-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 38 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc.h | 15 +++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 +--- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 861d0c58388c..27df41c53b89 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -159,6 +159,21 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc) gen11_reset_guc_interrupts(guc); } +static void guc_dead_worker_func(struct work_struct *w) +{ + struct intel_guc *guc = container_of(w, struct intel_guc, dead_guc_worker); + struct intel_gt *gt = guc_to_gt(guc); + unsigned long last = guc->last_dead_guc_jiffies; + unsigned long delta = jiffies_to_msecs(jiffies - last); + + if (delta < 500) { + intel_gt_set_wedged(gt); + } else { + intel_gt_handle_error(gt, ALL_ENGINES, I915_ERROR_CAPTURE, "dead GuC"); + guc->last_dead_guc_jiffies = jiffies; + } +} + void intel_guc_init_early(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -171,6 +186,8 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_slpc_init_early(&guc->slpc); intel_guc_rc_init_early(guc); + INIT_WORK(&guc->dead_guc_worker, guc_dead_worker_func); + mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); if (GRAPHICS_VER(i915) >= 11) { @@ -449,6 +466,8 @@ void intel_guc_fini(struct intel_guc *guc) if (!intel_uc_fw_is_loadable(&guc->fw)) return; + flush_work(&guc->dead_guc_worker); + if (intel_guc_slpc_is_used(guc)) intel_guc_slpc_fini(&guc->slpc); @@ -573,6 +592,20 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len, return ret; } +int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action) +{ + if (action == INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) + guc_err(guc, "Crash dump notification\n"); + else if (action == INTEL_GUC_ACTION_NOTIFY_EXCEPTION) + guc_err(guc, "Exception notification\n"); + else + guc_err(guc, "Unknown crash notification: 0x%04X\n", action); + + queue_work(system_unbound_wq, &guc->dead_guc_worker); + + return 0; +} + int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len) { @@ -589,6 +622,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, if (msg & INTEL_GUC_RECV_MSG_EXCEPTION) guc_err(guc, "Received early exception notification!\n"); + if (msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | INTEL_GUC_RECV_MSG_EXCEPTION)) + queue_work(system_unbound_wq, &guc->dead_guc_worker); + return 0; } @@ -628,6 +664,8 @@ int intel_guc_suspend(struct intel_guc *guc) return 0; if (intel_guc_submission_is_used(guc)) { + flush_work(&guc->dead_guc_worker); + /* * This H2G MMIO command tears down the GuC in two steps. First it will * generate a G2H CTB for every active context indicating a reset. In diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8dc291ff0093..6c392bad29c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -266,6 +266,20 @@ struct intel_guc { unsigned long last_stat_jiffies; } timestamp; + /** + * @dead_guc_worker: Asynchronous worker thread for forcing a GuC reset. + * Specifically used when the G2H handler wants to issue a reset. Resets + * require flushing the G2H queue. So, the G2H processing itself must not + * trigger a reset directly. Instead, go via this worker. + */ + struct work_struct dead_guc_worker; + /** + * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance + * used to prevent a fundamentally broken system from continuously + * reloading the GuC. + */ + unsigned long last_dead_guc_jiffies; + #ifdef CONFIG_DRM_I915_SELFTEST /** * @number_guc_id_stolen: The number of guc_ids that have been stolen @@ -476,6 +490,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_error_capture_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action); struct intel_engine_cs * intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 97eadd08181d..6e22af31513a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1112,12 +1112,8 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r ret = 0; break; case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: - CT_ERROR(ct, "Received GuC crash dump notification!\n"); - ret = 0; - break; case INTEL_GUC_ACTION_NOTIFY_EXCEPTION: - CT_ERROR(ct, "Received GuC exception notification!\n"); - ret = 0; + ret = intel_guc_crash_process_msg(guc, action); break; default: ret = -EOPNOTSUPP; From 8940da9fe5f278ac6ecb4cafa55c784f524cb3b2 Mon Sep 17 00:00:00 2001 From: Nemesa Garg Date: Tue, 22 Aug 2023 22:57:43 +0530 Subject: [PATCH 15/41] drm/i915/mtl: Adding DeviceID for Arrowlake-S under MTL Arrowlake-S graphics is similar enough to Meteorlake that we can treat them as the same platform Signed-off-by: Nemesa Garg Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230822172743.2113377-1-nemesa.garg@intel.com --- include/drm/i915_pciids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 38dae757d1a8..1661f9e552d2 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -745,6 +745,7 @@ INTEL_VGA_DEVICE(0x7D45, info), \ INTEL_VGA_DEVICE(0x7D55, info), \ INTEL_VGA_DEVICE(0x7D60, info), \ + INTEL_VGA_DEVICE(0x7D67, info), \ INTEL_VGA_DEVICE(0x7DD5, info) #endif /* _I915_PCIIDS_H */ From e427343185583123fd3485ca7ca595325e589429 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 25 Aug 2023 09:27:53 -0700 Subject: [PATCH 16/41] drm/i915/gsc: define gsc fw Add FW definition and the matching override modparam. The GSC FW has both a release version, based on platform and a rolling counter, and a compatibility version, which is the one tracking interface changes. Since what we care about is the interface, we use the compatibility version in the binary names. Same as with the GuC, a major version bump indicate a backward-incompatible change, while a minor version bump indicates a backward-compatible one, so we use only the former in the file name. Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Tvrtko Ursulin Cc: Rodrigo Vivi Reviewed-by: Alan Previn Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230825162754.1949838-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 32 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 9e833f499ac7..fc0d05d2df59 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -131,6 +131,17 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, huc_mmp(bxt, 2, 0, 0)) \ fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0)) +/* + * The GSC FW has multiple version (see intel_gsc_uc.h for details); since what + * we care about is the interface, we use the compatibility version in the + * binary names. + * Same as with the GuC, a major version bump indicate a + * backward-incompatible change, while a minor version bump indicates a + * backward-compatible one, so we use only the former in the file name. + */ +#define INTEL_GSC_FIRMWARE_DEFS(fw_def, gsc_def) \ + fw_def(METEORLAKE, 0, gsc_def(mtl, 1, 0)) + /* * Set of macros for producing a list of filenames from the above table. */ @@ -166,6 +177,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_) +#define MAKE_GSC_FW_PATH(prefix_, major_, minor_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "gsc", major_) + /* * All blobs need to be declared via MODULE_FIRMWARE(). * This first expansion of the table macros is solely to provide @@ -176,6 +190,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC) +INTEL_GSC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GSC_FW_PATH) /* * The next expansion of the table macros (in __uc_fw_auto_select below) provides @@ -225,6 +240,10 @@ struct __packed uc_fw_blob { #define HUC_FW_BLOB_GSC(prefix_) \ UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_)) +#define GSC_FW_BLOB(prefix_, major_, minor_) \ + UC_FW_BLOB_NEW(major_, minor_, 0, true, \ + MAKE_GSC_FW_PATH(prefix_, major_, minor_)) + struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ @@ -251,9 +270,14 @@ static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) }; +static const struct uc_fw_platform_requirement blobs_gsc[] = { + INTEL_GSC_FIRMWARE_DEFS(MAKE_FW_LIST, GSC_FW_BLOB) +}; + static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + [INTEL_UC_FW_TYPE_GSC] = { blobs_gsc, ARRAY_SIZE(blobs_gsc) }, }; static void @@ -266,14 +290,6 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) int i; bool found; - /* - * GSC FW support is still not fully in place, so we're not defining - * the FW blob yet because we don't want the driver to attempt to load - * it until we're ready for it. - */ - if (uc_fw->type == INTEL_UC_FW_TYPE_GSC) - return; - /* * The only difference between the ADL GuC FWs is the HWConfig support. * ADL-N does not support HWConfig, so we should use the same binary as From 7467e1da906468bcbd311023b30708193103ecf9 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 28 Aug 2023 12:04:50 +0530 Subject: [PATCH 17/41] drm/i915/mtl: Update workaround 14016712196 Now this workaround is permanent workaround on MTL and DG2, earlier we used to apply on MTL A0 step only. VLK-45480 Fixes: d922b80b1010 ("drm/i915/gt: Add workaround 14016712196") Signed-off-by: Tejas Upadhyay Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230828063450.2642748-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 67a6a13bf39b..7f50d02cd67f 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || - IS_GFX_GT_IP_STEP(rq->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(rq->i915)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -810,8 +810,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); From 280410677af763f3871b93e794a199cfcf6fb580 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 21 Aug 2023 17:30:35 +0200 Subject: [PATCH 18/41] drm/i915: mark requests for GuC virtual engines to avoid use-after-free References to i915_requests may be trapped by userspace inside a sync_file or dmabuf (dma-resv) and held indefinitely across different proceses. To counter-act the memory leaks, we try to not to keep references from the request past their completion. On the other side on fence release we need to know if rq->engine is valid and points to hw engine (true for non-virtual requests). To make it possible extra bit has been added to rq->execution_mask, for marking virtual engines. Fixes: bcb9aa45d5a0 ("Revert "drm/i915: Hold reference to intel_context over life of i915_request"") Signed-off-by: Chris Wilson Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230821153035.3903006-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +++ drivers/gpu/drm/i915/i915_request.c | 7 ++----- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e99a6fa03d45..a7e677598004 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -58,6 +58,7 @@ struct i915_perf_group; typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) +#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1) struct intel_hw_status_page { struct list_head timelines; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b2150a962f69..cabdc645fcdd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -5468,6 +5468,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, ve->base.flags = I915_ENGINE_IS_VIRTUAL; + BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES); + ve->base.mask = VIRTUAL_ENGINES; + intel_context_init(&ve->context, &ve->base); for (n = 0; n < count; n++) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7c7da284990d..f59081066a19 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -134,9 +134,7 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->semaphore); /* - * Keep one request on each engine for reserved use under mempressure - * do not use with virtual engines as this really is only needed for - * kernel contexts. + * Keep one request on each engine for reserved use under mempressure. * * We do not hold a reference to the engine here and so have to be * very careful in what rq->engine we poke. The virtual engine is @@ -166,8 +164,7 @@ static void i915_fence_release(struct dma_fence *fence) * know that if the rq->execution_mask is a single bit, rq->engine * can be a physical engine with the exact corresponding mask. */ - if (!intel_engine_is_virtual(rq->engine) && - is_power_of_2(rq->execution_mask) && + if (is_power_of_2(rq->execution_mask) && !cmpxchg(&rq->engine->request_pool, NULL, rq)) return; From fb4e4c5e38583a2c6526ce9fb81ddc1f0831087c Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Mon, 28 Aug 2023 12:28:52 -0700 Subject: [PATCH 19/41] drm/i915/gt: Wait longer for tasks in migrate selftest The thread_global_copy subtest of the live migrate selftest creates a large number of threads and waits 10ms for them all to start. This is not enough time to wait for the threaded tasks to start, as some may need to wait for additional ring space to be granted. Threads that do so are at risk of getting stopped (signaled) in the middle of waiting for additional space, which can result in ERESTARTSYS getting reported erroneously by i915_request_wait. Instead of waiting a flat 10ms for the threads to start, wait 10ms per thread. This grants enough of a buffer for each thread to wait for additional ring space when needed. Signed-off-by: Jonathan Cavitt Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230828192852.2894671-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/gt/selftest_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3def5ca72dec..1a34cbe04fb6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -710,7 +710,7 @@ static int threaded_migrate(struct intel_migrate *migrate, thread[i].tsk = tsk; } - msleep(10); /* start all threads before we kthread_stop() */ + msleep(10 * n_cpus); /* start all threads before we kthread_stop() */ for (i = 0; i < n_cpus; ++i) { struct task_struct *tsk = thread[i].tsk; From 4632e3209f4b4349ebe67597897045b1a8af9daa Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Fri, 1 Sep 2023 10:27:00 +0530 Subject: [PATCH 20/41] drm/i915: Add Wa_14015150844 Disables Atomic-chaining of Typed Writes. BSpec: 54040 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230901045700.2553994-1-shekhar.chauhan@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 0e4c638fcbbf..a00ff51c681d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1218,6 +1218,8 @@ #define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) +#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) + #define ICL_HDC_MODE MCR_REG(0xe5f4) #define EU_PERF_CNTL2 PERF_REG(0xe658) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 12ac218afb2a..e950efc75983 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2326,6 +2326,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(i915)) { + /* Wa_14015150844 */ + wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0, + _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES), + 0, true); + } + if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, From 94bcf876cb6a224685c750cefc6ca75c01d8db8f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 6 Sep 2023 17:03:55 -0700 Subject: [PATCH 21/41] drm/i915/mtl: Drop Wa_14017240301 Drop Wa_14017240301, which is only relevant to pre-production MTL hardware. Although we usually wait a little bit longer to start dropping pre-production workarounds for a platform, it was suggested to eliminate this one slightly earlier because it's a bit unusual/ugly: this workaround is a display-specific workaround that requires matching on the graphics/GT IP version instead of the display IP version. Suggested-by: Jani Nikula Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230907000354.3729827-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index c3b8a5a378f8..24db851e271e 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2169,11 +2169,6 @@ skl_plane_disable_flip_done(struct intel_plane *plane) static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915, enum pipe pipe, enum plane_id plane_id) { - /* Wa_14017240301 */ - if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) || - IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0)) - return false; - /* Wa_22011186057 */ if (IS_ADLP_DISPLAY_STEP(i915, STEP_A0, STEP_B0)) return false; From c795d2f40a29f3aa9a4ed811f7787bf2f78111f4 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 7 Sep 2023 14:58:08 +0200 Subject: [PATCH 22/41] drm/i915: Run relevant bits of debugfs drop_caches per GT Walk all GTs when doing the respective bits of drop_caches work. Signed-off-by: Tvrtko Ursulin Signed-off-by: Andi Shyti Reviewed-by: Rodrigo Vivi Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230907125808.186088-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7a90a2e32c9f..e9b79c2c37d8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -740,15 +740,19 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + struct intel_gt *gt; unsigned int flags; + unsigned int i; int ret; drm_dbg(&i915->drm, "Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); - ret = gt_drop_caches(to_gt(i915), val); - if (ret) - return ret; + for_each_gt(gt, i915, i) { + ret = gt_drop_caches(gt, val); + if (ret) + return ret; + } fs_reclaim_acquire(GFP_KERNEL); flags = memalloc_noreclaim_save(); From 3b2562dcf91d460753871415f9571effc7965fcf Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 12 Sep 2023 09:35:21 +0200 Subject: [PATCH 23/41] drm/i915/gt: skip WA verification for GEN7_MISCCPCTL on DG2 Some DG2 firmware locks this register for modification. Using wa_add with read_mask 0 allows to skip checks of such registers. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8945 Signed-off-by: Andrzej Hajda Reviewed-by: Nirmoy Das Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230912073521.2106162-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e950efc75983..fcde2e1562ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1596,8 +1596,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_14014830051:dg2 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); - /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + /* + * Wa_14015795083 + * Skip verification for possibly locked register. + */ + wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE, + 0, 0, false); /* Wa_18018781329 */ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); From c92ec50822fb84306d951520d81919328421acbd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Sep 2023 11:17:41 +0300 Subject: [PATCH 24/41] drm/i915/gt: Prevent error pointer dereference Move the check for "if (IS_ERR(obj))" in front of the call to i915_gem_object_set_cache_coherency() which dereferences "obj". Otherwise it will lead to a crash. Fixes: 43aa755eae2c ("drm/i915/mtl: Update cache coherency setting for context structure") Signed-off-by: Dan Carpenter Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/455b2279-2e08-4d00-9784-be56d8ee42e3@moroto.mountain --- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 967fe4d77a87..b99efa348ad1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1094,6 +1094,9 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj)) { obj = i915_gem_object_create_shmem(engine->i915, context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + /* * Wa_22016122933: For Media version 13.0, all Media GT shared * memory needs to be mapped as WC on CPU side and UC (PAT @@ -1102,8 +1105,6 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (intel_gt_needs_wa_22016122933(engine->gt)) i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); } - if (IS_ERR(obj)) - return ERR_CAST(obj); vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { From 98fa06e44e3a773f41935323ed1dae7012819b70 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Fri, 15 Sep 2023 01:50:00 +0530 Subject: [PATCH 25/41] drm/i915: Add Wa_18022495364 Invalidate instruction and State cache bit using INDIRECT_CTX on every gpu context switch for gen12. The goal of this workaround is to actually perform an explicit invalidation of that cache (by re-writing the register) during every GPU context switch, which is accomplished via a "workaround batchbuffer" that's attached to the context via INDIRECT_CTX. (Matt Roper) Please refer [1] for more reviews and comment on the same patch [1] https://patchwork.freedesktop.org/series/123377/ v2: - Remove extra parentheses from the condition (Lucas) - Align spacing and new line (Lucas) v3: - Fix commit message. v4: - Only Gen12 changes are kept and Remove DG2+ condition (Matt Roper) - Fix the commit message for r-b (Matt Roper) - Rename the register bit in define v5: - Move out this workaround from golden context init (Matt Roper) - Use INDIRECT_CTX to set bit on each GPU context switch (Matt Roper) v6: - Change IP Version base condition for Gen12 (Matt Roper) - Made imperative form of commit version messages (Suraj) - s/Added/Add in patch header (Suraj) v7: - In version descriptions s/Ropper/Roper (Matt Atwood) BSpec: 11354 Cc: Lucas De Marchi Cc: Matt Roper Cc: Suraj Kandpal Cc: Matt Atwood Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230914202000.1069884-1-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index a00ff51c681d..0d5260d126d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -164,6 +164,8 @@ #define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20d4) #define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2) #define GEN11_ENABLE_32_PLANE_MODE (1 << 7) +#define GEN12_CS_DEBUG_MODE2 _MMIO(0x20d8) +#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6) #define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) #define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b99efa348ad1..147b6f44ad56 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1333,6 +1333,15 @@ dg2_emit_draw_watermark_setting(u32 *cs) return cs; } +static u32 * +gen12_invalidate_state_cache(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2); + *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + return cs; +} + static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { @@ -1346,6 +1355,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_aux_table_inv(ce->engine, cs); + /* Wa_18022495364 */ + if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10))) + cs = gen12_invalidate_state_cache(cs); + /* Wa_16014892111 */ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || From 4485bd519f5d6d620a29d0547ff3c982bdeeb468 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Tue, 12 Sep 2023 14:22:47 -0700 Subject: [PATCH 26/41] i915/pmu: Move execlist stats initialization to execlist specific setup engine->stats is a union of execlist and guc stat objects. When execlist specific fields are initialized, the initial state of guc stats is affected. This results in bad busyness values when using GuC mode. Move the execlist initialization from common code to execlist specific code. Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230912212247.1828681-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 - drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index dfb69fc977a0..84a75c95f3f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -558,7 +558,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4d05321dc5b5..e8f42ec6b1b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3548,6 +3548,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + seqcount_init(&engine->stats.execlists.lock); + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine); From 4d938bb93ffd35b1ea664222bb625061d7c4c73b Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 16 Aug 2023 16:13:20 -0700 Subject: [PATCH 27/41] drm/i915/huc: silence injected failure in the load via GSC path If we can't load the HuC due to an injected failure, we don't want to throw and error and trip CI. Using the gt_probe_error macro for logging ensure that the error is only printed if it wasn't explicitly injected. v2: keep the line to less than 100 characters (checkpatch). Link: https://gitlab.freedesktop.org/drm/intel/-/issues/7061 Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Andi Shyti #v1 Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230816231320.1555190-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index 80bb00189865..e50112645950 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -9,6 +9,7 @@ #include #include "gem/i915_gem_lmem.h" +#include "gt/intel_gt_print.h" #include "i915_drv.h" #include "gt/intel_gt.h" @@ -155,7 +156,8 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); struct intel_pxp *pxp = i915->pxp; - struct intel_uc *uc = &pxp->ctrl_gt->uc; + struct intel_gt *gt = pxp->ctrl_gt; + struct intel_uc *uc = >->uc; intel_wakeref_t wakeref; int ret = 0; @@ -175,7 +177,7 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, /* load huc via pxp */ ret = intel_huc_fw_load_and_auth_via_gsc(&uc->huc); if (ret < 0) - drm_err(&i915->drm, "failed to load huc via gsc %d\n", ret); + gt_probe_error(gt, "failed to load huc via gsc %d\n", ret); } } From 8ae272348153ed2fa423f739047a592d9bd55ba2 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Sun, 17 Sep 2023 14:19:31 -0700 Subject: [PATCH 28/41] drm/i915/pxp/mtl: Update pxp-firmware response timeout Update the max GSC-fw response time to match updated internal fw specs. Because this response time is an SLA on the firmware, not inclusive of i915->GuC->HW handoff latency, when submitting requests to the GSC fw via intel_gsc_uc_heci_cmd_submit helpers, start the count after the request hits the GSC command streamer. Also, move GSC_REPLY_LATENCY_MS definition from pxp header to intel_gsc_uc_heci_cmd_submit.h since its for any GSC HECI packet. Signed-off-by: Alan Previn Reviewed-by: Vivaik Balasubrawmanian Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230917211933.1407559-2-alan.previn.teres.alexis@intel.com --- .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c | 20 +++++++++++++++++-- .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h | 6 ++++++ drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c | 2 +- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 10 ++++------ 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index 89ed5ee9cded..2fde5c360cff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -81,8 +81,17 @@ int intel_gsc_uc_heci_cmd_submit_packet(struct intel_gsc_uc *gsc, u64 addr_in, i915_request_add(rq); - if (!err && i915_request_wait(rq, 0, msecs_to_jiffies(500)) < 0) - err = -ETIME; + if (!err) { + /* + * Start timeout for i915_request_wait only after considering one possible + * pending GSC-HECI submission cycle on the other (non-privileged) path. + */ + if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS)) + drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm, + "Delay in gsc-heci-priv submission to gsccs-hw"); + if (i915_request_wait(rq, 0, msecs_to_jiffies(GSC_HECI_REPLY_LATENCY_MS)) < 0) + err = -ETIME; + } i915_request_put(rq); @@ -186,6 +195,13 @@ intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc, i915_request_add(rq); if (!err) { + /* + * Start timeout for i915_request_wait only after considering one possible + * pending GSC-HECI submission cycle on the other (privileged) path. + */ + if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS)) + drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm, + "Delay in gsc-heci-non-priv submission to gsccs-hw"); if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, msecs_to_jiffies(timeout_ms)) < 0) err = -ETIME; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h index 09d3fbdad05a..c4308291c003 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h @@ -12,6 +12,12 @@ struct i915_vma; struct intel_context; struct intel_gsc_uc; +#define GSC_HECI_REPLY_LATENCY_MS 500 +/* + * Max FW response time is 500ms, but this should be counted from the time the + * command has hit the GSC-CS hardware, not the preceding handoff to GuC CTB. + */ + struct intel_gsc_mtl_header { u32 validity_marker; #define GSC_HECI_VALIDITY_MARKER 0xA578875A diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c index 2a600184a077..9b2cc0db2f55 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c @@ -111,7 +111,7 @@ gsccs_send_message(struct intel_pxp *pxp, ret = intel_gsc_uc_heci_cmd_submit_nonpriv(>->uc.gsc, exec_res->ce, &pkt, exec_res->bb_vaddr, - GSC_REPLY_LATENCY_MS); + GSC_HECI_REPLY_LATENCY_MS); if (ret) { drm_err(&i915->drm, "failed to send gsc PXP msg (%d)\n", ret); goto unlock; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 298ad38e6c7d..9aae779c4da3 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -8,16 +8,14 @@ #include +#include "gt/uc/intel_gsc_uc_heci_cmd_submit.h" + struct intel_pxp; -#define GSC_REPLY_LATENCY_MS 210 -/* - * Max FW response time is 200ms, to which we add 10ms to account for overhead - * such as request preparation, GuC submission to hw and pipeline completion times. - */ #define GSC_PENDING_RETRY_MAXCOUNT 40 #define GSC_PENDING_RETRY_PAUSE_MS 50 -#define GSCFW_MAX_ROUND_TRIP_LATENCY_MS (GSC_PENDING_RETRY_MAXCOUNT * GSC_PENDING_RETRY_PAUSE_MS) +#define GSCFW_MAX_ROUND_TRIP_LATENCY_MS (GSC_HECI_REPLY_LATENCY_MS + \ + (GSC_PENDING_RETRY_MAXCOUNT * GSC_PENDING_RETRY_PAUSE_MS)) #ifdef CONFIG_DRM_I915_PXP void intel_pxp_gsccs_fini(struct intel_pxp *pxp); From c14d446e25fe00a9fd29d317b07bd221fd6f49db Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Sun, 17 Sep 2023 14:19:32 -0700 Subject: [PATCH 29/41] drm/i915/pxp/mtl: Update pxp-firmware packet size Update the GSC-fw input/output HECI packet size to match updated internal fw specs. Signed-off-by: Alan Previn Reviewed-by: Vivaik Balasubrawmanian Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230917211933.1407559-3-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h index 0165d38fbead..329b4fcdc040 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h @@ -14,8 +14,8 @@ #define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */ #define PXP43_CMDID_INIT_SESSION 0x00000036 -/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */ -#define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K) +/* PXP-Packet sizes for MTL's GSCCS-HECI instruction is spec'd at 65K before page alignment*/ +#define PXP43_MAX_HECI_INOUT_SIZE (PAGE_ALIGN(SZ_64K + SZ_1K)) /* PXP-Packet size for MTL's NEW_HUC_AUTH instruction */ #define PXP43_HUC_AUTH_INOUT_SIZE (SZ_4K) From afddcbe41f049072b3c62d39ce474bd71b3a5212 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Sun, 17 Sep 2023 14:19:33 -0700 Subject: [PATCH 30/41] drm/i915/lrc: User PXP contexts requires runalone bit in lrc On Meteorlake onwards, HW specs require that all user contexts that run on render or compute engines and require PXP must enforce run-alone bit in lrc. Add this enforcement for protected contexts. Signed-off-by: Alan Previn Reviewed-by: Vivaik Balasubrawmanian Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230917211933.1407559-4-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 23 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 6b9d9f837669..fdd4ddd3a978 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -177,6 +177,7 @@ #define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) +#define GEN12_CTX_CTRL_RUNALONE_MODE REG_BIT(7) #define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8) #define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244) #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 147b6f44ad56..eaf66d903166 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -845,6 +845,27 @@ lrc_setup_indirect_ctx(u32 *regs, lrc_ring_indirect_offset_default(engine) << 6; } +static bool ctx_needs_runalone(const struct intel_context *ce) +{ + struct i915_gem_context *gem_ctx; + bool ctx_is_protected = false; + + /* + * On MTL and newer platforms, protected contexts require setting + * the LRC run-alone bit or else the encryption will not happen. + */ + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) && + (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) { + rcu_read_lock(); + gem_ctx = rcu_dereference(ce->gem_context); + if (gem_ctx) + ctx_is_protected = gem_ctx->uses_protected_content; + rcu_read_unlock(); + } + + return ctx_is_protected; +} + static void init_common_regs(u32 * const regs, const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -860,6 +881,8 @@ static void init_common_regs(u32 * const regs, if (GRAPHICS_VER(engine->i915) < 11) ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); + if (ctx_needs_runalone(ce)) + ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE); regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_TIMESTAMP] = ce->stats.runtime.last; From 5642639bd4f772a09bc4cb05ac7ac9378c2c3f5e Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Tue, 19 Sep 2023 04:45:31 +0000 Subject: [PATCH 31/41] drm/i915: refactor deprecated strncpy `strncpy` is deprecated for use on NUL-terminated destination strings [1]. We should prefer more robust and less ambiguous string interfaces. A suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on the destination buffer without unnecessarily NUL-padding. `ctx` is zero allocated and as such strncpy's NUL-padding behavior was strictly a performance hit which is now resolved. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Signed-off-by: Justin Stitt Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/selftests/mock_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 8ac6726ec16b..e199d7dbb876 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -36,7 +36,7 @@ mock_context(struct drm_i915_private *i915, if (name) { struct i915_ppgtt *ppgtt; - strncpy(ctx->name, name, sizeof(ctx->name) - 1); + strscpy(ctx->name, name, sizeof(ctx->name)); ppgtt = mock_ppgtt(i915, name); if (!ppgtt) From 26a8e32e6d77900819c0c730fbfb393692dbbeea Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 19 Sep 2023 20:48:55 +0100 Subject: [PATCH 32/41] i915: Limit the length of an sg list to the requested length The folio conversion changed the behaviour of shmem_sg_alloc_table() to put the entire length of the last folio into the sg list, even if the sg list should have been shorter. gen8_ggtt_insert_entries() relied on the list being the right length and would overrun the end of the page tables. Other functions may also have been affected. Clamp the length of the last entry in the sg list to be the expected length. Signed-off-by: Matthew Wilcox (Oracle) Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch") Cc: stable@vger.kernel.org # 6.5.x Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9256 Link: https://lore.kernel.org/lkml/6287208.lOV4Wx5bFT@natalenko.name/ Reported-by: Oleksandr Natalenko Tested-by: Oleksandr Natalenko Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230919194855.347582-1-willy@infradead.org --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 8f1633c3fb93..73a4a4eb29e0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, st->nents = 0; for (i = 0; i < page_count; i++) { struct folio *folio; + unsigned long nr_pages; const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, 0, @@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, } } while (1); + nr_pages = min_t(unsigned long, + folio_nr_pages(folio), page_count - i); if (!i || sg->length >= max_segment || folio_pfn(folio) != next_pfn) { @@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, sg = sg_next(sg); st->nents++; - sg_set_folio(sg, folio, folio_size(folio), 0); + sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0); } else { /* XXX: could overflow? */ - sg->length += folio_size(folio); + sg->length += nr_pages * PAGE_SIZE; } - next_pfn = folio_pfn(folio) + folio_nr_pages(folio); - i += folio_nr_pages(folio) - 1; + next_pfn = folio_pfn(folio) + nr_pages; + i += nr_pages - 1; /* Check that the i965g/gm workaround works. */ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL); From b17e6840882dc8a04e7464270906d79954378d41 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Wed, 20 Sep 2023 14:36:20 +0530 Subject: [PATCH 33/41] drm/i915/gt: Update RC6 mask for mtl_drpc It has been observed sometimes RC6 status register's unused bits are being set by h/w, without affecting RC6 functionality therefore updating the mask with used bits accordingly. As mtl_drpc is debugfs function, removing MISSING_CASE from default case as it doesn't make sense to panic (panic_on_warn=1) the CI system if register is reporting unsupported state. Cc: Anshuman Gupta Signed-off-by: Badal Nilawar Reviewed-by: Anshuman Gupta Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20230920090620.3255091-1-badal.nilawar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 1 - drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 357e2f865727..f900cc68d6d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -290,7 +290,6 @@ static int mtl_drpc(struct seq_file *m) seq_puts(m, "RC6\n"); break; default: - MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status)); seq_puts(m, "Unknown\n"); break; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 0d5260d126d8..9f2a7d103ea5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -26,7 +26,7 @@ #define MTL_CAGF_MASK REG_GENMASK(8, 0) #define MTL_CC0 0x0 #define MTL_CC6 0x3 -#define MTL_CC_MASK REG_GENMASK(12, 9) +#define MTL_CC_MASK REG_GENMASK(10, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) From 0f3fa942d91165c2702577e9274d2ee1c7212afc Mon Sep 17 00:00:00 2001 From: Javier Pello Date: Sat, 2 Sep 2023 17:10:39 +0200 Subject: [PATCH 34/41] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top There is an assertion in ggtt_reserve_guc_top that the global GTT is of size at least GUC_GGTT_TOP, which is not the case on a 32-bit platform; see commit 562d55d991b39ce376c492df2f7890fd6a541ffc ("drm/i915/bdw: Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON is enabled, this triggers a BUG(); if GEM_BUG_ON is disabled, the subsequent reservation fails and the driver fails to initialise the device: i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC i915 0000:00:02.0: Device initialization failed (-28) i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details. i915: probe of 0000:00:02.0 failed with error -28 Make the reservation at the top of the available space, whatever that is, instead of assuming that the top will be GUC_GGTT_TOP. Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080 Signed-off-by: Javier Pello Reviewed-by: Daniele Ceraolo Spurio Cc: Fernando Pacheco Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697dbcf62d6d8@otheo.eu --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n"); From 82b1e8f7ff6ab0b3fe43fbf10ab720e06db0cfd2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 21 Sep 2023 19:24:56 +0300 Subject: [PATCH 35/41] drm/i915/gt: remove a static inline that requires including i915_drv.h It's actively harmful to add static inlines in headers that require you to pull in more headers. Remove the include added in commit f1530f912ed8 ("drm/i915/gt: Apply workaround 22016122933 correctly"). We see that there's already an implicit dependency on the i915_drv.h that we need to address too. Cc: Andi Shyti Cc: Fei Yang Cc: Jonathan Cavitt Cc: Matt Roper Signed-off-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230921162456.3889375-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 5 +++++ drivers/gpu/drm/i915/gt/intel_gt.h | 6 +----- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d234122f8024..bc0ead0b7a0a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1019,3 +1019,8 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, else return I915_MAP_WC; } + +bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) +{ + return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 239848bcb2a4..2cac499d5aa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -6,7 +6,6 @@ #ifndef __INTEL_GT__ #define __INTEL_GT__ -#include "i915_drv.h" #include "intel_engine_types.h" #include "intel_gt_types.h" #include "intel_reset.h" @@ -88,10 +87,7 @@ static inline bool gt_is_root(struct intel_gt *gt) return !gt->info.id; } -static inline bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) -{ - return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; -} +bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 11d181b1cc7a..bf4a933de03a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -3,6 +3,7 @@ * Copyright © 2022 Intel Corporation */ +#include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" From 2fc37c0c59c925ac1e60c007670b9921565005a7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 21 Sep 2023 19:06:37 +0300 Subject: [PATCH 36/41] drm/i915/gem: remove inlines from i915_gem_execbuffer.c Just let the compiler decide what's best. Turns out absolutely nothing changes in the output with the inlines removed. Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230921160637.3862597-1-jani.nikula@intel.com --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5a687a3686bd..a4b32567c519 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -321,7 +321,7 @@ static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); static void eb_capture_release(struct i915_execbuffer *eb); -static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) +static bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->context->engine) || (intel_engine_using_cmd_parser(eb->context->engine) && @@ -433,7 +433,7 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry, return pin_flags; } -static inline int +static int eb_pin_vma(struct i915_execbuffer *eb, const struct drm_i915_gem_exec_object2 *entry, struct eb_vma *ev) @@ -486,7 +486,7 @@ eb_pin_vma(struct i915_execbuffer *eb, return 0; } -static inline void +static void eb_unreserve_vma(struct eb_vma *ev) { if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) @@ -548,7 +548,7 @@ eb_validate_vma(struct i915_execbuffer *eb, return 0; } -static inline bool +static bool is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx) { return eb->args->flags & I915_EXEC_BATCH_FIRST ? @@ -628,8 +628,8 @@ eb_add_vma(struct i915_execbuffer *eb, return 0; } -static inline int use_cpu_reloc(const struct reloc_cache *cache, - const struct drm_i915_gem_object *obj) +static int use_cpu_reloc(const struct reloc_cache *cache, + const struct drm_i915_gem_object *obj) { if (!i915_gem_object_has_struct_page(obj)) return false; @@ -1107,7 +1107,7 @@ static void eb_destroy(const struct i915_execbuffer *eb) kfree(eb->buckets); } -static inline u64 +static u64 relocation_target(const struct drm_i915_gem_relocation_entry *reloc, const struct i915_vma *target) { @@ -1128,19 +1128,19 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->node.flags = 0; } -static inline void *unmask_page(unsigned long p) +static void *unmask_page(unsigned long p) { return (void *)(uintptr_t)(p & PAGE_MASK); } -static inline unsigned int unmask_flags(unsigned long p) +static unsigned int unmask_flags(unsigned long p) { return p & ~PAGE_MASK; } #define KMAP 0x4 /* after CLFLUSH_FLAGS */ -static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +static struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) { struct drm_i915_private *i915 = container_of(cache, struct i915_execbuffer, reloc_cache)->i915; From ae0e5e6eaaabd54377fe6f649d49ff5fbbc58d95 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Fri, 22 Sep 2023 21:23:56 +0530 Subject: [PATCH 37/41] drm/i915: Add Wa_18028616096 Drop UGM per set fragment threshold to 3 BSpec: 54833 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper [mattrope: moved above xehpsdv block for consistency] Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230922155356.583595-1-shekhar.chauhan@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 9f2a7d103ea5..cca4bac8f8b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1232,6 +1232,7 @@ #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) +#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index fcde2e1562ab..0ddddccc4354 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2941,6 +2941,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li true); } + if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) { + /* Wa_18028616096 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); + } + if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_mcr_masked_en(wal, From a383a021804ce5eb8fada3ee83ce8a74077fe9b9 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 19 Sep 2023 21:02:11 -0700 Subject: [PATCH 38/41] drm/i915/perf: Remove gtt_offset from stream->oa_buffer.head/.tail There is no reason to add gtt_offset to the cached head/tail pointers stream->oa_buffer.head and stream->oa_buffer.tail. This causes the code to constantly add gtt_offset and subtract gtt_offset and is error prone. It is much simpler to maintain stream->oa_buffer.head and stream->oa_buffer.tail without adding gtt_offset to them and just allow for the gtt_offset when reading/writing from/to HW registers. v2: Minor tweak to commit message due to dropping patch in previous series Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230920040211.2351279-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 52 ++++++++------------------------ 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 018f42fff4cc..1347e4ec9dd5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -543,10 +543,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); int report_size = stream->oa_buffer.format->size; - u32 head, tail, read_tail; + u32 tail, hw_tail; unsigned long flags; bool pollin; - u32 hw_tail; u32 partial_report_size; /* We have to consider the (unlikely) possibility that read() errors @@ -556,6 +555,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); hw_tail = stream->perf->ops.oa_hw_tail_read(stream); + hw_tail -= gtt_offset; /* The tail pointer increases in 64 byte increments, not in report_size * steps. Also the report size may not be a power of 2. Compute @@ -567,13 +567,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) /* Subtract partial amount off the tail */ hw_tail = OA_TAKEN(hw_tail, partial_report_size); - /* NB: The head we observe here might effectively be a little - * out of date. If a read() is in progress, the head could be - * anywhere between this head and stream->oa_buffer.tail. - */ - head = stream->oa_buffer.head - gtt_offset; - read_tail = stream->oa_buffer.tail - gtt_offset; - tail = hw_tail; /* Walk the stream backward until we find a report with report @@ -587,7 +580,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) * memory in the order they were written to. * If not : (╯°□°)╯︵ ┻━┻ */ - while (OA_TAKEN(tail, read_tail) >= report_size) { + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { void *report = stream->oa_buffer.vaddr + tail; if (oa_report_id(stream, report) || @@ -601,9 +594,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) __ratelimit(&stream->perf->tail_pointer_race)) drm_notice(&stream->uncore->i915->drm, "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", - head, tail, hw_tail); + stream->oa_buffer.head, tail, hw_tail); - stream->oa_buffer.tail = gtt_offset + tail; + stream->oa_buffer.tail = tail; pollin = OA_TAKEN(stream->oa_buffer.tail, stream->oa_buffer.head) >= report_size; @@ -753,13 +746,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - /* - * NB: oa_buffer.head/tail include the gtt_offset which we don't want - * while indexing relative to oa_buf_base. - */ - head -= gtt_offset; - tail -= gtt_offset; - /* * An out of bounds or misaligned head or tail pointer implies a driver * bug since we validate + align the tail pointers we read from the @@ -895,9 +881,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... */ - head += gtt_offset; intel_uncore_write(uncore, oaheadptr, - head & GEN12_OAG_OAHEADPTR_MASK); + (head + gtt_offset) & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1042,12 +1027,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - /* NB: oa_buffer.head/tail include the gtt_offset which we don't want - * while indexing relative to oa_buf_base. - */ - head -= gtt_offset; - tail -= gtt_offset; - /* An out of bounds or misaligned head or tail pointer implies a driver * bug since we validate + align the tail pointers we read from the * hardware and we are in full control of the head pointer which should @@ -1110,13 +1089,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, if (start_offset != *offset) { spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - /* We removed the gtt_offset for the copy loop above, indexing - * relative to oa_buf_base so put back here... - */ - head += gtt_offset; - intel_uncore_write(uncore, GEN7_OASTATUS2, - (head & GEN7_OASTATUS2_HEAD_MASK) | + ((head + gtt_offset) & GEN7_OASTATUS2_HEAD_MASK) | GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = head; @@ -1704,7 +1678,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) */ intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); - stream->oa_buffer.head = gtt_offset; + stream->oa_buffer.head = 0; intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); @@ -1712,7 +1686,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) gtt_offset | OABUFFER_SIZE_16M); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tail = gtt_offset; + stream->oa_buffer.tail = 0; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1746,7 +1720,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) intel_uncore_write(uncore, GEN8_OASTATUS, 0); intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); - stream->oa_buffer.head = gtt_offset; + stream->oa_buffer.head = 0; intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); @@ -1763,7 +1737,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tail = gtt_offset; + stream->oa_buffer.tail = 0; /* * Reset state used to recognise context switches, affecting which @@ -1800,7 +1774,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0); intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr, gtt_offset & GEN12_OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = gtt_offset; + stream->oa_buffer.head = 0; /* * PRM says: @@ -1816,7 +1790,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) gtt_offset & GEN12_OAG_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tail = gtt_offset; + stream->oa_buffer.tail = 0; /* * Reset state used to recognise context switches, affecting which From e2f99b79d4c594cdf7ab449e338d4947f5ea8903 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 25 Sep 2023 12:21:17 -0700 Subject: [PATCH 39/41] i915/guc: Get runtime pm in busyness worker only if already active Ideally the busyness worker should take a gt pm wakeref because the worker only needs to be active while gt is awake. However, the gt_park path cancels the worker synchronously and this complicates the flow if the worker is also running at the same time. The cancel waits for the worker and when the worker releases the wakeref, that would call gt_park and would lead to a deadlock. The resolution is to take the global pm wakeref if runtime pm is already active. If not, we don't need to update the busyness stats as the stats would already be updated when the gt was parked. Note: - We do not requeue the worker if we cannot take a reference to runtime pm since intel_guc_busyness_unpark would requeue the worker in the resume path. - If the gt was parked longer than time taken for GT timestamp to roll over, we ignore those rollovers since we don't care about tracking the exact GT time. We only care about roll overs when the gt is active and running workloads. - There is a window of time between gt_park and runtime suspend, where the worker may run. This is acceptable since the worker will not find any new data to update busyness. v2: (Daniele) - Edit commit message and code comment - Use runtime pm in the worker - Put runtime pm after enabling the worker - Use Link tag and add Fixes tag v3: (Daniele) - Reword commit and comments and add details Link: https://gitlab.freedesktop.org/drm/intel/-/issues/7077 Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230925192117.2497058-1-umesh.nerlige.ramappa@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cabdc645fcdd..ae3495a9c814 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1432,6 +1432,36 @@ static void guc_timestamp_ping(struct work_struct *wrk) unsigned long index; int srcu, ret; + /* + * Ideally the busyness worker should take a gt pm wakeref because the + * worker only needs to be active while gt is awake. However, the + * gt_park path cancels the worker synchronously and this complicates + * the flow if the worker is also running at the same time. The cancel + * waits for the worker and when the worker releases the wakeref, that + * would call gt_park and would lead to a deadlock. + * + * The resolution is to take the global pm wakeref if runtime pm is + * already active. If not, we don't need to update the busyness stats as + * the stats would already be updated when the gt was parked. + * + * Note: + * - We do not requeue the worker if we cannot take a reference to runtime + * pm since intel_guc_busyness_unpark would requeue the worker in the + * resume path. + * + * - If the gt was parked longer than time taken for GT timestamp to roll + * over, we ignore those rollovers since we don't care about tracking + * the exact GT time. We only care about roll overs when the gt is + * active and running workloads. + * + * - There is a window of time between gt_park and runtime suspend, + * where the worker may run. This is acceptable since the worker will + * not find any new data to update busyness. + */ + wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); + if (!wakeref) + return; + /* * Synchronize with gt reset to make sure the worker does not * corrupt the engine/guc stats. NB: can't actually block waiting @@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) */ ret = intel_gt_reset_trylock(gt, &srcu); if (ret) - return; + goto err_trylock; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) - __update_guc_busyness_stats(guc); + __update_guc_busyness_stats(guc); /* adjust context stats for overflow */ xa_for_each(&guc->context_lookup, index, ce) @@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) intel_gt_reset_unlock(gt, srcu); guc_enable_busyness_worker(guc); + +err_trylock: + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } static int guc_action_enable_usage_stats(struct intel_guc *guc) From 1e975e591af98b45a9e37eabfd4bb01a6184c314 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 26 Sep 2023 11:08:55 +0100 Subject: [PATCH 40/41] drm/i915: Do not disable preemption for resets Commit ade8a0f59844 ("drm/i915: Make all GPU resets atomic") added a preempt disable section over the hardware reset callback to prepare the driver for being able to reset from atomic contexts. In retrospect I can see that the work item at a time was about removing the struct mutex from the reset path. Code base also briefly entertained the idea of doing the reset under stop_machine in order to serialize userspace mmap and temporary glitch in the fence registers (see eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex"), but that never materialized and was soon removed in 2caffbf11762 ("drm/i915: Revoke mmaps and prevent access to fence registers across reset") and replaced with a SRCU based solution. As such, as far as I can see, today we still have a requirement that resets must not sleep (invoked from submission tasklets), but no need to support invoking them from a truly atomic context. Given that the preemption section is problematic on RT kernels, since the uncore lock becomes a sleeping lock and so is invalid in such section, lets try and remove it. Potential downside is that our short waits on GPU to complete the reset may get extended if CPU scheduling interferes, but in practice that probably isn't a deal breaker. In terms of mechanics, since the preemption disabled block is being removed we just need to replace a few of the wait_for_atomic macros into busy looping versions which will work (and not complain) when called from non-atomic sections. v2: * Fix timeouts which are now in us. (Andi) * Update one comment as a drive by. (Andi) Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Paul Gortmaker Cc: Sebastian Andrzej Siewior Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230926100855.61722-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 98575d79c446..a21e939fdbf6 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -161,16 +161,16 @@ static int i915_do_reset(struct intel_gt *gt, struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); int err; - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + /* Assert reset for at least 50 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); udelay(50); - err = wait_for_atomic(i915_in_reset(pdev), 50); + err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0); /* Clear the reset request. */ pci_write_config_byte(pdev, I915_GDRST, 0); udelay(50); if (!err) - err = wait_for_atomic(!i915_in_reset(pdev), 50); + err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0); return err; } @@ -190,7 +190,7 @@ static int g33_do_reset(struct intel_gt *gt, struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for_atomic(g4x_reset_complete(pdev), 50); + return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); } static int g4x_do_reset(struct intel_gt *gt, @@ -207,7 +207,7 @@ static int g4x_do_reset(struct intel_gt *gt, pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); if (ret) { GT_TRACE(gt, "Wait for media reset failed\n"); goto out; @@ -215,7 +215,7 @@ static int g4x_do_reset(struct intel_gt *gt, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); if (ret) { GT_TRACE(gt, "Wait for render reset failed\n"); goto out; @@ -785,9 +785,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) reset_mask = wa_14015076503_start(gt, engine_mask, !retry); GT_TRACE(gt, "engine_mask=%x\n", reset_mask); - preempt_disable(); ret = reset(gt, reset_mask, retry); - preempt_enable(); wa_14015076503_end(gt, reset_mask); } From 03d681412b38558aefe4fb0f46e36efa94bb21ef Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 26 Sep 2023 16:24:01 +0200 Subject: [PATCH 41/41] drm/i915: Don't set PIPE_CONTROL_FLUSH_L3 for aux inval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PIPE_CONTROL_FLUSH_L3 is not needed for aux invalidation so don't set that. Fixes: ad8ebf12217e ("drm/i915/gt: Ensure memory quiesced before invalidation") Cc: Jonathan Cavitt Cc: Andi Shyti Cc: # v5.8+ Cc: Andrzej Hajda Cc: Tvrtko Ursulin Cc: Matt Roper Cc: Tejas Upadhyay Cc: Lucas De Marchi Cc: Prathap Kumar Valsan Cc: Tapani Pälli Cc: Mark Janes Cc: Rodrigo Vivi Signed-off-by: Nirmoy Das Acked-by: Matt Roper Reviewed-by: Andi Shyti Tested-by: Tapani Pälli Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230926142401.25687-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 7f50d02cd67f..8b93cbe0f535 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -271,8 +271,17 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + /* + * L3 fabric flush is needed for AUX CCS invalidation + * which happens as part of pipe-control so we can + * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3 + * deals with Protected Memory which is not needed for + * AUX CCS invalidation and lead to unwanted side effects. + */ + if (mode & EMIT_FLUSH) + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; - bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */