From c25feb24e40c13daa13c5d87a0feccfc842a3d15 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Jan 2023 23:02:16 -0800 Subject: [PATCH 01/45] drm/atomic-helper: fix kernel-doc problems Fix a kernel-doc warning and other kernel-doc formatting for drm_atomic_helper_connect_tv_check(). drivers/gpu/drm/drm_atomic_state_helper.c:560: warning: Cannot understand * @drm_atomic_helper_connector_tv_check: Validate an analog TV connector state on line 560 - I thought it was a doc line Fixes: 5a28cefda3a9 ("drm/atomic-helper: Add an analog TV atomic_check implementation") Signed-off-by: Randy Dunlap Cc: Maxime Ripard Cc: Maarten Lankhorst CC: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230117070216.30318-1-rdunlap@infradead.org --- drivers/gpu/drm/drm_atomic_state_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 22251c5f6a8a..784e63d70a42 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -557,15 +557,15 @@ void drm_atomic_helper_connector_tv_reset(struct drm_connector *connector) EXPORT_SYMBOL(drm_atomic_helper_connector_tv_reset); /** - * @drm_atomic_helper_connector_tv_check: Validate an analog TV connector state + * drm_atomic_helper_connector_tv_check - Validate an analog TV connector state * @connector: DRM Connector * @state: the DRM State object * * Checks the state object to see if the requested state is valid for an * analog TV connector. * - * Returns: - * Zero for success, a negative error code on error. + * Return: + * %0 for success, a negative error code on error. */ int drm_atomic_helper_connector_tv_check(struct drm_connector *connector, struct drm_atomic_state *state) From 04ee27671a6a699a8429adc7be9fa93ff228031e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Jan 2023 23:02:24 -0800 Subject: [PATCH 02/45] drm/connector: fix a kernel-doc bad line warning Building the kernel documentation causes this warning 7 times. Fix it by adding a " *" line instead of a blank line. drivers/gpu/drm/drm_connector.c:1849: warning: bad line: Fixes: 7d63cd8526f1 ("drm/connector: Add TV standard property") Signed-off-by: Randy Dunlap Cc: Maxime Ripard Cc: Maarten Lankhorst CC: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Reviewed-by: Simon Ser Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230117070224.30751-1-rdunlap@infradead.org --- drivers/gpu/drm/drm_connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 8d92777e57dd..86abee85b92a 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1846,7 +1846,7 @@ EXPORT_SYMBOL(drm_mode_create_tv_properties_legacy); * drm_mode_create_tv_properties - create TV specific connector properties * @dev: DRM device * @supported_tv_modes: Bitmask of TV modes supported (See DRM_MODE_TV_MODE_*) - + * * Called by a driver's TV initialization routine, this function creates * the TV specific connector properties for a given device. * From 065ff1dc8764dd6394bb6f6c42e6b1e4fac74911 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2023 17:52:51 +0100 Subject: [PATCH 03/45] vc4: fix build failure in vc4_dsi_dev_probe() The bridge->of_node field is defined inside of an #ifdef, which results in a build failure when compile-testing the vc4_dsi driver without CONFIG_OF: drivers/gpu/drm/vc4/vc4_dsi.c: In function 'vc4_dsi_dev_probe': drivers/gpu/drm/vc4/vc4_dsi.c:1822:20: error: 'struct drm_bridge' has no member named 'of_node' 1822 | dsi->bridge.of_node = dev->of_node; Add another #ifdef in the place it is used in. Alternatively we could consider dropping the #ifdef in the struct definition and all other users. Fixes: 78df640394cd ("drm/vc4: dsi: Convert to using a bridge instead of encoder") Signed-off-by: Arnd Bergmann Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230117165258.1979922-1-arnd@kernel.org --- drivers/gpu/drm/vc4/vc4_dsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 2a71321b9222..a5c075f802e4 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -1819,7 +1819,9 @@ static int vc4_dsi_dev_probe(struct platform_device *pdev) dsi->pdev = pdev; dsi->bridge.funcs = &vc4_dsi_bridge_funcs; +#ifdef CONFIG_OF dsi->bridge.of_node = dev->of_node; +#endif dsi->bridge.type = DRM_MODE_CONNECTOR_DSI; dsi->dsi_host.ops = &vc4_dsi_host_ops; dsi->dsi_host.dev = dev; From 1cbc1f0d324ba6c4d1b10ac6362b5e0b029f63d5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:16 +0200 Subject: [PATCH 04/45] drm/edid: fix AVI infoframe aspect ratio handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We try to avoid sending VICs defined in the later specs in AVI infoframes to sinks that conform to the earlier specs, to not upset them, and use 0 for the VIC instead. However, we do this detection and conversion to 0 too early, as we'll need the actual VIC to figure out the aspect ratio. In particular, for a mode with 64:27 aspect ratio, 0 for VIC fails the AVI infoframe generation altogether with -EINVAL. Separate the VIC lookup from the "filtering", and postpone the filtering, to use the proper VIC for aspect ratio handling, and the 0 VIC for the infoframe video code as needed. Reported-by: William Tseng Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6153 References: https://lore.kernel.org/r/20220920062316.43162-1-william.tseng@intel.com Cc: Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/c3e78cc6d01ed237f71ad0038826b08d83d75eef.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3841aba17abd..23f7146e6a9b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6885,8 +6885,6 @@ static u8 drm_mode_hdmi_vic(const struct drm_connector *connector, static u8 drm_mode_cea_vic(const struct drm_connector *connector, const struct drm_display_mode *mode) { - u8 vic; - /* * HDMI spec says if a mode is found in HDMI 1.4b 4K modes * we should send its VIC in vendor infoframes, else send the @@ -6896,13 +6894,18 @@ static u8 drm_mode_cea_vic(const struct drm_connector *connector, if (drm_mode_hdmi_vic(connector, mode)) return 0; - vic = drm_match_cea_mode(mode); + return drm_match_cea_mode(mode); +} - /* - * HDMI 1.4 VIC range: 1 <= VIC <= 64 (CEA-861-D) but - * HDMI 2.0 VIC range: 1 <= VIC <= 107 (CEA-861-F). So we - * have to make sure we dont break HDMI 1.4 sinks. - */ +/* + * Avoid sending VICs defined in HDMI 2.0 in AVI infoframes to sinks that + * conform to HDMI 1.4. + * + * HDMI 1.4 (CTA-861-D) VIC range: [1..64] + * HDMI 2.0 (CTA-861-F) VIC range: [1..107] + */ +static u8 vic_for_avi_infoframe(const struct drm_connector *connector, u8 vic) +{ if (!is_hdmi2_sink(connector) && vic > 64) return 0; @@ -6978,7 +6981,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, picture_aspect = HDMI_PICTURE_ASPECT_NONE; } - frame->video_code = vic; + frame->video_code = vic_for_avi_infoframe(connector, vic); frame->picture_aspect = picture_aspect; frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE; frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN; From 72794d16bd535a984e6653a18f5862405b49b5f9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:17 +0200 Subject: [PATCH 05/45] drm/edid: fix parsing of 3D modes from HDMI VSDB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 537d9ed2f6c1 ("drm/edid: convert add_cea_modes() to use cea db iter") inadvertently moved the do_hdmi_vsdb_modes() call within the db iteration loop, always passing NULL as the CTA VDB to do_hdmi_vsdb_modes(), skipping a lot of stereo modes. Move the call back outside of the loop. This does mean only one CTA VDB and HDMI VSDB combination will be handled, but it's an unlikely scenario to have more than one of either block, and it was not accounted for before the regression either. Fixes: 537d9ed2f6c1 ("drm/edid: convert add_cea_modes() to use cea db iter") Cc: # v6.0+ Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/cf159b8816191ed595a3cb954acaf189c4528cc7.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 23f7146e6a9b..b94adb9bbefb 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5249,13 +5249,12 @@ static int add_cea_modes(struct drm_connector *connector, { const struct cea_db *db; struct cea_db_iter iter; + const u8 *hdmi = NULL, *video = NULL; + u8 hdmi_len = 0, video_len = 0; int modes = 0; cea_db_iter_edid_begin(drm_edid, &iter); cea_db_iter_for_each(db, &iter) { - const u8 *hdmi = NULL, *video = NULL; - u8 hdmi_len = 0, video_len = 0; - if (cea_db_tag(db) == CTA_DB_VIDEO) { video = cea_db_data(db); video_len = cea_db_payload_len(db); @@ -5271,18 +5270,17 @@ static int add_cea_modes(struct drm_connector *connector, modes += do_y420vdb_modes(connector, vdb420, cea_db_payload_len(db) - 1); } - - /* - * We parse the HDMI VSDB after having added the cea modes as we - * will be patching their flags when the sink supports stereo - * 3D. - */ - if (hdmi) - modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, - video, video_len); } cea_db_iter_end(&iter); + /* + * We parse the HDMI VSDB after having added the cea modes as we will be + * patching their flags when the sink supports stereo 3D. + */ + if (hdmi) + modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, + video, video_len); + return modes; } From c3292ab5fbd7045f019418b2ce1977891419ad28 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:18 +0200 Subject: [PATCH 06/45] drm/edid: parse VICs from CTA VDB early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A number of places need access to the VICs. Just parse them early for easy access. Gracefully handle multiple CTA VDBs. It's unlikely to have more than one, but the CTA-861 references "Video Data Block(s)", so err on the safe side. Start parsing them now, convert users in follow-up to have fewer moving parts in one go. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/7989b2b37837be68953c5d20afd3e93762bfd626.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_connector.c | 1 + drivers/gpu/drm/drm_edid.c | 36 +++++++++++++++++++++++++++++++++ include/drm/drm_connector.h | 10 +++++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 86abee85b92a..9d0250c28e9b 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -565,6 +565,7 @@ void drm_connector_cleanup(struct drm_connector *connector) ida_free(&dev->mode_config.connector_ida, connector->index); kfree(connector->display_info.bus_formats); + kfree(connector->display_info.vics); drm_mode_object_unregister(dev, &connector->base); kfree(connector->name); connector->name = NULL; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index b94adb9bbefb..90846dc69061 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5862,6 +5862,36 @@ drm_default_rgb_quant_range(const struct drm_display_mode *mode) } EXPORT_SYMBOL(drm_default_rgb_quant_range); +/* CTA-861 Video Data Block (CTA VDB) */ +static void parse_cta_vdb(struct drm_connector *connector, const struct cea_db *db) +{ + struct drm_display_info *info = &connector->display_info; + int i, vic_index, len = cea_db_payload_len(db); + const u8 *svds = cea_db_data(db); + u8 *vics; + + if (!len) + return; + + /* Gracefully handle multiple VDBs, however unlikely that is */ + vics = krealloc(info->vics, info->vics_len + len, GFP_KERNEL); + if (!vics) + return; + + vic_index = info->vics_len; + info->vics_len += len; + info->vics = vics; + + for (i = 0; i < len; i++) { + u8 vic = svd_to_vic(svds[i]); + + if (!drm_valid_cea_vic(vic)) + vic = 0; + + info->vics[vic_index++] = vic; + } +} + static void drm_parse_vcdb(struct drm_connector *connector, const u8 *db) { struct drm_display_info *info = &connector->display_info; @@ -6205,6 +6235,8 @@ static void drm_parse_cea_ext(struct drm_connector *connector, drm_parse_vcdb(connector, data); else if (cea_db_is_hdmi_hdr_metadata_block(db)) drm_parse_hdr_metadata_block(connector, data); + else if (cea_db_tag(db) == CTA_DB_VIDEO) + parse_cta_vdb(connector, db); } cea_db_iter_end(&iter); } @@ -6372,6 +6404,10 @@ static void drm_reset_display_info(struct drm_connector *connector) info->mso_stream_count = 0; info->mso_pixel_overlap = 0; info->max_dsc_bpp = 0; + + kfree(info->vics); + info->vics = NULL; + info->vics_len = 0; } static u32 update_display_info(struct drm_connector *connector, diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 9037f1317aee..d97ed06d5837 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -721,6 +721,16 @@ struct drm_display_info { * monitor's default value is used instead. */ u32 max_dsc_bpp; + + /** + * @vics: Array of vics_len VICs. Internal to EDID parsing. + */ + u8 *vics; + + /** + * @vics_len: Number of elements in vics. Internal to EDID parsing. + */ + int vics_len; }; int drm_display_info_set_bus_formats(struct drm_display_info *info, From 6a40a75f71b997f2248664021f28af0dc7796d18 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:19 +0200 Subject: [PATCH 07/45] drm/edid: Use the pre-parsed VICs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have all the VICs in info->vics, use them to simplify access based on VIC index, i.e. on the order of VICs in the EDID, and avoid passing CTA VDB pointers around. This also fixes the highly unlikely scenarios of a) multiple HDMI VSDBs, and b) HDMI VSDB 3D modes using VIC indexes that span across multiple CTA VDBs, and the combination of the two. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/30f1a97193171e70ec1c26c4b685d8930799b9a6.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 92 +++++++++++++------------------------- 1 file changed, 32 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 90846dc69061..7f0386175230 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4468,28 +4468,20 @@ static u8 svd_to_vic(u8 svd) return svd; } +/* + * Return a display mode for the 0-based vic_index'th VIC across all CTA VDBs in + * the EDID, or NULL on errors. + */ static struct drm_display_mode * -drm_display_mode_from_vic_index(struct drm_connector *connector, - const u8 *video_db, u8 video_len, - u8 video_index) +drm_display_mode_from_vic_index(struct drm_connector *connector, int vic_index) { + const struct drm_display_info *info = &connector->display_info; struct drm_device *dev = connector->dev; - struct drm_display_mode *newmode; - u8 vic; - if (video_db == NULL || video_index >= video_len) + if (!info->vics || vic_index >= info->vics_len || !info->vics[vic_index]) return NULL; - /* CEA modes are numbered 1..127 */ - vic = svd_to_vic(video_db[video_index]); - if (!drm_valid_cea_vic(vic)) - return NULL; - - newmode = drm_mode_duplicate(dev, cea_mode_for_vic(vic)); - if (!newmode) - return NULL; - - return newmode; + return drm_display_mode_from_cea_vic(dev, info->vics[vic_index]); } /* @@ -4538,9 +4530,8 @@ static int do_y420vdb_modes(struct drm_connector *connector, * Makes an entry for a videomode in the YCBCR 420 bitmap */ static void -drm_add_cmdb_modes(struct drm_connector *connector, u8 svd) +drm_add_cmdb_modes(struct drm_connector *connector, u8 vic) { - u8 vic = svd_to_vic(svd); struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; if (!drm_valid_cea_vic(vic)) @@ -4577,16 +4568,20 @@ drm_display_mode_from_cea_vic(struct drm_device *dev, } EXPORT_SYMBOL(drm_display_mode_from_cea_vic); -static int -do_cea_modes(struct drm_connector *connector, const u8 *db, u8 len) +/* Add modes based on VICs parsed in parse_cta_vdb() */ +static int add_cta_vdb_modes(struct drm_connector *connector) { + const struct drm_display_info *info = &connector->display_info; + const struct drm_hdmi_info *hdmi = &info->hdmi; int i, modes = 0; - struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; - for (i = 0; i < len; i++) { + if (!info->vics) + return 0; + + for (i = 0; i < info->vics_len; i++) { struct drm_display_mode *mode; - mode = drm_display_mode_from_vic_index(connector, db, len, i); + mode = drm_display_mode_from_vic_index(connector, i); if (mode) { /* * YCBCR420 capability block contains a bitmap which @@ -4598,7 +4593,7 @@ do_cea_modes(struct drm_connector *connector, const u8 *db, u8 len) * Add YCBCR420 modes only if sink is HDMI 2.0 capable. */ if (i < 64 && hdmi->y420_cmdb_map & (1ULL << i)) - drm_add_cmdb_modes(connector, db[i]); + drm_add_cmdb_modes(connector, info->vics[i]); drm_mode_probed_add(connector, mode); modes++; @@ -4693,15 +4688,13 @@ static int add_hdmi_mode(struct drm_connector *connector, u8 vic) } static int add_3d_struct_modes(struct drm_connector *connector, u16 structure, - const u8 *video_db, u8 video_len, u8 video_index) + int vic_index) { struct drm_display_mode *newmode; int modes = 0; if (structure & (1 << 0)) { - newmode = drm_display_mode_from_vic_index(connector, video_db, - video_len, - video_index); + newmode = drm_display_mode_from_vic_index(connector, vic_index); if (newmode) { newmode->flags |= DRM_MODE_FLAG_3D_FRAME_PACKING; drm_mode_probed_add(connector, newmode); @@ -4709,9 +4702,7 @@ static int add_3d_struct_modes(struct drm_connector *connector, u16 structure, } } if (structure & (1 << 6)) { - newmode = drm_display_mode_from_vic_index(connector, video_db, - video_len, - video_index); + newmode = drm_display_mode_from_vic_index(connector, vic_index); if (newmode) { newmode->flags |= DRM_MODE_FLAG_3D_TOP_AND_BOTTOM; drm_mode_probed_add(connector, newmode); @@ -4719,9 +4710,7 @@ static int add_3d_struct_modes(struct drm_connector *connector, u16 structure, } } if (structure & (1 << 8)) { - newmode = drm_display_mode_from_vic_index(connector, video_db, - video_len, - video_index); + newmode = drm_display_mode_from_vic_index(connector, vic_index); if (newmode) { newmode->flags |= DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF; drm_mode_probed_add(connector, newmode); @@ -4742,8 +4731,7 @@ static int add_3d_struct_modes(struct drm_connector *connector, u16 structure, * also adds the stereo 3d modes when applicable. */ static int -do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len, - const u8 *video_db, u8 video_len) +do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len) { struct drm_display_info *info = &connector->display_info; int modes = 0, offset = 0, i, multi_present = 0, multi_len; @@ -4818,9 +4806,7 @@ do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len, for (i = 0; i < 16; i++) { if (mask & (1 << i)) modes += add_3d_struct_modes(connector, - structure_all, - video_db, - video_len, i); + structure_all, i); } } @@ -4857,8 +4843,6 @@ do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len, if (newflag != 0) { newmode = drm_display_mode_from_vic_index(connector, - video_db, - video_len, vic_index); if (newmode) { @@ -5249,20 +5233,16 @@ static int add_cea_modes(struct drm_connector *connector, { const struct cea_db *db; struct cea_db_iter iter; - const u8 *hdmi = NULL, *video = NULL; - u8 hdmi_len = 0, video_len = 0; - int modes = 0; + int modes; + + /* CTA VDB block VICs parsed earlier */ + modes = add_cta_vdb_modes(connector); cea_db_iter_edid_begin(drm_edid, &iter); cea_db_iter_for_each(db, &iter) { - if (cea_db_tag(db) == CTA_DB_VIDEO) { - video = cea_db_data(db); - video_len = cea_db_payload_len(db); - modes += do_cea_modes(connector, video, video_len); - } else if (cea_db_is_hdmi_vsdb(db)) { - /* FIXME: Switch to use cea_db_data() */ - hdmi = (const u8 *)db; - hdmi_len = cea_db_payload_len(db); + if (cea_db_is_hdmi_vsdb(db)) { + modes += do_hdmi_vsdb_modes(connector, (const u8 *)db, + cea_db_payload_len(db)); } else if (cea_db_is_y420vdb(db)) { const u8 *vdb420 = cea_db_data(db) + 1; @@ -5273,14 +5253,6 @@ static int add_cea_modes(struct drm_connector *connector, } cea_db_iter_end(&iter); - /* - * We parse the HDMI VSDB after having added the cea modes as we will be - * patching their flags when the sink supports stereo 3D. - */ - if (hdmi) - modes += do_hdmi_vsdb_modes(connector, hdmi, hdmi_len, - video, video_len); - return modes; } From 4ed29f398b5aa55f12e8f8da7cdf7c22d82018b7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:20 +0200 Subject: [PATCH 08/45] drm/edid: use VIC in AVI infoframe if sink lists it in CTA VDB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently there are HDMI 1.4 compatible displays out there that support VICs from specs later than CTA-861-D, i.e. VIC > 64, although HDMI 1.4 references CTA-861-D only. We try to avoid using VICs from the later specs in the AVI infoframes to avoid upsetting sinks that conform to earlier specs. However, it seems reasonable to do this when the sink claims it supports the VIC. With the pre-parsed list of VICs handy, this is now trivial. References: https://gitlab.freedesktop.org/drm/intel/-/issues/6153 Cc: Ville Syrjälä Cc: William Tseng Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/775124fd07a5b7892e869becc3dd8dadb328ae5f.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7f0386175230..3dfcd6450f10 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5864,6 +5864,22 @@ static void parse_cta_vdb(struct drm_connector *connector, const struct cea_db * } } +static bool cta_vdb_has_vic(const struct drm_connector *connector, u8 vic) +{ + const struct drm_display_info *info = &connector->display_info; + int i; + + if (!vic || !info->vics) + return false; + + for (i = 0; i < info->vics_len; i++) { + if (info->vics[i] == vic) + return true; + } + + return false; +} + static void drm_parse_vcdb(struct drm_connector *connector, const u8 *db) { struct drm_display_info *info = &connector->display_info; @@ -6909,10 +6925,14 @@ static u8 drm_mode_cea_vic(const struct drm_connector *connector, * * HDMI 1.4 (CTA-861-D) VIC range: [1..64] * HDMI 2.0 (CTA-861-F) VIC range: [1..107] + * + * If the sink lists the VIC in CTA VDB, assume it's fine, regardless of HDMI + * version. */ static u8 vic_for_avi_infoframe(const struct drm_connector *connector, u8 vic) { - if (!is_hdmi2_sink(connector) && vic > 64) + if (!is_hdmi2_sink(connector) && vic > 64 && + !cta_vdb_has_vic(connector, vic)) return 0; return vic; From 26c2ff77349927d6544db107f65757f447574dae Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:21 +0200 Subject: [PATCH 09/45] drm/edid: rename struct drm_display_info *display to *info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the local variable to info for consistency. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/d35a50c714e21869afcabfafd5c5e590936b791a.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3dfcd6450f10..4e9108e9fc96 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6011,14 +6011,14 @@ static void drm_parse_dsc_info(struct drm_hdmi_dsc_cap *hdmi_dsc, static void drm_parse_hdmi_forum_scds(struct drm_connector *connector, const u8 *hf_scds) { - struct drm_display_info *display = &connector->display_info; - struct drm_hdmi_info *hdmi = &display->hdmi; + struct drm_display_info *info = &connector->display_info; + struct drm_hdmi_info *hdmi = &info->hdmi; struct drm_hdmi_dsc_cap *hdmi_dsc = &hdmi->dsc_cap; int max_tmds_clock = 0; u8 max_frl_rate = 0; bool dsc_support = false; - display->has_hdmi_infoframe = true; + info->has_hdmi_infoframe = true; if (hf_scds[6] & 0x80) { hdmi->scdc.supported = true; @@ -6042,7 +6042,7 @@ static void drm_parse_hdmi_forum_scds(struct drm_connector *connector, max_tmds_clock = hf_scds[5] * 5000; if (max_tmds_clock > 340000) { - display->max_tmds_clock = max_tmds_clock; + info->max_tmds_clock = max_tmds_clock; } if (scdc->supported) { From 61e05fdc096bc27b32a93fa85f31ccf535d6a5a9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:22 +0200 Subject: [PATCH 10/45] drm/edid: refactor CTA Y420CMDB parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have pre-parsed CTA VDB VICs stored in info->vics, leverage that to simplify CTA Y420CMDB parsing. Move updating the y420_cmdb_modes bitmap to the display info parsing stage, instead of updating it during add modes. This allows us to drop the intermediate y420_cmdb_map from display info, and replace it with a local variable. This is prerequisite work for overall better separation of the two parsing steps (updating display info and adding modes). Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/7a0e5e99a83f203b6a8981d263b89b2bb7d2fe15.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 86 ++++++++++++++++++------------------- include/drm/drm_connector.h | 3 -- 2 files changed, 43 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 4e9108e9fc96..ead7a4ce0422 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4522,24 +4522,6 @@ static int do_y420vdb_modes(struct drm_connector *connector, return modes; } -/* - * drm_add_cmdb_modes - Add a YCBCR 420 mode into bitmap - * @connector: connector corresponding to the HDMI sink - * @vic: CEA vic for the video mode to be added in the map - * - * Makes an entry for a videomode in the YCBCR 420 bitmap - */ -static void -drm_add_cmdb_modes(struct drm_connector *connector, u8 vic) -{ - struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; - - if (!drm_valid_cea_vic(vic)) - return; - - bitmap_set(hdmi->y420_cmdb_modes, vic, 1); -} - /** * drm_display_mode_from_cea_vic() - return a mode for CEA VIC * @dev: DRM device @@ -4572,7 +4554,6 @@ EXPORT_SYMBOL(drm_display_mode_from_cea_vic); static int add_cta_vdb_modes(struct drm_connector *connector) { const struct drm_display_info *info = &connector->display_info; - const struct drm_hdmi_info *hdmi = &info->hdmi; int i, modes = 0; if (!info->vics) @@ -4583,18 +4564,6 @@ static int add_cta_vdb_modes(struct drm_connector *connector) mode = drm_display_mode_from_vic_index(connector, i); if (mode) { - /* - * YCBCR420 capability block contains a bitmap which - * gives the index of CEA modes from CEA VDB, which - * can support YCBCR 420 sampling output also (apart - * from RGB/YCBCR444 etc). - * For example, if the bit 0 in bitmap is set, - * first mode in VDB can support YCBCR420 output too. - * Add YCBCR420 modes only if sink is HDMI 2.0 capable. - */ - if (i < 64 && hdmi->y420_cmdb_map & (1ULL << i)) - drm_add_cmdb_modes(connector, info->vics[i]); - drm_mode_probed_add(connector, mode); modes++; } @@ -5188,20 +5157,26 @@ static int edid_hfeeodb_extension_block_count(const struct edid *edid) return cta[4 + 2]; } -static void drm_parse_y420cmdb_bitmap(struct drm_connector *connector, - const u8 *db) +/* + * CTA-861 YCbCr 4:2:0 Capability Map Data Block (CTA Y420CMDB) + * + * Y420CMDB contains a bitmap which gives the index of CTA modes from CTA VDB, + * which can support YCBCR 420 sampling output also (apart from RGB/YCBCR444 + * etc). For example, if the bit 0 in bitmap is set, first mode in VDB can + * support YCBCR420 output too. + */ +static void parse_cta_y420cmdb(struct drm_connector *connector, + const struct cea_db *db, u64 *y420cmdb_map) { struct drm_display_info *info = &connector->display_info; - struct drm_hdmi_info *hdmi = &info->hdmi; - u8 map_len = cea_db_payload_len(db) - 1; - u8 count; + int i, map_len = cea_db_payload_len(db) - 1; + const u8 *data = cea_db_data(db) + 1; u64 map = 0; if (map_len == 0) { /* All CEA modes support ycbcr420 sampling also.*/ - hdmi->y420_cmdb_map = U64_MAX; - info->color_formats |= DRM_COLOR_FORMAT_YCBCR420; - return; + map = U64_MAX; + goto out; } /* @@ -5219,13 +5194,14 @@ static void drm_parse_y420cmdb_bitmap(struct drm_connector *connector, if (WARN_ON_ONCE(map_len > 8)) map_len = 8; - for (count = 0; count < map_len; count++) - map |= (u64)db[2 + count] << (8 * count); + for (i = 0; i < map_len; i++) + map |= (u64)data[i] << (8 * i); +out: if (map) info->color_formats |= DRM_COLOR_FORMAT_YCBCR420; - hdmi->y420_cmdb_map = map; + *y420cmdb_map = map; } static int add_cea_modes(struct drm_connector *connector, @@ -5864,6 +5840,26 @@ static void parse_cta_vdb(struct drm_connector *connector, const struct cea_db * } } +/* + * Update y420_cmdb_modes based on previously parsed CTA VDB and Y420CMDB. + * + * Translate the y420cmdb_map based on VIC indexes to y420_cmdb_modes indexed + * using the VICs themselves. + */ +static void update_cta_y420cmdb(struct drm_connector *connector, u64 y420cmdb_map) +{ + struct drm_display_info *info = &connector->display_info; + struct drm_hdmi_info *hdmi = &info->hdmi; + int i, len = min_t(int, info->vics_len, BITS_PER_TYPE(y420cmdb_map)); + + for (i = 0; i < len; i++) { + u8 vic = info->vics[i]; + + if (vic && y420cmdb_map & BIT_ULL(i)) + bitmap_set(hdmi->y420_cmdb_modes, vic, 1); + } +} + static bool cta_vdb_has_vic(const struct drm_connector *connector, u8 vic) { const struct drm_display_info *info = &connector->display_info; @@ -6181,6 +6177,7 @@ static void drm_parse_cea_ext(struct drm_connector *connector, const struct cea_db *db; struct cea_db_iter iter; const u8 *edid_ext; + u64 y420cmdb_map = 0; drm_edid_iter_begin(drm_edid, &edid_iter); drm_edid_iter_for_each(edid_ext, &edid_iter) { @@ -6218,7 +6215,7 @@ static void drm_parse_cea_ext(struct drm_connector *connector, else if (cea_db_is_microsoft_vsdb(db)) drm_parse_microsoft_vsdb(connector, data); else if (cea_db_is_y420cmdb(db)) - drm_parse_y420cmdb_bitmap(connector, data); + parse_cta_y420cmdb(connector, db, &y420cmdb_map); else if (cea_db_is_vcdb(db)) drm_parse_vcdb(connector, data); else if (cea_db_is_hdmi_hdr_metadata_block(db)) @@ -6227,6 +6224,9 @@ static void drm_parse_cea_ext(struct drm_connector *connector, parse_cta_vdb(connector, db); } cea_db_iter_end(&iter); + + if (y420cmdb_map) + update_cta_y420cmdb(connector, y420cmdb_map); } static diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index d97ed06d5837..1c26c4e72c62 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -304,9 +304,6 @@ struct drm_hdmi_info { */ unsigned long y420_cmdb_modes[BITS_TO_LONGS(256)]; - /** @y420_cmdb_map: bitmap of SVD index, to extraxt vcb modes */ - u64 y420_cmdb_map; - /** @y420_dc_modes: bitmap of deep color support index */ u8 y420_dc_modes; From c54e2e23c3fc3112c821b490e78ea22a7ecea288 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:23 +0200 Subject: [PATCH 11/45] drm/edid: split CTA Y420VDB info and mode parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate the parsing of display info and modes from the CTA Y420VDB. This is prerequisite work for overall better separation of the two parsing steps. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/3bc5fe6650a6ce4249803f7192096764ea724e05.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ead7a4ce0422..076ba125c38d 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4497,10 +4497,8 @@ drm_display_mode_from_vic_index(struct drm_connector *connector, int vic_index) static int do_y420vdb_modes(struct drm_connector *connector, const u8 *svds, u8 svds_len) { - int modes = 0, i; struct drm_device *dev = connector->dev; - struct drm_display_info *info = &connector->display_info; - struct drm_hdmi_info *hdmi = &info->hdmi; + int modes = 0, i; for (i = 0; i < svds_len; i++) { u8 vic = svd_to_vic(svds[i]); @@ -4512,13 +4510,10 @@ static int do_y420vdb_modes(struct drm_connector *connector, newmode = drm_mode_duplicate(dev, cea_mode_for_vic(vic)); if (!newmode) break; - bitmap_set(hdmi->y420_vdb_modes, vic, 1); drm_mode_probed_add(connector, newmode); modes++; } - if (modes > 0) - info->color_formats |= DRM_COLOR_FORMAT_YCBCR420; return modes; } @@ -5876,6 +5871,26 @@ static bool cta_vdb_has_vic(const struct drm_connector *connector, u8 vic) return false; } +/* CTA-861-H YCbCr 4:2:0 Video Data Block (CTA Y420VDB) */ +static void parse_cta_y420vdb(struct drm_connector *connector, + const struct cea_db *db) +{ + struct drm_display_info *info = &connector->display_info; + struct drm_hdmi_info *hdmi = &info->hdmi; + const u8 *svds = cea_db_data(db) + 1; + int i; + + for (i = 0; i < cea_db_payload_len(db) - 1; i++) { + u8 vic = svd_to_vic(svds[i]); + + if (!drm_valid_cea_vic(vic)) + continue; + + bitmap_set(hdmi->y420_vdb_modes, vic, 1); + info->color_formats |= DRM_COLOR_FORMAT_YCBCR420; + } +} + static void drm_parse_vcdb(struct drm_connector *connector, const u8 *db) { struct drm_display_info *info = &connector->display_info; @@ -6216,6 +6231,8 @@ static void drm_parse_cea_ext(struct drm_connector *connector, drm_parse_microsoft_vsdb(connector, data); else if (cea_db_is_y420cmdb(db)) parse_cta_y420cmdb(connector, db, &y420cmdb_map); + else if (cea_db_is_y420vdb(db)) + parse_cta_y420vdb(connector, db); else if (cea_db_is_vcdb(db)) drm_parse_vcdb(connector, data); else if (cea_db_is_hdmi_hdr_metadata_block(db)) From 1ee3e217f98e4a14a792ed2d7b42417ad1194922 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:24 +0200 Subject: [PATCH 12/45] drm/edid: fix and clarify HDMI VSDB audio latency parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helpers for Latency_Fields_Present and I_Latency_Fields_Present bits, and fix the parsing: - Respect specification regarding "I_Latency_Fields_Present shall be zero if Latency_Fields_Present is zero". - Don't claim latency fields are present if the data block isn't big enough to hold them. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/80426772a2d2e17bebf6f58d99b7d0cf6260c2d6.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 076ba125c38d..847076b29594 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4685,6 +4685,16 @@ static int add_3d_struct_modes(struct drm_connector *connector, u16 structure, return modes; } +static bool hdmi_vsdb_latency_present(const u8 *db) +{ + return db[8] & BIT(7); +} + +static bool hdmi_vsdb_i_latency_present(const u8 *db) +{ + return hdmi_vsdb_latency_present(db) && db[8] & BIT(6); +} + /* * do_hdmi_vsdb_modes - Parse the HDMI Vendor Specific data block * @connector: connector corresponding to the HDMI sink @@ -5357,6 +5367,7 @@ drm_parse_hdr_metadata_block(struct drm_connector *connector, const u8 *db) } } +/* HDMI Vendor-Specific Data Block (HDMI VSDB, H14b-VSDB) */ static void drm_parse_hdmi_vsdb_audio(struct drm_connector *connector, const u8 *db) { @@ -5364,18 +5375,18 @@ drm_parse_hdmi_vsdb_audio(struct drm_connector *connector, const u8 *db) if (len >= 6 && (db[6] & (1 << 7))) connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] |= DRM_ELD_SUPPORTS_AI; - if (len >= 8) { - connector->latency_present[0] = db[8] >> 7; - connector->latency_present[1] = (db[8] >> 6) & 1; - } - if (len >= 9) + + if (len >= 10 && hdmi_vsdb_latency_present(db)) { + connector->latency_present[0] = true; connector->video_latency[0] = db[9]; - if (len >= 10) connector->audio_latency[0] = db[10]; - if (len >= 11) + } + + if (len >= 12 && hdmi_vsdb_i_latency_present(db)) { + connector->latency_present[1] = true; connector->video_latency[1] = db[11]; - if (len >= 12) connector->audio_latency[1] = db[12]; + } drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] HDMI: latency present %d %d, video latency %d %d, audio latency %d %d\n", From cba83c1fc38612c3d2c7b1bfed9d882e4848fb0d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:25 +0200 Subject: [PATCH 13/45] drm/edid: add helper for HDMI VSDB audio latency field length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper for skipping the HDMI VSDB audio latency fields. There's a functional change for HDMI VSDB blocks that do not respect the spec: "I_Latency_Fields_Present shall be zero if Latency_Fields_Present is zero". We assume this to hold when skipping the latency fields, and ignore non-zero I_Latency_Fields_Present if Latency_Fields_Present is zero. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/da4293203ef2ddeb0bf66a2bfdbc129ab609c543.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 847076b29594..93067b8dd9f6 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4695,6 +4695,16 @@ static bool hdmi_vsdb_i_latency_present(const u8 *db) return hdmi_vsdb_latency_present(db) && db[8] & BIT(6); } +static int hdmi_vsdb_latency_length(const u8 *db) +{ + if (hdmi_vsdb_i_latency_present(db)) + return 4; + else if (hdmi_vsdb_latency_present(db)) + return 2; + else + return 0; +} + /* * do_hdmi_vsdb_modes - Parse the HDMI Vendor Specific data block * @connector: connector corresponding to the HDMI sink @@ -4720,13 +4730,7 @@ do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len) if (!(db[8] & (1 << 5))) goto out; - /* Latency_Fields_Present */ - if (db[8] & (1 << 7)) - offset += 2; - - /* I_Latency_Fields_Present */ - if (db[8] & (1 << 6)) - offset += 2; + offset += hdmi_vsdb_latency_length(db); /* the declared length is not long enough for the 2 first bytes * of additional video format capabilities */ From 35b137630f08d913fc2e33df33ccc2570dff3f7d Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:17 +0100 Subject: [PATCH 14/45] accel/ivpu: Introduce a new DRM driver for Intel VPU VPU stands for Versatile Processing Unit and it's a CPU-integrated inference accelerator for Computer Vision and Deep Learning applications. The VPU device consist of following components: - Buttress - provides CPU to VPU integration, interrupt, frequency and power management. - Memory Management Unit (based on ARM MMU-600) - translates VPU to host DMA addresses, isolates user workloads. - RISC based microcontroller - executes firmware that provides job execution API for the kernel-mode driver - Neural Compute Subsystem (NCS) - does the actual work, provides Compute and Copy engines. - Network on Chip (NoC) - network fabric connecting all the components This driver supports VPU IP v2.7 integrated into Intel Meteor Lake client CPUs (14th generation). Module sources are at drivers/accel/ivpu and module name is "intel_vpu.ko". This patch includes only very besic functionality: - module, PCI device and IRQ initialization - register definitions and low level register manipulation functions - SET/GET_PARAM ioctls - power up without firmware Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-2-jacek.lawrynowicz@linux.intel.com --- MAINTAINERS | 9 + drivers/Makefile | 1 + drivers/accel/Kconfig | 2 + drivers/accel/Makefile | 3 + drivers/accel/ivpu/Kconfig | 15 + drivers/accel/ivpu/Makefile | 8 + drivers/accel/ivpu/TODO | 11 + drivers/accel/ivpu/ivpu_drv.c | 353 +++++++++ drivers/accel/ivpu/ivpu_drv.h | 162 ++++ drivers/accel/ivpu/ivpu_hw.h | 170 +++++ drivers/accel/ivpu/ivpu_hw_mtl.c | 1048 ++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_hw_mtl_reg.h | 280 +++++++ drivers/accel/ivpu/ivpu_hw_reg_io.h | 115 +++ include/uapi/drm/ivpu_accel.h | 95 +++ 14 files changed, 2272 insertions(+) create mode 100644 drivers/accel/Makefile create mode 100644 drivers/accel/ivpu/Kconfig create mode 100644 drivers/accel/ivpu/Makefile create mode 100644 drivers/accel/ivpu/TODO create mode 100644 drivers/accel/ivpu/ivpu_drv.c create mode 100644 drivers/accel/ivpu/ivpu_drv.h create mode 100644 drivers/accel/ivpu/ivpu_hw.h create mode 100644 drivers/accel/ivpu/ivpu_hw_mtl.c create mode 100644 drivers/accel/ivpu/ivpu_hw_mtl_reg.h create mode 100644 drivers/accel/ivpu/ivpu_hw_reg_io.h create mode 100644 include/uapi/drm/ivpu_accel.h diff --git a/MAINTAINERS b/MAINTAINERS index 1bd2a1d3f14f..071f7b62a36e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6894,6 +6894,15 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/accel.git F: Documentation/accel/ F: drivers/accel/ +DRM ACCEL DRIVERS FOR INTEL VPU +M: Jacek Lawrynowicz +M: Stanislaw Gruszka +L: dri-devel@lists.freedesktop.org +S: Supported +T: git git://anongit.freedesktop.org/drm/drm-misc +F: drivers/accel/ivpu/ +F: include/uapi/drm/ivpu_accel.h + DRM DRIVERS FOR ALLWINNER A10 M: Maxime Ripard M: Chen-Yu Tsai diff --git a/drivers/Makefile b/drivers/Makefile index bdf1c66141c9..f0972e2226c9 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -189,3 +189,4 @@ obj-$(CONFIG_COUNTER) += counter/ obj-$(CONFIG_MOST) += most/ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ +obj-$(CONFIG_DRM_ACCEL) += accel/ \ No newline at end of file diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig index c9ce849b2984..4989376e5938 100644 --- a/drivers/accel/Kconfig +++ b/drivers/accel/Kconfig @@ -22,3 +22,5 @@ menuconfig DRM_ACCEL major number than GPUs, and will be exposed to user-space using different device files, called accel/accel* (in /dev, sysfs and debugfs). + +source "drivers/accel/ivpu/Kconfig" diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile new file mode 100644 index 000000000000..b1036dbc0ba4 --- /dev/null +++ b/drivers/accel/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-y += ivpu/ diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig new file mode 100644 index 000000000000..9bdf168bf1d0 --- /dev/null +++ b/drivers/accel/ivpu/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_IVPU + tristate "Intel VPU for Meteor Lake and newer" + depends on DRM_ACCEL + depends on X86_64 && !UML + depends on PCI && PCI_MSI + select FW_LOADER + select SHMEM + help + Choose this option if you have a system that has an 14th generation Intel CPU + or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated + inference accelerator for Computer Vision and Deep Learning applications. + + If "M" is selected, the module will be called intel_vpu. diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile new file mode 100644 index 000000000000..2cc8394101cd --- /dev/null +++ b/drivers/accel/ivpu/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2023 Intel Corporation + +intel_vpu-y := \ + ivpu_drv.o \ + ivpu_hw_mtl.o + +obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o \ No newline at end of file diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO new file mode 100644 index 000000000000..9077217ae10f --- /dev/null +++ b/drivers/accel/ivpu/TODO @@ -0,0 +1,11 @@ +- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler() +- Implement support for BLOB IDs +- Add debugfs support to improve debugging and testing +- Add tracing events for performance debugging +- Implement HW based scheduling support +- Use syncobjs for submit/sync +- Refactor IPC protocol to improve message latency +- Implement BO cache and MADVISE IOCTL +- Add support for user allocated buffers using prime import and dma-buf heaps +- Refactor struct ivpu_bo to use struct drm_gem_shmem_object +- Add driver/device documentation diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c new file mode 100644 index 000000000000..1134fb0028ad --- /dev/null +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_hw.h" + +#ifndef DRIVER_VERSION_STR +#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ + __stringify(DRM_IVPU_DRIVER_MINOR) "." +#endif + +static const struct drm_driver driver; + +int ivpu_dbg_mask; +module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); +MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); + +u8 ivpu_pll_min_ratio; +module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); +MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency"); + +u8 ivpu_pll_max_ratio = U8_MAX; +module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); +MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); + +struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) +{ + kref_get(&file_priv->ref); + return file_priv; +} + +static void file_priv_release(struct kref *ref) +{ + struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); + + kfree(file_priv); +} + +void ivpu_file_priv_put(struct ivpu_file_priv **link) +{ + struct ivpu_file_priv *file_priv = *link; + + WARN_ON(!file_priv); + + *link = NULL; + kref_put(&file_priv->ref, file_priv_release); +} + +static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + struct drm_ivpu_param *args = data; + int ret = 0; + + switch (args->param) { + case DRM_IVPU_PARAM_DEVICE_ID: + args->value = pdev->device; + break; + case DRM_IVPU_PARAM_DEVICE_REVISION: + args->value = pdev->revision; + break; + case DRM_IVPU_PARAM_PLATFORM_TYPE: + args->value = vdev->platform; + break; + case DRM_IVPU_PARAM_CORE_CLOCK_RATE: + args->value = ivpu_hw_reg_pll_freq_get(vdev); + break; + case DRM_IVPU_PARAM_NUM_CONTEXTS: + args->value = ivpu_get_context_count(vdev); + break; + case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: + args->value = vdev->hw->ranges.user_low.start; + break; + case DRM_IVPU_PARAM_CONTEXT_PRIORITY: + args->value = file_priv->priority; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_param *args = data; + int ret = 0; + + switch (args->param) { + case DRM_IVPU_PARAM_CONTEXT_PRIORITY: + if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME) + file_priv->priority = args->value; + else + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int ivpu_open(struct drm_device *dev, struct drm_file *file) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct ivpu_file_priv *file_priv; + + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + if (!file_priv) + return -ENOMEM; + + file_priv->vdev = vdev; + file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; + kref_init(&file_priv->ref); + + file->driver_priv = file_priv; + return 0; +} + +static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + + ivpu_file_priv_put(&file_priv); +} + +static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), +}; + +int ivpu_shutdown(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_irq_disable(vdev); + + ret = ivpu_hw_power_down(vdev); + if (ret) + ivpu_warn(vdev, "Failed to power down HW: %d\n", ret); + + return ret; +} + +static const struct file_operations ivpu_fops = { + .owner = THIS_MODULE, + .mmap = drm_gem_mmap, + DRM_ACCEL_FOPS, +}; + +static const struct drm_driver driver = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, + + .open = ivpu_open, + .postclose = ivpu_postclose, + + .ioctls = ivpu_drm_ioctls, + .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), + .fops = &ivpu_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRM_IVPU_DRIVER_MAJOR, + .minor = DRM_IVPU_DRIVER_MINOR, +}; + +static int ivpu_irq_init(struct ivpu_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate a MSI IRQ: %d\n", ret); + return ret; + } + + vdev->irq = pci_irq_vector(pdev, 0); + + ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, + IRQF_NO_AUTOEN, DRIVER_NAME, vdev); + if (ret) + ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); + + return ret; +} + +static int ivpu_pci_init(struct ivpu_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + struct resource *bar0 = &pdev->resource[0]; + struct resource *bar4 = &pdev->resource[4]; + int ret; + + ivpu_dbg(vdev, MISC, "Mapping BAR0 (RegV) %pR\n", bar0); + vdev->regv = devm_ioremap_resource(vdev->drm.dev, bar0); + if (IS_ERR(vdev->regv)) { + ivpu_err(vdev, "Failed to map bar 0: %pe\n", vdev->regv); + return PTR_ERR(vdev->regv); + } + + ivpu_dbg(vdev, MISC, "Mapping BAR4 (RegB) %pR\n", bar4); + vdev->regb = devm_ioremap_resource(vdev->drm.dev, bar4); + if (IS_ERR(vdev->regb)) { + ivpu_err(vdev, "Failed to map bar 4: %pe\n", vdev->regb); + return PTR_ERR(vdev->regb); + } + + ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38)); + if (ret) { + ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret); + return ret; + } + + /* Clear any pending errors */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); + + ret = pcim_enable_device(pdev); + if (ret) { + ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int ivpu_dev_init(struct ivpu_device *vdev) +{ + int ret; + + vdev->hw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->hw), GFP_KERNEL); + if (!vdev->hw) + return -ENOMEM; + + vdev->hw->ops = &ivpu_hw_mtl_ops; + vdev->platform = IVPU_PLATFORM_INVALID; + vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; + vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + + ret = ivpu_pci_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret); + goto err_xa_destroy; + } + + ret = ivpu_irq_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret); + goto err_xa_destroy; + } + + /* Init basic HW info based on buttress registers which are accessible before power up */ + ret = ivpu_hw_info_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret); + goto err_xa_destroy; + } + + /* Power up early so the rest of init code can access VPU registers */ + ret = ivpu_hw_power_up(vdev); + if (ret) { + ivpu_err(vdev, "Failed to power up HW: %d\n", ret); + goto err_xa_destroy; + } + + return 0; + +err_xa_destroy: + xa_destroy(&vdev->context_xa); + return ret; +} + +static void ivpu_dev_fini(struct ivpu_device *vdev) +{ + ivpu_shutdown(vdev); + + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); + xa_destroy(&vdev->context_xa); +} + +static struct pci_device_id ivpu_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); + +static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ivpu_device *vdev; + int ret; + + vdev = devm_drm_dev_alloc(&pdev->dev, &driver, struct ivpu_device, drm); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + pci_set_drvdata(pdev, vdev); + + ret = ivpu_dev_init(vdev); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret); + return ret; + } + + ret = drm_dev_register(&vdev->drm, 0); + if (ret) { + dev_err(&pdev->dev, "Failed to register DRM device: %d\n", ret); + ivpu_dev_fini(vdev); + } + + return ret; +} + +static void ivpu_remove(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + + drm_dev_unregister(&vdev->drm); + ivpu_dev_fini(vdev); +} + +static struct pci_driver ivpu_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = ivpu_pci_ids, + .probe = ivpu_probe, + .remove = ivpu_remove, +}; + +module_pci_driver(ivpu_pci_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); +MODULE_VERSION(DRIVER_VERSION_STR); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h new file mode 100644 index 000000000000..d0b006893b1c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_DRV_H__ +#define __IVPU_DRV_H__ + +#include +#include +#include +#include + +#include +#include +#include + +#define DRIVER_NAME "intel_vpu" +#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" +#define DRIVER_DATE "20230117" + +#define PCI_DEVICE_ID_MTL 0x7d1d + +#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 +#define IVPU_CONTEXT_LIMIT 64 +#define IVPU_NUM_ENGINES 2 + +#define IVPU_PLATFORM_SILICON 0 +#define IVPU_PLATFORM_SIMICS 2 +#define IVPU_PLATFORM_FPGA 3 +#define IVPU_PLATFORM_INVALID 8 + +#define IVPU_DBG_REG BIT(0) +#define IVPU_DBG_IRQ BIT(1) +#define IVPU_DBG_MMU BIT(2) +#define IVPU_DBG_FILE BIT(3) +#define IVPU_DBG_MISC BIT(4) +#define IVPU_DBG_FW_BOOT BIT(5) +#define IVPU_DBG_PM BIT(6) +#define IVPU_DBG_IPC BIT(7) +#define IVPU_DBG_BO BIT(8) +#define IVPU_DBG_JOB BIT(9) +#define IVPU_DBG_JSM BIT(10) +#define IVPU_DBG_KREF BIT(11) +#define IVPU_DBG_RPM BIT(12) + +#define ivpu_err(vdev, fmt, ...) \ + drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_err_ratelimited(vdev, fmt, ...) \ + drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_warn(vdev, fmt, ...) \ + drm_warn(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_warn_ratelimited(vdev, fmt, ...) \ + drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_info(vdev, fmt, ...) drm_info(&(vdev)->drm, fmt, ##__VA_ARGS__) + +#define ivpu_dbg(vdev, type, fmt, args...) do { \ + if (unlikely(IVPU_DBG_##type & ivpu_dbg_mask)) \ + dev_dbg((vdev)->drm.dev, "[%s] " fmt, #type, ##args); \ +} while (0) + +#define IVPU_WA(wa_name) (vdev->wa.wa_name) + +struct ivpu_wa_table { + bool punit_disabled; + bool clear_runtime_mem; +}; + +struct ivpu_hw_info; + +struct ivpu_device { + struct drm_device drm; + void __iomem *regb; + void __iomem *regv; + u32 platform; + u32 irq; + + struct ivpu_wa_table wa; + struct ivpu_hw_info *hw; + + struct xarray context_xa; + struct xa_limit context_xa_limit; + + struct { + int boot; + int jsm; + int tdr; + int reschedule_suspend; + } timeout; +}; + +/* + * file_priv has its own refcount (ref) that allows user space to close the fd + * without blocking even if VPU is still processing some jobs. + */ +struct ivpu_file_priv { + struct kref ref; + struct ivpu_device *vdev; + u32 priority; +}; + +extern int ivpu_dbg_mask; +extern u8 ivpu_pll_min_ratio; +extern u8 ivpu_pll_max_ratio; + +struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); +void ivpu_file_priv_put(struct ivpu_file_priv **link); +int ivpu_shutdown(struct ivpu_device *vdev); + +static inline bool ivpu_is_mtl(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->device == PCI_DEVICE_ID_MTL; +} + +static inline u8 ivpu_revision(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->revision; +} + +static inline u16 ivpu_device_id(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->device; +} + +static inline struct ivpu_device *to_ivpu_device(struct drm_device *dev) +{ + return container_of(dev, struct ivpu_device, drm); +} + +static inline u32 ivpu_get_context_count(struct ivpu_device *vdev) +{ + struct xa_limit ctx_limit = vdev->context_xa_limit; + + return (ctx_limit.max - ctx_limit.min + 1); +} + +static inline u32 ivpu_get_platform(struct ivpu_device *vdev) +{ + WARN_ON_ONCE(vdev->platform == IVPU_PLATFORM_INVALID); + return vdev->platform; +} + +static inline bool ivpu_is_silicon(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_SILICON; +} + +static inline bool ivpu_is_simics(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_SIMICS; +} + +static inline bool ivpu_is_fpga(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA; +} + +#endif /* __IVPU_DRV_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h new file mode 100644 index 000000000000..50a9304ab09c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_H__ +#define __IVPU_HW_H__ + +#include "ivpu_drv.h" + +struct ivpu_hw_ops { + int (*info_init)(struct ivpu_device *vdev); + int (*power_up)(struct ivpu_device *vdev); + int (*boot_fw)(struct ivpu_device *vdev); + int (*power_down)(struct ivpu_device *vdev); + bool (*is_idle)(struct ivpu_device *vdev); + void (*wdt_disable)(struct ivpu_device *vdev); + void (*diagnose_failure)(struct ivpu_device *vdev); + u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); + void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id); + u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev); + u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev); + void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr); + void (*irq_clear)(struct ivpu_device *vdev); + void (*irq_enable)(struct ivpu_device *vdev); + void (*irq_disable)(struct ivpu_device *vdev); + irqreturn_t (*irq_handler)(int irq, void *ptr); +}; + +struct ivpu_addr_range { + resource_size_t start; + resource_size_t end; +}; + +struct ivpu_hw_info { + const struct ivpu_hw_ops *ops; + struct { + struct ivpu_addr_range global_low; + struct ivpu_addr_range global_high; + struct ivpu_addr_range user_low; + struct ivpu_addr_range user_high; + struct ivpu_addr_range global_aliased_pio; + } ranges; + struct { + u8 min_ratio; + u8 max_ratio; + /* + * Pll ratio for the efficiency frequency. The VPU has optimum + * performance to power ratio at this frequency. + */ + u8 pn_ratio; + u32 profiling_freq; + } pll; + u32 tile_fuse; + u32 sku; + u16 config; +}; + +extern const struct ivpu_hw_ops ivpu_hw_mtl_ops; + +static inline int ivpu_hw_info_init(struct ivpu_device *vdev) +{ + return vdev->hw->ops->info_init(vdev); +}; + +static inline int ivpu_hw_power_up(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW power up\n"); + + return vdev->hw->ops->power_up(vdev); +}; + +static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev) +{ + return vdev->hw->ops->boot_fw(vdev); +}; + +static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) +{ + return vdev->hw->ops->is_idle(vdev); +}; + +static inline int ivpu_hw_power_down(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW power down\n"); + + return vdev->hw->ops->power_down(vdev); +}; + +static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) +{ + vdev->hw->ops->wdt_disable(vdev); +}; + +/* Register indirect accesses */ +static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_pll_freq_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_offset_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_size_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_enable_get(vdev); +}; + +static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id) +{ + vdev->hw->ops->reg_db_set(vdev, db_id); +}; + +static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_ipc_rx_addr_get(vdev); +}; + +static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_ipc_rx_count_get(vdev); +}; + +static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr); +}; + +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_clear(vdev); +}; + +static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_enable(vdev); +}; + +static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_disable(vdev); +}; + +static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size) +{ + range->start = start; + range->end = start + size; +} + +static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) +{ + return range->end - range->start; +} + +static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) +{ + vdev->hw->ops->diagnose_failure(vdev); +} + +#endif /* __IVPU_HW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c new file mode 100644 index 000000000000..8a1b1c03f77b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -0,0 +1,1048 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_hw.h" + +#define TILE_FUSE_ENABLE_BOTH 0x0 +#define TILE_FUSE_ENABLE_UPPER 0x1 +#define TILE_FUSE_ENABLE_LOWER 0x2 + +#define TILE_SKU_BOTH_MTL 0x3630 +#define TILE_SKU_LOWER_MTL 0x3631 +#define TILE_SKU_UPPER_MTL 0x3632 + +/* Work point configuration values */ +#define WP_CONFIG_1_TILE_5_3_RATIO 0x0101 +#define WP_CONFIG_1_TILE_4_3_RATIO 0x0102 +#define WP_CONFIG_2_TILE_5_3_RATIO 0x0201 +#define WP_CONFIG_2_TILE_4_3_RATIO 0x0202 +#define WP_CONFIG_0_TILE_PLL_OFF 0x0000 + +#define PLL_REF_CLK_FREQ (50 * 1000000) +#define PLL_SIMULATION_FREQ (10 * 1000000) +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) +#define PLL_DEFAULT_EPP_VALUE 0x80 + +#define TIM_SAFE_ENABLE 0xf1d0dead +#define TIM_WATCHDOG_RESET_VALUE 0xffffffff + +#define TIMEOUT_US (150 * USEC_PER_MSEC) +#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) +#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC) + +#define ICB_0_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) + +#define BUTTRESS_IRQ_MASK ((REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ + (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) + +#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) +#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) + +#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +static char *ivpu_platform_to_str(u32 platform) +{ + switch (platform) { + case IVPU_PLATFORM_SILICON: + return "IVPU_PLATFORM_SILICON"; + case IVPU_PLATFORM_SIMICS: + return "IVPU_PLATFORM_SIMICS"; + case IVPU_PLATFORM_FPGA: + return "IVPU_PLATFORM_FPGA"; + default: + return "Invalid platform"; + } +} + +static void ivpu_hw_read_platform(struct ivpu_device *vdev) +{ + u32 gen_ctrl = REGV_RD32(MTL_VPU_HOST_SS_GEN_CTRL); + u32 platform = REG_GET_FLD(MTL_VPU_HOST_SS_GEN_CTRL, PS, gen_ctrl); + + if (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA) + vdev->platform = platform; + else + vdev->platform = IVPU_PLATFORM_SILICON; + + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", + ivpu_platform_to_str(vdev->platform), vdev->platform); +} + +static void ivpu_hw_wa_init(struct ivpu_device *vdev) +{ + vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.clear_runtime_mem = false; +} + +static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) +{ + if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) { + vdev->timeout.boot = 100000; + vdev->timeout.jsm = 50000; + vdev->timeout.tdr = 2000000; + vdev->timeout.reschedule_suspend = 1000; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + vdev->timeout.reschedule_suspend = 10; + } +} + +static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) +{ + return REGB_POLL_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); +} + +/* Send KMD initiated workpoint change */ +static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, + u16 target_ratio, u16 config) +{ + int ret; + u32 val; + + ret = ivpu_pll_wait_for_cmd_send(vdev); + if (ret) { + ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_CMD); + val = REG_SET_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_CMD, val); + + ret = ivpu_pll_wait_for_cmd_send(vdev); + if (ret) + ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); + + return ret; +} + +static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(MTL_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); +} + +static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(MTL_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); +} + +static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u8 fuse_min_ratio, fuse_max_ratio, fuse_pn_ratio; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(MTL_BUTTRESS_FMIN_FUSE); + fuse_min_ratio = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); + fuse_pn_ratio = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(MTL_BUTTRESS_FMAX_FUSE); + fuse_max_ratio = REG_GET_FLD(MTL_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); + + hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); + hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); + hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); +} + +static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) +{ + struct ivpu_hw_info *hw = vdev->hw; + u16 target_ratio; + u16 config; + int ret; + + if (IVPU_WA(punit_disabled)) { + ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n", + ivpu_platform_to_str(vdev->platform)); + return 0; + } + + if (enable) { + target_ratio = hw->pll.pn_ratio; + config = hw->config; + } else { + target_ratio = 0; + config = 0; + } + + ivpu_dbg(vdev, PM, "PLL workpoint request: %d Hz\n", PLL_RATIO_TO_FREQ(target_ratio)); + + ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config); + if (ret) { + ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); + return ret; + } + + ret = ivpu_pll_wait_for_lock(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL lock\n"); + return ret; + } + + if (enable) { + ret = ivpu_pll_wait_for_status_ready(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); + return ret; + } + } + + return 0; +} + +static int ivpu_pll_enable(struct ivpu_device *vdev) +{ + return ivpu_pll_drive(vdev, true); +} + +static int ivpu_pll_disable(struct ivpu_device *vdev) +{ + return ivpu_pll_drive(vdev, false); +} + +static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_CLR); + + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, TOP_NOC, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, DSS_MAS, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, MSS_MAS, val); + + REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_CLR, val); +} + +static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_SET); + + if (enable) { + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } + + REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_SET, val); +} + +static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_CLK_SET); + + if (enable) { + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } + + REGV_WR32(MTL_VPU_HOST_SS_CPR_CLK_SET, val); +} + +static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev) +{ + ivpu_boot_host_ss_rst_clr_assert(vdev); + + return ivpu_boot_noc_qreqn_check(vdev, 0x0); +} + +static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev) +{ + REGV_WR32(MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN, 0x0); +} + +static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN); + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(MTL_VPU_HOST_SS_NOC_QREQN, val); + + ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_axi_drive(vdev, true); +} + +static int ivpu_boot_host_ss_axi_disable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_axi_drive(vdev, false); +} + +static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); + if (enable) { + val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val); + + ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_top_noc_drive(vdev, true); +} + +static int ivpu_boot_host_ss_top_noc_disable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_top_noc_drive(vdev, false); +} + +static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); +} + +static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, val); +} + +static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) +{ + /* FPGA model (UPF) is not power aware, skipped Power Island polling */ + if (ivpu_is_fpga(vdev)) + return 0; + + return REGV_POLL_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, + exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); +} + +static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, val); +} + +static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, val); +} + +static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) +{ + ivpu_boot_dpu_active_drive(vdev, false); + ivpu_boot_pwr_island_isolation_drive(vdev, true); + ivpu_boot_pwr_island_trickle_drive(vdev, false); + ivpu_boot_pwr_island_drive(vdev, false); + + return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); +} + +static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) +{ + int ret; + + ivpu_boot_pwr_island_trickle_drive(vdev, true); + ivpu_boot_pwr_island_drive(vdev, true); + + ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); + if (ret) { + ivpu_err(vdev, "Timed out waiting for power island status\n"); + return ret; + } + + ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qrenqn check %d\n", ret); + return ret; + } + + ivpu_boot_host_ss_clk_drive(vdev, true); + ivpu_boot_pwr_island_isolation_drive(vdev, false); + ivpu_boot_host_ss_rst_drive(vdev, true); + ivpu_boot_dpu_active_drive(vdev, true); + + return ret; +} + +static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + + REGV_WR32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_IF_TBU_MMUSSIDV); + + if (ivpu_is_fpga(vdev)) { + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + } else { + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_ARMMUSSIDV, val); + } + + REGV_WR32(MTL_VPU_HOST_IF_TBU_MMUSSIDV, val); +} + +static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) +{ + u32 val; + + val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); + val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); + + val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); + REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); +} + +static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 tansition: %d\n", ret); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_VPU_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, val); + REGB_WR32(MTL_BUTTRESS_VPU_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to sync after D0i3 tansition: %d\n", ret); + + return ret; +} + +static int ivpu_hw_mtl_info_init(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 tile_fuse; + + tile_fuse = REGB_RD32(MTL_BUTTRESS_TILE_FUSE); + if (!REG_TEST_FLD(MTL_BUTTRESS_TILE_FUSE, VALID, tile_fuse)) + ivpu_warn(vdev, "Tile Fuse: Invalid (0x%x)\n", tile_fuse); + + hw->tile_fuse = REG_GET_FLD(MTL_BUTTRESS_TILE_FUSE, SKU, tile_fuse); + switch (hw->tile_fuse) { + case TILE_FUSE_ENABLE_LOWER: + hw->sku = TILE_SKU_LOWER_MTL; + hw->config = WP_CONFIG_1_TILE_5_3_RATIO; + ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Lower\n"); + break; + case TILE_FUSE_ENABLE_UPPER: + hw->sku = TILE_SKU_UPPER_MTL; + hw->config = WP_CONFIG_1_TILE_4_3_RATIO; + ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Upper\n"); + break; + case TILE_FUSE_ENABLE_BOTH: + hw->sku = TILE_SKU_BOTH_MTL; + hw->config = WP_CONFIG_2_TILE_5_3_RATIO; + ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Both\n"); + break; + default: + hw->config = WP_CONFIG_0_TILE_PLL_OFF; + ivpu_dbg(vdev, MISC, "Tile Fuse: Disable\n"); + break; + } + + ivpu_pll_init_frequency_ratios(vdev); + + ivpu_hw_init_range(&hw->ranges.global_low, 0x80000000, SZ_512M); + ivpu_hw_init_range(&hw->ranges.global_high, 0x180000000, SZ_2M); + ivpu_hw_init_range(&hw->ranges.user_low, 0xc0000000, 255 * SZ_1M); + ivpu_hw_init_range(&hw->ranges.user_high, 0x180000000, SZ_2G); + hw->ranges.global_aliased_pio = hw->ranges.user_low; + + return 0; +} + +static int ivpu_hw_mtl_reset(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_VPU_IP_RESET); + val = REG_SET_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(MTL_BUTTRESS_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +static int ivpu_hw_mtl_d0i3_enable(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_boot_d0i3_drive(vdev, true); + if (ret) + ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); + + udelay(5); /* VPU requires 5 us to complete the transition */ + + return ret; +} + +static int ivpu_hw_mtl_d0i3_disable(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_boot_d0i3_drive(vdev, false); + if (ret) + ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); + + return ret; +} + +static int ivpu_hw_mtl_power_up(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_read_platform(vdev); + ivpu_hw_wa_init(vdev); + ivpu_hw_timeouts_init(vdev); + + ret = ivpu_hw_mtl_reset(vdev); + if (ret) + ivpu_warn(vdev, "Failed to reset HW: %d\n", ret); + + ret = ivpu_hw_mtl_d0i3_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); + + ret = ivpu_pll_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); + return ret; + } + + ret = ivpu_boot_host_ss_configure(vdev); + if (ret) { + ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); + return ret; + } + + /* + * The control circuitry for vpu_idle indication logic powers up active. + * To ensure unnecessary low power mode signal from LRT during bring up, + * KMD disables the circuitry prior to bringing up the Main Power island. + */ + ivpu_boot_vpu_idle_gen_disable(vdev); + + ret = ivpu_boot_pwr_domain_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); + return ret; + } + + ret = ivpu_boot_host_ss_axi_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); + return ret; + } + + ret = ivpu_boot_host_ss_top_noc_enable(vdev); + if (ret) + ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); + + return ret; +} + +static int ivpu_hw_mtl_boot_fw(struct ivpu_device *vdev) +{ + ivpu_boot_no_snoop_enable(vdev); + ivpu_boot_tbu_mmu_enable(vdev); + ivpu_boot_soc_cpu_boot(vdev); + + return 0; +} + +static bool ivpu_hw_mtl_is_idle(struct ivpu_device *vdev) +{ + u32 val; + + if (IVPU_WA(punit_disabled)) + return true; + + val = REGB_RD32(MTL_BUTTRESS_VPU_STATUS); + return REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, READY, val) && + REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, IDLE, val); +} + +static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev) +{ + int ret = 0; + + /* FPGA requires manual clearing of IP_Reset bit by enabling quiescent state */ + if (ivpu_is_fpga(vdev)) { + if (ivpu_boot_host_ss_top_noc_disable(vdev)) { + ivpu_err(vdev, "Failed to disable TOP NOC\n"); + ret = -EIO; + } + + if (ivpu_boot_host_ss_axi_disable(vdev)) { + ivpu_err(vdev, "Failed to disable AXI\n"); + ret = -EIO; + } + } + + if (ivpu_boot_pwr_domain_disable(vdev)) { + ivpu_err(vdev, "Failed to disable power domain\n"); + ret = -EIO; + } + + if (ivpu_pll_disable(vdev)) { + ivpu_err(vdev, "Failed to disable PLL\n"); + ret = -EIO; + } + + if (ivpu_hw_mtl_d0i3_enable(vdev)) + ivpu_warn(vdev, "Failed to enable D0I3\n"); + + return ret; +} + +static void ivpu_hw_mtl_wdt_disable(struct ivpu_device *vdev) +{ + u32 val; + + /* Enable writing and set non-zero WDT value */ + REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + /* Enable writing and disable watchdog timer */ + REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0); + + /* Now clear the timeout interrupt */ + val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val); +} + +/* Register indirect accesses */ +static u32 ivpu_hw_mtl_reg_pll_freq_get(struct ivpu_device *vdev) +{ + u32 pll_curr_ratio; + + pll_curr_ratio = REGB_RD32(MTL_BUTTRESS_CURRENT_PLL); + pll_curr_ratio &= MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK; + + if (!ivpu_is_silicon(vdev)) + return PLL_SIMULATION_FREQ; + + return PLL_RATIO_TO_FREQ(pll_curr_ratio); +} + +static u32 ivpu_hw_mtl_reg_telemetry_offset_get(struct ivpu_device *vdev) +{ + return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_OFFSET); +} + +static u32 ivpu_hw_mtl_reg_telemetry_size_get(struct ivpu_device *vdev) +{ + return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_SIZE); +} + +static u32 ivpu_hw_mtl_reg_telemetry_enable_get(struct ivpu_device *vdev) +{ + return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_ENABLE); +} + +static void ivpu_hw_mtl_reg_db_set(struct ivpu_device *vdev, u32 db_id) +{ + u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET); + + REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val); +} + +static u32 ivpu_hw_mtl_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + return REGV_RD32(MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM); +} + +static u32 ivpu_hw_mtl_reg_ipc_rx_count_get(struct ivpu_device *vdev) +{ + u32 count = REGV_RD32_SILENT(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +static void ivpu_hw_mtl_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr); +} + +static void ivpu_hw_mtl_irq_clear(struct ivpu_device *vdev) +{ + REGV_WR64(MTL_VPU_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); +} + +static void ivpu_hw_mtl_irq_enable(struct ivpu_device *vdev) +{ + REGV_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); + REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); + REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); +} + +static void ivpu_hw_mtl_irq_disable(struct ivpu_device *vdev) +{ + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1); + REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); + REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGB_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, 0x0); +} + +static void ivpu_hw_mtl_irq_wdt_nce_handler(struct ivpu_device *vdev) +{ + ivpu_err_ratelimited(vdev, "WDT NCE irq\n"); +} + +static void ivpu_hw_mtl_irq_wdt_mss_handler(struct ivpu_device *vdev) +{ + ivpu_err_ratelimited(vdev, "WDT MSS irq\n"); + + ivpu_hw_wdt_disable(vdev); +} + +static void ivpu_hw_mtl_irq_noc_firewall_handler(struct ivpu_device *vdev) +{ + ivpu_err_ratelimited(vdev, "NOC Firewall irq\n"); +} + +/* Handler for IRQs from VPU core (irqV) */ +static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; + + REGV_WR32(MTL_VPU_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + ivpu_hw_mtl_irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) + ivpu_hw_mtl_irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + ivpu_hw_mtl_irq_noc_firewall_handler(vdev); + + return status; +} + +/* Handler for IRQs from Buttress core (irqB) */ +static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; + + if (status == 0) + return 0; + + /* Disable global interrupt before handling local buttress interrupts */ + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(MTL_BUTTRESS_CURRENT_PLL)); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); + REGB_WR32(MTL_BUTTRESS_ATS_ERR_CLEAR, 0x1); + } + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) { + u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); + REGB_WR32(MTL_BUTTRESS_UFI_ERR_CLEAR, 0x1); + } + + /* + * Clear local interrupt status by writing 0 to all bits. + * This must be done after interrupts are cleared at the source. + * Writing 1 triggers an interrupt, so we can't perform read update write. + */ + REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, 0x0); + + /* Re-enable global interrupt */ + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); + + return status; +} + +static irqreturn_t ivpu_hw_mtl_irq_handler(int irq, void *ptr) +{ + struct ivpu_device *vdev = ptr; + u32 ret_irqv, ret_irqb; + + ret_irqv = ivpu_hw_mtl_irqv_handler(vdev, irq); + ret_irqb = ivpu_hw_mtl_irqb_handler(vdev, irq); + + return IRQ_RETVAL(ret_irqb | ret_irqv); +} + +static void ivpu_hw_mtl_diagnose_failure(struct ivpu_device *vdev) +{ + u32 irqv = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; + u32 irqb = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; + + if (ivpu_hw_mtl_reg_ipc_rx_count_get(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) { + u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); + } +} + +const struct ivpu_hw_ops ivpu_hw_mtl_ops = { + .info_init = ivpu_hw_mtl_info_init, + .power_up = ivpu_hw_mtl_power_up, + .is_idle = ivpu_hw_mtl_is_idle, + .power_down = ivpu_hw_mtl_power_down, + .boot_fw = ivpu_hw_mtl_boot_fw, + .wdt_disable = ivpu_hw_mtl_wdt_disable, + .diagnose_failure = ivpu_hw_mtl_diagnose_failure, + .reg_pll_freq_get = ivpu_hw_mtl_reg_pll_freq_get, + .reg_telemetry_offset_get = ivpu_hw_mtl_reg_telemetry_offset_get, + .reg_telemetry_size_get = ivpu_hw_mtl_reg_telemetry_size_get, + .reg_telemetry_enable_get = ivpu_hw_mtl_reg_telemetry_enable_get, + .reg_db_set = ivpu_hw_mtl_reg_db_set, + .reg_ipc_rx_addr_get = ivpu_hw_mtl_reg_ipc_rx_addr_get, + .reg_ipc_rx_count_get = ivpu_hw_mtl_reg_ipc_rx_count_get, + .reg_ipc_tx_set = ivpu_hw_mtl_reg_ipc_tx_set, + .irq_clear = ivpu_hw_mtl_irq_clear, + .irq_enable = ivpu_hw_mtl_irq_enable, + .irq_disable = ivpu_hw_mtl_irq_disable, + .irq_handler = ivpu_hw_mtl_irq_handler, +}; diff --git a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h new file mode 100644 index 000000000000..d83ccfd9a871 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_MTL_REG_H__ +#define __IVPU_HW_MTL_REG_H__ + +#include + +#define MTL_BUTTRESS_INTERRUPT_TYPE 0x00000000u + +#define MTL_BUTTRESS_INTERRUPT_STAT 0x00000004u +#define MTL_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define MTL_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define MTL_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) + +#define MTL_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u +#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define MTL_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu +#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define MTL_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u +#define MTL_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) + +#define MTL_BUTTRESS_WP_REQ_CMD 0x00000014u +#define MTL_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define MTL_BUTTRESS_WP_DOWNLOAD 0x00000018u +#define MTL_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) + +#define MTL_BUTTRESS_CURRENT_PLL 0x0000001cu +#define MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) + +#define MTL_BUTTRESS_PLL_ENABLE 0x00000020u + +#define MTL_BUTTRESS_FMIN_FUSE 0x00000024u +#define MTL_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define MTL_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define MTL_BUTTRESS_FMAX_FUSE 0x00000028u +#define MTL_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#define MTL_BUTTRESS_TILE_FUSE 0x0000002cu +#define MTL_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define MTL_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) + +#define MTL_BUTTRESS_LOCAL_INT_MASK 0x00000030u +#define MTL_BUTTRESS_GLOBAL_INT_MASK 0x00000034u + +#define MTL_BUTTRESS_PLL_STATUS 0x00000040u +#define MTL_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1) + +#define MTL_BUTTRESS_VPU_STATUS 0x00000044u +#define MTL_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) +#define MTL_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) + +#define MTL_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u +#define MTL_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define MTL_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define MTL_BUTTRESS_VPU_IP_RESET 0x00000050u +#define MTL_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define MTL_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u +#define MTL_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u +#define MTL_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u + +#define MTL_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u +#define MTL_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u +#define MTL_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u + +#define MTL_BUTTRESS_UFI_ERR_LOG 0x000000b0u +#define MTL_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) +#define MTL_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) +#define MTL_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) + +#define MTL_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u + +#define MTL_VPU_HOST_SS_CPR_CLK_SET 0x00000084u +#define MTL_VPU_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) +#define MTL_VPU_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK BIT_MASK(11) + +#define MTL_VPU_HOST_SS_CPR_RST_SET 0x00000094u +#define MTL_VPU_HOST_SS_CPR_RST_SET_TOP_NOC_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_CPR_RST_SET_DSS_MAS_MASK BIT_MASK(10) +#define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11) + +#define MTL_VPU_HOST_SS_CPR_RST_CLR 0x00000098u +#define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10) +#define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11) + +#define MTL_VPU_HOST_SS_HW_VERSION 0x00000108u +#define MTL_VPU_HOST_SS_HW_VERSION_SOC_REVISION_MASK GENMASK(7, 0) +#define MTL_VPU_HOST_SS_HW_VERSION_SOC_NUMBER_MASK GENMASK(15, 8) +#define MTL_VPU_HOST_SS_HW_VERSION_VPU_GENERATION_MASK GENMASK(23, 16) + +#define MTL_VPU_HOST_SS_GEN_CTRL 0x00000118u +#define MTL_VPU_HOST_SS_GEN_CTRL_PS_MASK GENMASK(31, 29) + +#define MTL_VPU_HOST_SS_NOC_QREQN 0x00000154u +#define MTL_VPU_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_NOC_QACCEPTN 0x00000158u +#define MTL_VPU_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_NOC_QDENY 0x0000015cu +#define MTL_VPU_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define MTL_VPU_TOP_NOC_QREQN 0x00000160u +#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) +#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define MTL_VPU_TOP_NOC_QACCEPTN 0x00000164u +#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) +#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define MTL_VPU_TOP_NOC_QDENY 0x00000168u +#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) +#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN 0x00000170u +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK BIT_MASK(2) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK BIT_MASK(3) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK BIT_MASK(4) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK BIT_MASK(5) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK BIT_MASK(6) + +#define MTL_VPU_HOST_SS_ICB_STATUS_0 0x00010210u +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31) + +#define MTL_VPU_HOST_SS_ICB_STATUS_1 0x00010214u +#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK BIT_MASK(2) + +#define MTL_VPU_HOST_SS_ICB_CLEAR_0 0x00010220u +#define MTL_VPU_HOST_SS_ICB_CLEAR_1 0x00010224u +#define MTL_VPU_HOST_SS_ICB_ENABLE_0 0x00010240u + +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM 0x000200f4u + +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT 0x000200fcu +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_READ_POINTER_MASK GENMASK(7, 0) +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_WRITE_POINTER_MASK GENMASK(15, 8) +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16) +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK GENMASK(31, 24) + +#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0 0x00030020u +#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN 0x00030200u +#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN_EN_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE 0x00030204u +#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO 0x00041040u +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_DONE_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3) + +#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u +#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0) +#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16) + +#define MTL_VPU_HOST_MMU_IDR0 0x00200000u +#define MTL_VPU_HOST_MMU_IDR1 0x00200004u +#define MTL_VPU_HOST_MMU_IDR3 0x0020000cu +#define MTL_VPU_HOST_MMU_IDR5 0x00200014u +#define MTL_VPU_HOST_MMU_CR0 0x00200020u +#define MTL_VPU_HOST_MMU_CR0ACK 0x00200024u +#define MTL_VPU_HOST_MMU_CR1 0x00200028u +#define MTL_VPU_HOST_MMU_CR2 0x0020002cu +#define MTL_VPU_HOST_MMU_IRQ_CTRL 0x00200050u +#define MTL_VPU_HOST_MMU_IRQ_CTRLACK 0x00200054u + +#define MTL_VPU_HOST_MMU_GERROR 0x00200060u +#define MTL_VPU_HOST_MMU_GERROR_CMDQ_MASK BIT_MASK(0) +#define MTL_VPU_HOST_MMU_GERROR_EVTQ_ABT_MASK BIT_MASK(2) +#define MTL_VPU_HOST_MMU_GERROR_PRIQ_ABT_MASK BIT_MASK(3) +#define MTL_VPU_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4) +#define MTL_VPU_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5) +#define MTL_VPU_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6) +#define MTL_VPU_HOST_MMU_GERROR_MSI_ABT_MASK BIT_MASK(7) + +#define MTL_VPU_HOST_MMU_GERRORN 0x00200064u + +#define MTL_VPU_HOST_MMU_STRTAB_BASE 0x00200080u +#define MTL_VPU_HOST_MMU_STRTAB_BASE_CFG 0x00200088u +#define MTL_VPU_HOST_MMU_CMDQ_BASE 0x00200090u +#define MTL_VPU_HOST_MMU_CMDQ_PROD 0x00200098u +#define MTL_VPU_HOST_MMU_CMDQ_CONS 0x0020009cu +#define MTL_VPU_HOST_MMU_EVTQ_BASE 0x002000a0u +#define MTL_VPU_HOST_MMU_EVTQ_PROD 0x002000a8u +#define MTL_VPU_HOST_MMU_EVTQ_CONS 0x002000acu +#define MTL_VPU_HOST_MMU_EVTQ_PROD_SEC (0x002000a8u + SZ_64K) +#define MTL_VPU_HOST_MMU_EVTQ_CONS_SEC (0x002000acu + SZ_64K) + +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK BIT_MASK(3) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK BIT_MASK(4) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK BIT_MASK(5) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK GENMASK(10, 6) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK GENMASK(15, 11) + +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV 0x00360004u +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK BIT_MASK(0) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK BIT_MASK(1) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK BIT_MASK(2) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK BIT_MASK(3) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK BIT_MASK(4) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK BIT_MASK(5) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK BIT_MASK(6) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK BIT_MASK(7) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9) + +#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE 0x04000000u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u + +#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u +#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1) + +#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u +#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1) + +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4) + +#define MTL_VPU_CPU_SS_TIM_WATCHDOG 0x0602009cu +#define MTL_VPU_CPU_SS_TIM_WDOG_EN 0x060200a4u +#define MTL_VPU_CPU_SS_TIM_SAFE 0x060200a8u +#define MTL_VPU_CPU_SS_TIM_IPC_FIFO 0x060200f0u + +#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG 0x06021008u +#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) + +#define MTL_VPU_CPU_SS_DOORBELL_0 0x06300000u +#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) + +#define MTL_VPU_CPU_SS_DOORBELL_1 0x06301000u + +#endif /* __IVPU_HW_MTL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h new file mode 100644 index 000000000000..43c2c0c2d050 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_REG_IO_H__ +#define __IVPU_HW_REG_IO_H__ + +#include +#include +#include + +#include "ivpu_drv.h" + +#define REG_POLL_SLEEP_US 50 +#define REG_IO_ERROR 0xffffffff + +#define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__) +#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg)) +#define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__) +#define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__) +#define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__) + +#define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__) +#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg)) +#define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__) +#define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__) +#define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__) + +#define REGV_WR32I(reg, stride, index, val) \ + ivpu_hw_reg_wr32_index(vdev, vdev->regv, (reg), (stride), (index), (val), #reg, __func__) + +#define REG_FLD(REG, FLD) \ + (REG##_##FLD##_MASK) +#define REG_FLD_NUM(REG, FLD, num) \ + FIELD_PREP(REG##_##FLD##_MASK, num) +#define REG_GET_FLD(REG, FLD, val) \ + FIELD_GET(REG##_##FLD##_MASK, val) +#define REG_CLR_FLD(REG, FLD, val) \ + ((val) & ~(REG##_##FLD##_MASK)) +#define REG_SET_FLD(REG, FLD, val) \ + ((val) | (REG##_##FLD##_MASK)) +#define REG_SET_FLD_NUM(REG, FLD, num, val) \ + (((val) & ~(REG##_##FLD##_MASK)) | FIELD_PREP(REG##_##FLD##_MASK, num)) +#define REG_TEST_FLD(REG, FLD, val) \ + ((REG##_##FLD##_MASK) == ((val) & (REG##_##FLD##_MASK))) +#define REG_TEST_FLD_NUM(REG, FLD, num, val) \ + ((num) == FIELD_GET(REG##_##FLD##_MASK, val)) + +#define REGB_POLL(reg, var, cond, timeout_us) \ + read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) + +#define REGV_POLL(reg, var, cond, timeout_us) \ + read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) + +#define REGB_POLL_FLD(reg, fld, val, timeout_us) \ +({ \ + u32 var; \ + REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ +}) + +#define REGV_POLL_FLD(reg, fld, val, timeout_us) \ +({ \ + u32 var; \ + REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ +}) + +static inline u32 +ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg, + const char *name, const char *func) +{ + u32 val = readl(base + reg); + + ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val); + return val; +} + +static inline u64 +ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg, + const char *name, const char *func) +{ + u64 val = readq(base + reg); + + ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val); + return val; +} + +static inline void +ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val, + const char *name, const char *func) +{ + ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val); + writel(val, base + reg); +} + +static inline void +ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val, + const char *name, const char *func) +{ + ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val); + writeq(val, base + reg); +} + +static inline void +ivpu_hw_reg_wr32_index(struct ivpu_device *vdev, void __iomem *base, u32 reg, + u32 stride, u32 index, u32 val, const char *name, + const char *func) +{ + reg += index * stride; + + ivpu_dbg(vdev, REG, "%s WR: %s_%d (0x%08x) <= 0x%08x\n", func, name, index, reg, val); + writel(val, base + reg); +} + +#endif /* __IVPU_HW_REG_IO_H__ */ diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h new file mode 100644 index 000000000000..c6a98977eb8e --- /dev/null +++ b/include/uapi/drm/ivpu_accel.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __UAPI_IVPU_DRM_H__ +#define __UAPI_IVPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IVPU_DRIVER_MAJOR 1 +#define DRM_IVPU_DRIVER_MINOR 0 + +#define DRM_IVPU_GET_PARAM 0x00 +#define DRM_IVPU_SET_PARAM 0x01 + +#define DRM_IOCTL_IVPU_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) + +#define DRM_IOCTL_IVPU_SET_PARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param) + +/** + * DOC: contexts + * + * VPU contexts have private virtual address space, job queues and priority. + * Each context is identified by an unique ID. Context is created on open(). + */ + +#define DRM_IVPU_PARAM_DEVICE_ID 0 +#define DRM_IVPU_PARAM_DEVICE_REVISION 1 +#define DRM_IVPU_PARAM_PLATFORM_TYPE 2 +#define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3 +#define DRM_IVPU_PARAM_NUM_CONTEXTS 4 +#define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 +#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 + +#define DRM_IVPU_PLATFORM_TYPE_SILICON 0 + +#define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0 +#define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1 +#define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 +#define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 + +/** + * struct drm_ivpu_param - Get/Set VPU parameters + */ +struct drm_ivpu_param { + /** + * @param: + * + * Supported params: + * + * %DRM_IVPU_PARAM_DEVICE_ID: + * PCI Device ID of the VPU device (read-only) + * + * %DRM_IVPU_PARAM_DEVICE_REVISION: + * VPU device revision (read-only) + * + * %DRM_IVPU_PARAM_PLATFORM_TYPE: + * Returns %DRM_IVPU_PLATFORM_TYPE_SILICON on real hardware or device specific + * platform type when executing on a simulator or emulator (read-only) + * + * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: + * Current PLL frequency (read-only) + * + * %DRM_IVPU_PARAM_NUM_CONTEXTS: + * Maximum number of simultaneously existing contexts (read-only) + * + * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: + * Lowest VPU virtual address available in the current context (read-only) + * + * %DRM_IVPU_PARAM_CONTEXT_PRIORITY: + * Value of current context scheduling priority (read-write). + * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. + * + */ + __u32 param; + + /** @index: Index for params that have multiple instances */ + __u32 index; + + /** @value: Param value */ + __u64 value; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* __UAPI_IVPU_DRM_H__ */ From 263b2ba5fc93c875129e0d2b4034d7d8a34b3d39 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:18 +0100 Subject: [PATCH 15/45] accel/ivpu: Add Intel VPU MMU support VPU Memory Management Unit is based on ARM MMU-600. It allows the creation of multiple virtual address spaces for the device and map noncontinuous host memory (there is no dedicated memory on the VPU). Address space is implemented as a struct ivpu_mmu_context, it has an ID, drm_mm allocator for VPU addresses and struct ivpu_mmu_pgtable that holds actual 3-level, 4KB page table. Context with ID 0 (global context) is created upon driver initialization and it's mainly used for mapping memory required to execute the firmware. Contexts with non-zero IDs are user contexts allocated each time the devices is open()-ed and they map command buffers and other workload-related memory. Workloads executing in a given contexts have access only to the memory mapped in this context. This patch is has two main files: - ivpu_mmu_context.c handles MMU page tables and memory mapping - ivpu_mmu.c implements a driver that programs the MMU device Co-developed-by: Karol Wachowski Signed-off-by: Karol Wachowski Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 4 +- drivers/accel/ivpu/ivpu_drv.c | 83 ++- drivers/accel/ivpu/ivpu_drv.h | 7 + drivers/accel/ivpu/ivpu_hw_mtl.c | 10 + drivers/accel/ivpu/ivpu_mmu.c | 876 ++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_mmu.h | 50 ++ drivers/accel/ivpu/ivpu_mmu_context.c | 398 ++++++++++++ drivers/accel/ivpu/ivpu_mmu_context.h | 50 ++ include/uapi/drm/ivpu_accel.h | 4 + 9 files changed, 1479 insertions(+), 3 deletions(-) create mode 100644 drivers/accel/ivpu/ivpu_mmu.c create mode 100644 drivers/accel/ivpu/ivpu_mmu.h create mode 100644 drivers/accel/ivpu/ivpu_mmu_context.c create mode 100644 drivers/accel/ivpu/ivpu_mmu_context.h diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 2cc8394101cd..59cd7843b218 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -3,6 +3,8 @@ intel_vpu-y := \ ivpu_drv.o \ - ivpu_hw_mtl.o + ivpu_hw_mtl.o \ + ivpu_mmu.o \ + ivpu_mmu_context.o obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o \ No newline at end of file diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 1134fb0028ad..69ca7dae8b57 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -15,6 +15,8 @@ #include "ivpu_drv.h" #include "ivpu_hw.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" #ifndef DRIVER_VERSION_STR #define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ @@ -37,23 +39,38 @@ MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) { + struct ivpu_device *vdev = file_priv->vdev; + kref_get(&file_priv->ref); + + ivpu_dbg(vdev, KREF, "file_priv get: ctx %u refcount %u\n", + file_priv->ctx.id, kref_read(&file_priv->ref)); + return file_priv; } static void file_priv_release(struct kref *ref) { struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); + struct ivpu_device *vdev = file_priv->vdev; + ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); + WARN_ON(xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); kfree(file_priv); } void ivpu_file_priv_put(struct ivpu_file_priv **link) { struct ivpu_file_priv *file_priv = *link; + struct ivpu_device *vdev = file_priv->vdev; WARN_ON(!file_priv); + ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", + file_priv->ctx.id, kref_read(&file_priv->ref)); + *link = NULL; kref_put(&file_priv->ref, file_priv_release); } @@ -88,6 +105,9 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f case DRM_IVPU_PARAM_CONTEXT_PRIORITY: args->value = file_priv->priority; break; + case DRM_IVPU_PARAM_CONTEXT_ID: + args->value = file_priv->ctx.id; + break; default: ret = -EINVAL; break; @@ -120,22 +140,59 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) { struct ivpu_device *vdev = to_ivpu_device(dev); struct ivpu_file_priv *file_priv; + u32 ctx_id; + void *old; + int ret; + + ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL); + if (ret) { + ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); + return ret; + } file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); - if (!file_priv) - return -ENOMEM; + if (!file_priv) { + ret = -ENOMEM; + goto err_xa_erase; + } file_priv->vdev = vdev; file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; kref_init(&file_priv->ref); + ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); + if (ret) + goto err_free_file_priv; + + old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); + if (xa_is_err(old)) { + ret = xa_err(old); + ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret); + goto err_ctx_fini; + } + + ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n", + ctx_id, current->comm, task_pid_nr(current)); + file->driver_priv = file_priv; return 0; + +err_ctx_fini: + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); +err_free_file_priv: + kfree(file_priv); +err_xa_erase: + xa_erase_irq(&vdev->context_xa, ctx_id); + return ret; } static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = to_ivpu_device(dev); + + ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n", + file_priv->ctx.id, current->comm, task_pid_nr(current)); ivpu_file_priv_put(&file_priv); } @@ -150,6 +207,7 @@ int ivpu_shutdown(struct ivpu_device *vdev) int ret; ivpu_hw_irq_disable(vdev); + ivpu_mmu_disable(vdev); ret = ivpu_hw_power_down(vdev); if (ret) @@ -251,6 +309,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (!vdev->hw) return -ENOMEM; + vdev->mmu = drmm_kzalloc(&vdev->drm, sizeof(*vdev->mmu), GFP_KERNEL); + if (!vdev->mmu) + return -ENOMEM; + vdev->hw->ops = &ivpu_hw_mtl_ops; vdev->platform = IVPU_PLATFORM_INVALID; vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; @@ -283,8 +345,24 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_xa_destroy; } + ret = ivpu_mmu_global_context_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret); + goto err_power_down; + } + + ret = ivpu_mmu_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret); + goto err_mmu_gctx_fini; + } + return 0; +err_mmu_gctx_fini: + ivpu_mmu_global_context_fini(vdev); +err_power_down: + ivpu_hw_power_down(vdev); err_xa_destroy: xa_destroy(&vdev->context_xa); return ret; @@ -293,6 +371,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_shutdown(vdev); + ivpu_mmu_global_context_fini(vdev); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); xa_destroy(&vdev->context_xa); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index d0b006893b1c..b9ce66f40696 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -15,6 +15,8 @@ #include #include +#include "ivpu_mmu_context.h" + #define DRIVER_NAME "intel_vpu" #define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" #define DRIVER_DATE "20230117" @@ -71,6 +73,7 @@ struct ivpu_wa_table { }; struct ivpu_hw_info; +struct ivpu_mmu_info; struct ivpu_device { struct drm_device drm; @@ -81,7 +84,9 @@ struct ivpu_device { struct ivpu_wa_table wa; struct ivpu_hw_info *hw; + struct ivpu_mmu_info *mmu; + struct ivpu_mmu_context gctx; struct xarray context_xa; struct xa_limit context_xa_limit; @@ -100,7 +105,9 @@ struct ivpu_device { struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; + struct ivpu_mmu_context ctx; u32 priority; + bool has_mmu_faults; }; extern int ivpu_dbg_mask; diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 8a1b1c03f77b..eaba2b8248b8 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -7,6 +7,7 @@ #include "ivpu_hw_mtl_reg.h" #include "ivpu_hw_reg_io.h" #include "ivpu_hw.h" +#include "ivpu_mmu.h" #define TILE_FUSE_ENABLE_BOTH 0x0 #define TILE_FUSE_ENABLE_UPPER 0x1 @@ -930,6 +931,15 @@ static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) REGV_WR32(MTL_VPU_HOST_SS_ICB_CLEAR_0, status); + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) ivpu_hw_mtl_irq_wdt_mss_handler(vdev); diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c new file mode 100644 index 000000000000..ff5ef6da1fd3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -0,0 +1,876 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_hw_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" + +#define IVPU_MMU_IDR0_REF 0x080f3e0f +#define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f +#define IVPU_MMU_IDR1_REF 0x0e739d18 +#define IVPU_MMU_IDR3_REF 0x0000003c +#define IVPU_MMU_IDR5_REF 0x00040070 +#define IVPU_MMU_IDR5_REF_SIMICS 0x00000075 +#define IVPU_MMU_IDR5_REF_FPGA 0x00800075 + +#define IVPU_MMU_CDTAB_ENT_SIZE 64 +#define IVPU_MMU_CDTAB_ENT_COUNT_LOG2 8 /* 256 entries */ +#define IVPU_MMU_CDTAB_ENT_COUNT ((u32)1 << IVPU_MMU_CDTAB_ENT_COUNT_LOG2) + +#define IVPU_MMU_STREAM_ID0 0 +#define IVPU_MMU_STREAM_ID3 3 + +#define IVPU_MMU_STRTAB_ENT_SIZE 64 +#define IVPU_MMU_STRTAB_ENT_COUNT 4 +#define IVPU_MMU_STRTAB_CFG_LOG2SIZE 2 +#define IVPU_MMU_STRTAB_CFG IVPU_MMU_STRTAB_CFG_LOG2SIZE + +#define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */ +#define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2) +#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1) +#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1) +#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) +#define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK) + +#define IVPU_MMU_CMDQ_CMD_SIZE 16 +#define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE) + +#define IVPU_MMU_EVTQ_CMD_SIZE 32 +#define IVPU_MMU_EVTQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_EVTQ_CMD_SIZE) + +#define IVPU_MMU_CMD_OPCODE GENMASK(7, 0) + +#define IVPU_MMU_CMD_SYNC_0_CS GENMASK(13, 12) +#define IVPU_MMU_CMD_SYNC_0_MSH GENMASK(23, 22) +#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR GENMASK(27, 24) +#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR GENMASK(27, 24) +#define IVPU_MMU_CMD_SYNC_0_MSI_DATA GENMASK(63, 32) + +#define IVPU_MMU_CMD_CFGI_0_SSEC BIT(10) +#define IVPU_MMU_CMD_CFGI_0_SSV BIT(11) +#define IVPU_MMU_CMD_CFGI_0_SSID GENMASK(31, 12) +#define IVPU_MMU_CMD_CFGI_0_SID GENMASK(63, 32) +#define IVPU_MMU_CMD_CFGI_1_RANGE GENMASK(4, 0) + +#define IVPU_MMU_CMD_TLBI_0_ASID GENMASK(63, 48) +#define IVPU_MMU_CMD_TLBI_0_VMID GENMASK(47, 32) + +#define CMD_PREFETCH_CFG 0x1 +#define CMD_CFGI_STE 0x3 +#define CMD_CFGI_ALL 0x4 +#define CMD_CFGI_CD 0x5 +#define CMD_CFGI_CD_ALL 0x6 +#define CMD_TLBI_NH_ASID 0x11 +#define CMD_TLBI_EL2_ALL 0x20 +#define CMD_TLBI_NSNH_ALL 0x30 +#define CMD_SYNC 0x46 + +#define IVPU_MMU_EVT_F_UUT 0x01 +#define IVPU_MMU_EVT_C_BAD_STREAMID 0x02 +#define IVPU_MMU_EVT_F_STE_FETCH 0x03 +#define IVPU_MMU_EVT_C_BAD_STE 0x04 +#define IVPU_MMU_EVT_F_BAD_ATS_TREQ 0x05 +#define IVPU_MMU_EVT_F_STREAM_DISABLED 0x06 +#define IVPU_MMU_EVT_F_TRANSL_FORBIDDEN 0x07 +#define IVPU_MMU_EVT_C_BAD_SUBSTREAMID 0x08 +#define IVPU_MMU_EVT_F_CD_FETCH 0x09 +#define IVPU_MMU_EVT_C_BAD_CD 0x0a +#define IVPU_MMU_EVT_F_WALK_EABT 0x0b +#define IVPU_MMU_EVT_F_TRANSLATION 0x10 +#define IVPU_MMU_EVT_F_ADDR_SIZE 0x11 +#define IVPU_MMU_EVT_F_ACCESS 0x12 +#define IVPU_MMU_EVT_F_PERMISSION 0x13 +#define IVPU_MMU_EVT_F_TLB_CONFLICT 0x20 +#define IVPU_MMU_EVT_F_CFG_CONFLICT 0x21 +#define IVPU_MMU_EVT_E_PAGE_REQUEST 0x24 +#define IVPU_MMU_EVT_F_VMS_FETCH 0x25 + +#define IVPU_MMU_EVT_OP_MASK GENMASK_ULL(7, 0) +#define IVPU_MMU_EVT_SSID_MASK GENMASK_ULL(31, 12) + +#define IVPU_MMU_Q_BASE_RWA BIT(62) +#define IVPU_MMU_Q_BASE_ADDR_MASK GENMASK_ULL(51, 5) +#define IVPU_MMU_STRTAB_BASE_RA BIT(62) +#define IVPU_MMU_STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6) + +#define IVPU_MMU_IRQ_EVTQ_EN BIT(2) +#define IVPU_MMU_IRQ_GERROR_EN BIT(0) + +#define IVPU_MMU_CR0_ATSCHK BIT(4) +#define IVPU_MMU_CR0_CMDQEN BIT(3) +#define IVPU_MMU_CR0_EVTQEN BIT(2) +#define IVPU_MMU_CR0_PRIQEN BIT(1) +#define IVPU_MMU_CR0_SMMUEN BIT(0) + +#define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10) +#define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8) +#define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6) +#define IVPU_MMU_CR1_QUEUE_SH GENMASK(5, 4) +#define IVPU_MMU_CR1_QUEUE_OC GENMASK(3, 2) +#define IVPU_MMU_CR1_QUEUE_IC GENMASK(1, 0) +#define IVPU_MMU_CACHE_NC 0 +#define IVPU_MMU_CACHE_WB 1 +#define IVPU_MMU_CACHE_WT 2 +#define IVPU_MMU_SH_NSH 0 +#define IVPU_MMU_SH_OSH 2 +#define IVPU_MMU_SH_ISH 3 + +#define IVPU_MMU_CMDQ_OP GENMASK_ULL(7, 0) + +#define IVPU_MMU_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) +#define IVPU_MMU_CD_0_TCR_TG0 GENMASK_ULL(7, 6) +#define IVPU_MMU_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) +#define IVPU_MMU_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10) +#define IVPU_MMU_CD_0_TCR_SH0 GENMASK_ULL(13, 12) +#define IVPU_MMU_CD_0_TCR_EPD0 BIT_ULL(14) +#define IVPU_MMU_CD_0_TCR_EPD1 BIT_ULL(30) +#define IVPU_MMU_CD_0_ENDI BIT(15) +#define IVPU_MMU_CD_0_V BIT(31) +#define IVPU_MMU_CD_0_TCR_IPS GENMASK_ULL(34, 32) +#define IVPU_MMU_CD_0_TCR_TBI0 BIT_ULL(38) +#define IVPU_MMU_CD_0_AA64 BIT(41) +#define IVPU_MMU_CD_0_S BIT(44) +#define IVPU_MMU_CD_0_R BIT(45) +#define IVPU_MMU_CD_0_A BIT(46) +#define IVPU_MMU_CD_0_ASET BIT(47) +#define IVPU_MMU_CD_0_ASID GENMASK_ULL(63, 48) + +#define IVPU_MMU_CD_1_TTB0_MASK GENMASK_ULL(51, 4) + +#define IVPU_MMU_STE_0_S1CDMAX GENMASK_ULL(63, 59) +#define IVPU_MMU_STE_0_S1FMT GENMASK_ULL(5, 4) +#define IVPU_MMU_STE_0_S1FMT_LINEAR 0 +#define IVPU_MMU_STE_DWORDS 8 +#define IVPU_MMU_STE_0_CFG_S1_TRANS 5 +#define IVPU_MMU_STE_0_CFG GENMASK_ULL(3, 1) +#define IVPU_MMU_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) +#define IVPU_MMU_STE_0_V BIT(0) + +#define IVPU_MMU_STE_1_STRW_NSEL1 0ul +#define IVPU_MMU_STE_1_CONT GENMASK_ULL(16, 13) +#define IVPU_MMU_STE_1_STRW GENMASK_ULL(31, 30) +#define IVPU_MMU_STE_1_PRIVCFG GENMASK_ULL(49, 48) +#define IVPU_MMU_STE_1_PRIVCFG_UNPRIV 2ul +#define IVPU_MMU_STE_1_INSTCFG GENMASK_ULL(51, 50) +#define IVPU_MMU_STE_1_INSTCFG_DATA 2ul +#define IVPU_MMU_STE_1_MEV BIT(19) +#define IVPU_MMU_STE_1_S1STALLD BIT(27) +#define IVPU_MMU_STE_1_S1C_CACHE_NC 0ul +#define IVPU_MMU_STE_1_S1C_CACHE_WBRA 1ul +#define IVPU_MMU_STE_1_S1C_CACHE_WT 2ul +#define IVPU_MMU_STE_1_S1C_CACHE_WB 3ul +#define IVPU_MMU_STE_1_S1CIR GENMASK_ULL(3, 2) +#define IVPU_MMU_STE_1_S1COR GENMASK_ULL(5, 4) +#define IVPU_MMU_STE_1_S1CSH GENMASK_ULL(7, 6) +#define IVPU_MMU_STE_1_S1DSS GENMASK_ULL(1, 0) +#define IVPU_MMU_STE_1_S1DSS_TERMINATE 0x0 + +#define IVPU_MMU_REG_TIMEOUT_US (10 * USEC_PER_MSEC) +#define IVPU_MMU_QUEUE_TIMEOUT_US (100 * USEC_PER_MSEC) + +#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT))) + +static char *ivpu_mmu_event_to_str(u32 cmd) +{ + switch (cmd) { + case IVPU_MMU_EVT_F_UUT: + return "Unsupported Upstream Transaction"; + case IVPU_MMU_EVT_C_BAD_STREAMID: + return "Transaction StreamID out of range"; + case IVPU_MMU_EVT_F_STE_FETCH: + return "Fetch of STE caused external abort"; + case IVPU_MMU_EVT_C_BAD_STE: + return "Used STE invalid"; + case IVPU_MMU_EVT_F_BAD_ATS_TREQ: + return "Address Request disallowed for a StreamID"; + case IVPU_MMU_EVT_F_STREAM_DISABLED: + return "Transaction marks non-substream disabled"; + case IVPU_MMU_EVT_F_TRANSL_FORBIDDEN: + return "MMU bypass is disallowed for this StreamID"; + case IVPU_MMU_EVT_C_BAD_SUBSTREAMID: + return "Invalid StreamID"; + case IVPU_MMU_EVT_F_CD_FETCH: + return "Fetch of CD caused external abort"; + case IVPU_MMU_EVT_C_BAD_CD: + return "Fetched CD invalid"; + case IVPU_MMU_EVT_F_WALK_EABT: + return " An external abort occurred fetching a TLB"; + case IVPU_MMU_EVT_F_TRANSLATION: + return "Translation fault"; + case IVPU_MMU_EVT_F_ADDR_SIZE: + return " Output address caused address size fault"; + case IVPU_MMU_EVT_F_ACCESS: + return "Access flag fault"; + case IVPU_MMU_EVT_F_PERMISSION: + return "Permission fault occurred on page access"; + case IVPU_MMU_EVT_F_TLB_CONFLICT: + return "A TLB conflict"; + case IVPU_MMU_EVT_F_CFG_CONFLICT: + return "A configuration cache conflict"; + case IVPU_MMU_EVT_E_PAGE_REQUEST: + return "Page request hint from a client device"; + case IVPU_MMU_EVT_F_VMS_FETCH: + return "Fetch of VMS caused external abort"; + default: + return "Unknown CMDQ command"; + } +} + +static void ivpu_mmu_config_check(struct ivpu_device *vdev) +{ + u32 val_ref; + u32 val; + + if (ivpu_is_simics(vdev)) + val_ref = IVPU_MMU_IDR0_REF_SIMICS; + else + val_ref = IVPU_MMU_IDR0_REF; + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR0); + if (val != val_ref) + ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref); + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR1); + if (val != IVPU_MMU_IDR1_REF) + ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF); + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR3); + if (val != IVPU_MMU_IDR3_REF) + ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF); + + if (ivpu_is_simics(vdev)) + val_ref = IVPU_MMU_IDR5_REF_SIMICS; + else if (ivpu_is_fpga(vdev)) + val_ref = IVPU_MMU_IDR5_REF_FPGA; + else + val_ref = IVPU_MMU_IDR5_REF; + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR5); + if (val != val_ref) + ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref); +} + +static int ivpu_mmu_cdtab_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + size_t size = IVPU_MMU_CDTAB_ENT_COUNT * IVPU_MMU_CDTAB_ENT_SIZE; + + cdtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &cdtab->dma, GFP_KERNEL); + if (!cdtab->base) + return -ENOMEM; + + ivpu_dbg(vdev, MMU, "CDTAB alloc: dma=%pad size=%zu\n", &cdtab->dma, size); + + return 0; +} + +static int ivpu_mmu_strtab_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_strtab *strtab = &mmu->strtab; + size_t size = IVPU_MMU_STRTAB_ENT_COUNT * IVPU_MMU_STRTAB_ENT_SIZE; + + strtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &strtab->dma, GFP_KERNEL); + if (!strtab->base) + return -ENOMEM; + + strtab->base_cfg = IVPU_MMU_STRTAB_CFG; + strtab->dma_q = IVPU_MMU_STRTAB_BASE_RA; + strtab->dma_q |= strtab->dma & IVPU_MMU_STRTAB_BASE_ADDR_MASK; + + ivpu_dbg(vdev, MMU, "STRTAB alloc: dma=%pad dma_q=%pad size=%zu\n", + &strtab->dma, &strtab->dma_q, size); + + return 0; +} + +static int ivpu_mmu_cmdq_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_queue *q = &mmu->cmdq; + + q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_CMDQ_SIZE, &q->dma, GFP_KERNEL); + if (!q->base) + return -ENOMEM; + + q->dma_q = IVPU_MMU_Q_BASE_RWA; + q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK; + q->dma_q |= IVPU_MMU_Q_COUNT_LOG2; + + ivpu_dbg(vdev, MMU, "CMDQ alloc: dma=%pad dma_q=%pad size=%u\n", + &q->dma, &q->dma_q, IVPU_MMU_CMDQ_SIZE); + + return 0; +} + +static int ivpu_mmu_evtq_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_queue *q = &mmu->evtq; + + q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_EVTQ_SIZE, &q->dma, GFP_KERNEL); + if (!q->base) + return -ENOMEM; + + q->dma_q = IVPU_MMU_Q_BASE_RWA; + q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK; + q->dma_q |= IVPU_MMU_Q_COUNT_LOG2; + + ivpu_dbg(vdev, MMU, "EVTQ alloc: dma=%pad dma_q=%pad size=%u\n", + &q->dma, &q->dma_q, IVPU_MMU_EVTQ_SIZE); + + return 0; +} + +static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_mmu_cdtab_alloc(vdev); + if (ret) { + ivpu_err(vdev, "Failed to allocate cdtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_strtab_alloc(vdev); + if (ret) { + ivpu_err(vdev, "Failed to allocate strtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_cmdq_alloc(vdev); + if (ret) { + ivpu_err(vdev, "Failed to allocate cmdq: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_evtq_alloc(vdev); + if (ret) + ivpu_err(vdev, "Failed to allocate evtq: %d\n", ret); + + return ret; +} + +static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val) +{ + u32 reg_ack = reg + 4; /* ACK register is 4B after base register */ + u32 val_ack; + int ret; + + REGV_WR32(reg, val); + + ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to write register 0x%x\n", reg); + + return ret; +} + +static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) +{ + u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN; + int ret; + + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, 0); + if (ret) + return ret; + + return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, irq_ctrl); +} + +static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev) +{ + struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; + + return REGV_POLL(MTL_VPU_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons), + IVPU_MMU_QUEUE_TIMEOUT_US); +} + +static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1) +{ + struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; + u64 *queue_buffer = q->base; + int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); + + if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) { + ivpu_err(vdev, "Failed to write MMU CMD %s\n", name); + return -EBUSY; + } + + queue_buffer[idx] = data0; + queue_buffer[idx + 1] = data1; + q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK; + + ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1); + + return 0; +} + +static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) +{ + struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; + u64 val; + int ret; + + val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) | + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) | + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) | + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf); + + ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0); + if (ret) + return ret; + + clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, q->prod); + + ret = ivpu_mmu_cmdq_wait_for_cons(vdev); + if (ret) + ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret); + + return ret; +} + +static int ivpu_mmu_cmdq_write_cfgi_all(struct ivpu_device *vdev) +{ + u64 data0 = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_CFGI_ALL); + u64 data1 = FIELD_PREP(IVPU_MMU_CMD_CFGI_1_RANGE, 0x1f); + + return ivpu_mmu_cmdq_cmd_write(vdev, "CFGI_ALL", data0, data1); +} + +static int ivpu_mmu_cmdq_write_tlbi_nh_asid(struct ivpu_device *vdev, u16 ssid) +{ + u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NH_ASID) | + FIELD_PREP(IVPU_MMU_CMD_TLBI_0_ASID, ssid); + + return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NH_ASID", val, 0); +} + +static int ivpu_mmu_cmdq_write_tlbi_nsnh_all(struct ivpu_device *vdev) +{ + u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NSNH_ALL); + + return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NSNH_ALL", val, 0); +} + +static int ivpu_mmu_reset(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + u32 val; + int ret; + + memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE); + clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); + mmu->cmdq.prod = 0; + mmu->cmdq.cons = 0; + + memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); + clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); + mmu->evtq.prod = 0; + mmu->evtq.cons = 0; + + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, 0); + if (ret) + return ret; + + val = FIELD_PREP(IVPU_MMU_CR1_TABLE_SH, IVPU_MMU_SH_ISH) | + FIELD_PREP(IVPU_MMU_CR1_TABLE_OC, IVPU_MMU_CACHE_WB) | + FIELD_PREP(IVPU_MMU_CR1_TABLE_IC, IVPU_MMU_CACHE_WB) | + FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) | + FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) | + FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB); + REGV_WR32(MTL_VPU_HOST_MMU_CR1, val); + + REGV_WR64(MTL_VPU_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q); + REGV_WR32(MTL_VPU_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg); + + REGV_WR64(MTL_VPU_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q); + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, 0); + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_CONS, 0); + + val = IVPU_MMU_CR0_CMDQEN; + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + if (ret) + return ret; + + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); + if (ret) + return ret; + + ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev); + if (ret) + return ret; + + ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + return ret; + + REGV_WR64(MTL_VPU_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q); + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC, 0); + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, 0); + + val |= IVPU_MMU_CR0_EVTQEN; + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + if (ret) + return ret; + + val |= IVPU_MMU_CR0_ATSCHK; + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + if (ret) + return ret; + + ret = ivpu_mmu_irqs_setup(vdev); + if (ret) + return ret; + + val |= IVPU_MMU_CR0_SMMUEN; + return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); +} + +static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_strtab *strtab = &mmu->strtab; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + u64 *entry = strtab->base + (sid * IVPU_MMU_STRTAB_ENT_SIZE); + u64 str[2]; + + str[0] = FIELD_PREP(IVPU_MMU_STE_0_CFG, IVPU_MMU_STE_0_CFG_S1_TRANS) | + FIELD_PREP(IVPU_MMU_STE_0_S1CDMAX, IVPU_MMU_CDTAB_ENT_COUNT_LOG2) | + FIELD_PREP(IVPU_MMU_STE_0_S1FMT, IVPU_MMU_STE_0_S1FMT_LINEAR) | + IVPU_MMU_STE_0_V | + (cdtab->dma & IVPU_MMU_STE_0_S1CTXPTR_MASK); + + str[1] = FIELD_PREP(IVPU_MMU_STE_1_S1DSS, IVPU_MMU_STE_1_S1DSS_TERMINATE) | + FIELD_PREP(IVPU_MMU_STE_1_S1CIR, IVPU_MMU_STE_1_S1C_CACHE_NC) | + FIELD_PREP(IVPU_MMU_STE_1_S1COR, IVPU_MMU_STE_1_S1C_CACHE_NC) | + FIELD_PREP(IVPU_MMU_STE_1_S1CSH, IVPU_MMU_SH_NSH) | + FIELD_PREP(IVPU_MMU_STE_1_PRIVCFG, IVPU_MMU_STE_1_PRIVCFG_UNPRIV) | + FIELD_PREP(IVPU_MMU_STE_1_INSTCFG, IVPU_MMU_STE_1_INSTCFG_DATA) | + FIELD_PREP(IVPU_MMU_STE_1_STRW, IVPU_MMU_STE_1_STRW_NSEL1) | + FIELD_PREP(IVPU_MMU_STE_1_CONT, IVPU_MMU_STRTAB_CFG_LOG2SIZE) | + IVPU_MMU_STE_1_MEV | + IVPU_MMU_STE_1_S1STALLD; + + WRITE_ONCE(entry[1], str[1]); + WRITE_ONCE(entry[0], str[0]); + + clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); + + ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]); +} + +static int ivpu_mmu_strtab_init(struct ivpu_device *vdev) +{ + ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID0); + ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID3); + + return 0; +} + +int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + int ret; + + ret = mutex_lock_interruptible(&mmu->lock); + if (ret) + return ret; + + if (!mmu->on) { + ret = 0; + goto unlock; + } + + ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid); + if (ret) + goto unlock; + + ret = ivpu_mmu_cmdq_sync(vdev); +unlock: + mutex_unlock(&mmu->lock); + return ret; +} + +static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + u64 *entry; + u64 cd[4]; + int ret; + + if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) + return -EINVAL; + + entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); + + if (cd_dma != 0) { + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) | + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | + IVPU_MMU_CD_0_TCR_EPD1 | + IVPU_MMU_CD_0_AA64 | + IVPU_MMU_CD_0_R | + IVPU_MMU_CD_0_ASET | + IVPU_MMU_CD_0_V; + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; + cd[2] = 0; + cd[3] = 0x0000000000007444; + + /* For global context generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + cd[0] |= IVPU_MMU_CD_0_A; + } else { + memset(cd, 0, sizeof(cd)); + } + + WRITE_ONCE(entry[1], cd[1]); + WRITE_ONCE(entry[2], cd[2]); + WRITE_ONCE(entry[3], cd[3]); + WRITE_ONCE(entry[0], cd[0]); + + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); + + ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); + + ret = mutex_lock_interruptible(&mmu->lock); + if (ret) + return ret; + + if (!mmu->on) { + ret = 0; + goto unlock; + } + + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); + if (ret) + goto unlock; + + ret = ivpu_mmu_cmdq_sync(vdev); +unlock: + mutex_unlock(&mmu->lock); + return ret; +} + +static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); + if (ret) + ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); + + return ret; +} + +static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) +{ + int ret; + + if (ssid == 0) { + ivpu_err(vdev, "Invalid SSID: %u\n", ssid); + return -EINVAL; + } + + ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); + if (ret) + ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); + + return ret; +} + +int ivpu_mmu_init(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + int ret; + + ivpu_dbg(vdev, MMU, "Init..\n"); + + drmm_mutex_init(&vdev->drm, &mmu->lock); + ivpu_mmu_config_check(vdev); + + ret = ivpu_mmu_structs_alloc(vdev); + if (ret) + return ret; + + ret = ivpu_mmu_strtab_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_cd_add_gbl(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); + return ret; + } + + ivpu_dbg(vdev, MMU, "Init done\n"); + + return 0; +} + +int ivpu_mmu_enable(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + int ret; + + mutex_lock(&mmu->lock); + + mmu->on = true; + + ret = ivpu_mmu_reset(vdev); + if (ret) { + ivpu_err(vdev, "Failed to reset MMU: %d\n", ret); + goto err; + } + + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); + if (ret) + goto err; + + ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev); + if (ret) + goto err; + + ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + goto err; + + mutex_unlock(&mmu->lock); + + return 0; +err: + mmu->on = false; + mutex_unlock(&mmu->lock); + return ret; +} + +void ivpu_mmu_disable(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + + mutex_lock(&mmu->lock); + mmu->on = false; + mutex_unlock(&mmu->lock); +} + +static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event) +{ + u32 ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); + u32 op = FIELD_GET(IVPU_MMU_EVT_OP_MASK, event[0]); + u64 fetch_addr = ((u64)event[7]) << 32 | event[6]; + u64 in_addr = ((u64)event[5]) << 32 | event[4]; + u32 sid = event[1]; + + ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", + op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr); +} + +static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) +{ + struct ivpu_mmu_queue *evtq = &vdev->mmu->evtq; + u32 idx = IVPU_MMU_Q_IDX(evtq->cons); + u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE); + + evtq->prod = REGV_RD32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC); + if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) + return NULL; + + clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); + + evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, evtq->cons); + + return evt; +} + +void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) +{ + u32 *event; + u32 ssid; + + ivpu_dbg(vdev, IRQ, "MMU event queue\n"); + + while ((event = ivpu_mmu_get_event(vdev)) != NULL) { + ivpu_mmu_dump_event(vdev, event); + + ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); + if (ssid != IVPU_GLOBAL_CONTEXT_MMU_SSID) + ivpu_mmu_user_context_mark_invalid(vdev, ssid); + } +} + +void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) +{ + u32 gerror_val, gerrorn_val, active; + + ivpu_dbg(vdev, IRQ, "MMU error\n"); + + gerror_val = REGV_RD32(MTL_VPU_HOST_MMU_GERROR); + gerrorn_val = REGV_RD32(MTL_VPU_HOST_MMU_GERRORN); + + active = gerror_val ^ gerrorn_val; + if (!(active & IVPU_MMU_GERROR_ERR_MASK)) + return; + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT, active)) + ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT, active)) + ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ, active)) + ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n"); + + REGV_WR32(MTL_VPU_HOST_MMU_GERRORN, gerror_val); +} + +int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) +{ + return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); +} + +void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) +{ + ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ +} diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h new file mode 100644 index 000000000000..cb551126806b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_MMU_H__ +#define __IVPU_MMU_H__ + +struct ivpu_device; + +struct ivpu_mmu_cdtab { + void *base; + dma_addr_t dma; +}; + +struct ivpu_mmu_strtab { + void *base; + dma_addr_t dma; + u64 dma_q; + u32 base_cfg; +}; + +struct ivpu_mmu_queue { + void *base; + dma_addr_t dma; + u64 dma_q; + u32 prod; + u32 cons; +}; + +struct ivpu_mmu_info { + struct mutex lock; /* Protects cdtab, strtab, cmdq, on */ + struct ivpu_mmu_cdtab cdtab; + struct ivpu_mmu_strtab strtab; + struct ivpu_mmu_queue cmdq; + struct ivpu_mmu_queue evtq; + bool on; +}; + +int ivpu_mmu_init(struct ivpu_device *vdev); +void ivpu_mmu_disable(struct ivpu_device *vdev); +int ivpu_mmu_enable(struct ivpu_device *vdev); +int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); +void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid); +int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); + +void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); +void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); + +#endif /* __IVPU_MMU_H__ */ diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c new file mode 100644 index 000000000000..8ce9b12ac356 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" + +#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30) +#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21) +#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12) +#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0) +#define IVPU_MMU_ENTRY_FLAG_NG BIT(11) +#define IVPU_MMU_ENTRY_FLAG_AF BIT(10) +#define IVPU_MMU_ENTRY_FLAG_USER BIT(6) +#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2) +#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1) +#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0) + +#define IVPU_MMU_PAGE_SIZE SZ_4K +#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE) +#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE) +#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64)) + +#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000 +#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID) +#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK) +#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \ + IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID) + +static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + dma_addr_t pgd_dma; + u64 *pgd; + + pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL); + if (!pgd) + return -ENOMEM; + + pgtable->pgd = pgd; + pgtable->pgd_dma = pgd_dma; + + return 0; +} + +static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + int pgd_index, pmd_index; + + for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) { + u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index]; + u64 *pmd = pgtable->pgd_entries[pgd_index]; + + if (!pmd_entries) + continue; + + for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) { + if (pmd_entries[pmd_index]) + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, + pmd_entries[pmd_index], + pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); + } + + kfree(pmd_entries); + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index], + pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); + } + + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd, + pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK); +} + +static u64* +ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index) +{ + u64 **pmd_entries; + dma_addr_t pmd_dma; + u64 *pmd; + + if (pgtable->pgd_entries[pgd_index]) + return pgtable->pgd_entries[pgd_index]; + + pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL); + if (!pmd) + return NULL; + + pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); + if (!pmd_entries) + goto err_free_pgd; + + pgtable->pgd_entries[pgd_index] = pmd; + pgtable->pgd_cpu_entries[pgd_index] = pmd_entries; + pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID; + + return pmd; + +err_free_pgd: + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma); + return NULL; +} + +static u64* +ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, + int pgd_index, int pmd_index) +{ + dma_addr_t pte_dma; + u64 *pte; + + if (pgtable->pgd_cpu_entries[pgd_index][pmd_index]) + return pgtable->pgd_cpu_entries[pgd_index][pmd_index]; + + pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL); + if (!pte) + return NULL; + + pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte; + pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID; + + return pte; +} + +static int +ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, dma_addr_t dma_addr, int prot) +{ + u64 *pte; + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + /* Allocate PMD - second level page table if needed */ + if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index)) + return -ENOMEM; + + /* Allocate PTE - third level page table if needed */ + pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index); + if (!pte) + return -ENOMEM; + + /* Update PTE - third level page table with DMA address */ + pte[pte_index] = dma_addr | prot; + + return 0; +} + +static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr) +{ + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + /* Update PTE with dummy physical address and clear flags */ + ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID; +} + +static void +ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) +{ + u64 end_addr = vpu_addr + size; + u64 *pgd = ctx->pgtable.pgd; + + /* Align to PMD entry (2 MB) */ + vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1); + + while (vpu_addr < end_addr) { + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE; + u64 *pmd = ctx->pgtable.pgd_entries[pgd_index]; + + while (vpu_addr < end_addr && vpu_addr < pmd_end) { + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index]; + + clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE); + vpu_addr += IVPU_MMU_PTE_MAP_SIZE; + } + clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE); + } + clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE); +} + +static int +ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot) +{ + while (size) { + int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot); + + if (ret) + return ret; + + vpu_addr += IVPU_MMU_PAGE_SIZE; + dma_addr += IVPU_MMU_PAGE_SIZE; + size -= IVPU_MMU_PAGE_SIZE; + } + + return 0; +} + +static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) +{ + while (size) { + ivpu_mmu_context_unmap_page(ctx, vpu_addr); + vpu_addr += IVPU_MMU_PAGE_SIZE; + size -= IVPU_MMU_PAGE_SIZE; + } +} + +int +ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) +{ + struct scatterlist *sg; + int prot; + int ret; + u64 i; + + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) + return -EINVAL; + /* + * VPU is only 32 bit, but DMA engine is 38 bit + * Ranges < 2 GB are reserved for VPU internal registers + * Limit range to 8 GB + */ + if (vpu_addr < SZ_2G || vpu_addr > SZ_8G) + return -EINVAL; + + prot = IVPU_MMU_ENTRY_MAPPED; + if (llc_coherent) + prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT; + + mutex_lock(&ctx->lock); + + for_each_sgtable_dma_sg(sgt, sg, i) { + u64 dma_addr = sg_dma_address(sg) - sg->offset; + size_t size = sg_dma_len(sg) + sg->offset; + + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); + if (ret) { + ivpu_err(vdev, "Failed to map context pages\n"); + mutex_unlock(&ctx->lock); + return ret; + } + ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); + vpu_addr += size; + } + + mutex_unlock(&ctx->lock); + + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + return ret; +} + +void +ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt) +{ + struct scatterlist *sg; + int ret; + u64 i; + + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) + ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr); + + mutex_lock(&ctx->lock); + + for_each_sgtable_dma_sg(sgt, sg, i) { + size_t size = sg_dma_len(sg) + sg->offset; + + ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); + ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); + vpu_addr += size; + } + + mutex_unlock(&ctx->lock); + + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); +} + +int +ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, + const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node) +{ + lockdep_assert_held(&ctx->lock); + + return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, + 0, range->start, range->end, DRM_MM_INSERT_BEST); +} + +void +ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) +{ + lockdep_assert_held(&ctx->lock); + + drm_mm_remove_node(node); +} + +static int +ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) +{ + u64 start, end; + int ret; + + mutex_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->bo_list); + + ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); + if (ret) + return ret; + + if (!context_id) { + start = vdev->hw->ranges.global_low.start; + end = vdev->hw->ranges.global_high.end; + } else { + start = vdev->hw->ranges.user_low.start; + end = vdev->hw->ranges.user_high.end; + } + + drm_mm_init(&ctx->mm, start, end - start); + ctx->id = context_id; + + return 0; +} + +static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +{ + drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd); + + mutex_destroy(&ctx->lock); + ivpu_mmu_pgtable_free(vdev, &ctx->pgtable); + drm_mm_takedown(&ctx->mm); +} + +int ivpu_mmu_global_context_init(struct ivpu_device *vdev) +{ + return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); +} + +void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) +{ + return ivpu_mmu_context_fini(vdev, &vdev->gctx); +} + +void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) +{ + struct ivpu_file_priv *file_priv; + + xa_lock(&vdev->context_xa); + + file_priv = xa_load(&vdev->context_xa, ssid); + if (file_priv) + file_priv->has_mmu_faults = true; + + xa_unlock(&vdev->context_xa); +} + +int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) +{ + int ret; + + drm_WARN_ON(&vdev->drm, !ctx_id); + + ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); + if (ret) { + ivpu_err(vdev, "Failed to initialize context: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set page table: %d\n", ret); + goto err_context_fini; + } + + return 0; + +err_context_fini: + ivpu_mmu_context_fini(vdev, ctx); + return ret; +} + +void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +{ + drm_WARN_ON(&vdev->drm, !ctx->id); + + ivpu_mmu_clear_pgtable(vdev, ctx->id); + ivpu_mmu_context_fini(vdev, ctx); +} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h new file mode 100644 index 000000000000..ddf11b95023a --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_MMU_CONTEXT_H__ +#define __IVPU_MMU_CONTEXT_H__ + +#include + +struct ivpu_device; +struct ivpu_file_priv; +struct ivpu_addr_range; + +#define IVPU_MMU_PGTABLE_ENTRIES 512 + +struct ivpu_mmu_pgtable { + u64 **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES]; + u64 *pgd_entries[IVPU_MMU_PGTABLE_ENTRIES]; + u64 *pgd; + dma_addr_t pgd_dma; +}; + +struct ivpu_mmu_context { + struct mutex lock; /* protects: mm, pgtable, bo_list */ + struct drm_mm mm; + struct ivpu_mmu_pgtable pgtable; + struct list_head bo_list; + u32 id; +}; + +int ivpu_mmu_global_context_init(struct ivpu_device *vdev); +void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); + +int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); +void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); + +int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, + const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node); +void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, + struct drm_mm_node *node); + +int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); +void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt); + +#endif /* __IVPU_MMU_CONTEXT_H__ */ diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index c6a98977eb8e..543347df51a1 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -38,6 +38,7 @@ extern "C" { #define DRM_IVPU_PARAM_NUM_CONTEXTS 4 #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 #define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 +#define DRM_IVPU_PARAM_CONTEXT_ID 7 #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 @@ -78,6 +79,9 @@ struct drm_ivpu_param { * Value of current context scheduling priority (read-write). * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. * + * %DRM_IVPU_PARAM_CONTEXT_ID: + * Current context ID, always greater than 0 (read-only) + * */ __u32 param; From 647371a6609ddf8700fe151af72e32daebb9baa7 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:19 +0100 Subject: [PATCH 16/45] accel/ivpu: Add GEM buffer object management Adds four types of GEM-based BOs for the VPU: - shmem - internal - prime All types are implemented as struct ivpu_bo, based on struct drm_gem_object. VPU address is allocated when buffer is created except for imported prime buffers that allocate it in BO_INFO IOCTL due to missing file_priv arg in gem_prime_import callback. Internal buffers are pinned on creation, the rest of buffers types can be pinned on demand (in SUBMIT IOCTL). Buffer VPU address, allocated pages and mappings are released when the buffer is destroyed. Eviction mechanism is planned for future versions. Add two new IOCTLs: BO_CREATE, BO_INFO Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 1 + drivers/accel/ivpu/ivpu_drv.c | 30 +- drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_gem.c | 727 ++++++++++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_gem.h | 126 ++++++ include/uapi/drm/ivpu_accel.h | 94 +++++ 6 files changed, 977 insertions(+), 2 deletions(-) create mode 100644 drivers/accel/ivpu/ivpu_gem.c create mode 100644 drivers/accel/ivpu/ivpu_gem.h diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 59cd7843b218..5d7c5862399c 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -3,6 +3,7 @@ intel_vpu-y := \ ivpu_drv.o \ + ivpu_gem.o \ ivpu_hw_mtl.o \ ivpu_mmu.o \ ivpu_mmu_context.o diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 69ca7dae8b57..53d92d06814b 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -12,8 +12,10 @@ #include #include #include +#include #include "ivpu_drv.h" +#include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -49,6 +51,24 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) return file_priv; } +struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id) +{ + struct ivpu_file_priv *file_priv; + + xa_lock_irq(&vdev->context_xa); + file_priv = xa_load(&vdev->context_xa, id); + /* file_priv may still be in context_xa during file_priv_release() */ + if (file_priv && !kref_get_unless_zero(&file_priv->ref)) + file_priv = NULL; + xa_unlock_irq(&vdev->context_xa); + + if (file_priv) + ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n", + file_priv->ctx.id, kref_read(&file_priv->ref)); + + return file_priv; +} + static void file_priv_release(struct kref *ref) { struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); @@ -57,7 +77,7 @@ static void file_priv_release(struct kref *ref) ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); - WARN_ON(xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); + drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); kfree(file_priv); } @@ -66,7 +86,7 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) struct ivpu_file_priv *file_priv = *link; struct ivpu_device *vdev = file_priv->vdev; - WARN_ON(!file_priv); + drm_WARN_ON(&vdev->drm, !file_priv); ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", file_priv->ctx.id, kref_read(&file_priv->ref)); @@ -200,6 +220,8 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), }; int ivpu_shutdown(struct ivpu_device *vdev) @@ -227,6 +249,10 @@ static const struct drm_driver driver = { .open = ivpu_open, .postclose = ivpu_postclose, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = ivpu_gem_prime_import, + .gem_prime_mmap = drm_gem_prime_mmap, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index b9ce66f40696..972f71cfaa12 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -115,6 +115,7 @@ extern u8 ivpu_pll_min_ratio; extern u8 ivpu_pll_max_ratio; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); +struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id); void ivpu_file_priv_put(struct ivpu_file_priv **link); int ivpu_shutdown(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c new file mode 100644 index 000000000000..b938359b19af --- /dev/null +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" + +MODULE_IMPORT_NS(DMA_BUF); + +static const struct drm_gem_object_funcs ivpu_gem_funcs; + +static struct lock_class_key prime_bo_lock_class_key; + +static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo) +{ + /* Pages are managed by the underlying dma-buf */ + return 0; +} + +static void prime_free_pages_locked(struct ivpu_bo *bo) +{ + /* Pages are managed by the underlying dma-buf */ +} + +static int prime_map_pages_locked(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct sg_table *sgt; + + WARN_ON(!bo->base.import_attach); + + sgt = dma_buf_map_attachment(bo->base.import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt)); + return PTR_ERR(sgt); + } + + bo->sgt = sgt; + return 0; +} + +static void prime_unmap_pages_locked(struct ivpu_bo *bo) +{ + WARN_ON(!bo->base.import_attach); + + dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL); + bo->sgt = NULL; +} + +static const struct ivpu_bo_ops prime_ops = { + .type = IVPU_BO_TYPE_PRIME, + .name = "prime", + .alloc_pages = prime_alloc_pages_locked, + .free_pages = prime_free_pages_locked, + .map_pages = prime_map_pages_locked, + .unmap_pages = prime_unmap_pages_locked, +}; + +static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo) +{ + int npages = bo->base.size >> PAGE_SHIFT; + struct page **pages; + + pages = drm_gem_get_pages(&bo->base); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + if (bo->flags & DRM_IVPU_BO_WC) + set_pages_array_wc(pages, npages); + else if (bo->flags & DRM_IVPU_BO_UNCACHED) + set_pages_array_uc(pages, npages); + + bo->pages = pages; + return 0; +} + +static void shmem_free_pages_locked(struct ivpu_bo *bo) +{ + if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) + set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT); + + drm_gem_put_pages(&bo->base, bo->pages, true, false); + bo->pages = NULL; +} + +static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo) +{ + int npages = bo->base.size >> PAGE_SHIFT; + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct sg_table *sgt; + int ret; + + sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages); + if (IS_ERR(sgt)) { + ivpu_err(vdev, "Failed to allocate sgtable\n"); + return PTR_ERR(sgt); + } + + ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0); + if (ret) { + ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); + goto err_free_sgt; + } + + bo->sgt = sgt; + return 0; + +err_free_sgt: + kfree(sgt); + return ret; +} + +static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + + dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0); + sg_free_table(bo->sgt); + kfree(bo->sgt); + bo->sgt = NULL; +} + +static const struct ivpu_bo_ops shmem_ops = { + .type = IVPU_BO_TYPE_SHMEM, + .name = "shmem", + .alloc_pages = shmem_alloc_pages_locked, + .free_pages = shmem_free_pages_locked, + .map_pages = ivpu_bo_map_pages_locked, + .unmap_pages = ivpu_bo_unmap_pages_locked, +}; + +static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo) +{ + unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + struct page **pages; + int ret; + + pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!pages[i]) { + ret = -ENOMEM; + goto err_free_pages; + } + cond_resched(); + } + + bo->pages = pages; + return 0; + +err_free_pages: + while (i--) + put_page(pages[i]); + kvfree(pages); + return ret; +} + +static void internal_free_pages_locked(struct ivpu_bo *bo) +{ + unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + + for (i = 0; i < npages; i++) + put_page(bo->pages[i]); + + kvfree(bo->pages); + bo->pages = NULL; +} + +static const struct ivpu_bo_ops internal_ops = { + .type = IVPU_BO_TYPE_INTERNAL, + .name = "internal", + .alloc_pages = internal_alloc_pages_locked, + .free_pages = internal_free_pages_locked, + .map_pages = ivpu_bo_map_pages_locked, + .unmap_pages = ivpu_bo_unmap_pages_locked, +}; + +static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + int ret; + + lockdep_assert_held(&bo->lock); + drm_WARN_ON(&vdev->drm, bo->sgt); + + ret = bo->ops->alloc_pages(bo); + if (ret) { + ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret); + return ret; + } + + ret = bo->ops->map_pages(bo); + if (ret) { + ivpu_err(vdev, "Failed to map pages for BO: %d", ret); + goto err_free_pages; + } + return ret; + +err_free_pages: + bo->ops->free_pages(bo); + return ret; +} + +static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo) +{ + mutex_lock(&bo->lock); + + WARN_ON(!bo->sgt); + bo->ops->unmap_pages(bo); + WARN_ON(bo->sgt); + bo->ops->free_pages(bo); + WARN_ON(bo->pages); + + mutex_unlock(&bo->lock); +} + +/* + * ivpu_bo_pin() - pin the backing physical pages and map them to VPU. + * + * This function pins physical memory pages, then maps the physical pages + * to IOMMU address space and finally updates the VPU MMU page tables + * to allow the VPU to translate VPU address to IOMMU address. + */ +int __must_check ivpu_bo_pin(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + int ret = 0; + + mutex_lock(&bo->lock); + + if (!bo->vpu_addr) { + ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n", + bo->ctx->id, bo->handle); + ret = -EINVAL; + goto unlock; + } + + if (!bo->sgt) { + ret = ivpu_bo_alloc_and_map_pages_locked(bo); + if (ret) + goto unlock; + } + + if (!bo->mmu_mapped) { + ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt, + ivpu_bo_is_snooped(bo)); + if (ret) { + ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret); + goto unlock; + } + bo->mmu_mapped = true; + } + +unlock: + mutex_unlock(&bo->lock); + + return ret; +} + +static int +ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, + const struct ivpu_addr_range *range) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + int ret; + + if (!range) { + if (bo->flags & DRM_IVPU_BO_HIGH_MEM) + range = &vdev->hw->ranges.user_high; + else + range = &vdev->hw->ranges.user_low; + } + + mutex_lock(&ctx->lock); + ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node); + if (!ret) { + bo->ctx = ctx; + bo->vpu_addr = bo->mm_node.start; + list_add_tail(&bo->ctx_node, &ctx->bo_list); + } + mutex_unlock(&ctx->lock); + + return ret; +} + +static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct ivpu_mmu_context *ctx = bo->ctx; + + ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", + ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); + + mutex_lock(&bo->lock); + + if (bo->mmu_mapped) { + drm_WARN_ON(&vdev->drm, !bo->sgt); + ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt); + bo->mmu_mapped = false; + } + + mutex_lock(&ctx->lock); + list_del(&bo->ctx_node); + bo->vpu_addr = 0; + bo->ctx = NULL; + ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node); + mutex_unlock(&ctx->lock); + + mutex_unlock(&bo->lock); +} + +void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx) +{ + struct ivpu_bo *bo, *tmp; + + list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node) + ivpu_bo_free_vpu_addr(bo); +} + +static struct ivpu_bo * +ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context, + u64 size, u32 flags, const struct ivpu_bo_ops *ops, + const struct ivpu_addr_range *range, u64 user_ptr) +{ + struct ivpu_bo *bo; + int ret = 0; + + if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size))) + return ERR_PTR(-EINVAL); + + switch (flags & DRM_IVPU_BO_CACHE_MASK) { + case DRM_IVPU_BO_CACHED: + case DRM_IVPU_BO_UNCACHED: + case DRM_IVPU_BO_WC: + break; + default: + return ERR_PTR(-EINVAL); + } + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + mutex_init(&bo->lock); + bo->base.funcs = &ivpu_gem_funcs; + bo->flags = flags; + bo->ops = ops; + bo->user_ptr = user_ptr; + + if (ops->type == IVPU_BO_TYPE_SHMEM) + ret = drm_gem_object_init(&vdev->drm, &bo->base, size); + else + drm_gem_private_object_init(&vdev->drm, &bo->base, size); + + if (ret) { + ivpu_err(vdev, "Failed to initialize drm object\n"); + goto err_free; + } + + if (flags & DRM_IVPU_BO_MAPPABLE) { + ret = drm_gem_create_mmap_offset(&bo->base); + if (ret) { + ivpu_err(vdev, "Failed to allocate mmap offset\n"); + goto err_release; + } + } + + if (mmu_context) { + ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range); + if (ret) { + ivpu_err(vdev, "Failed to add BO to context: %d\n", ret); + goto err_release; + } + } + + return bo; + +err_release: + drm_gem_object_release(&bo->base); +err_free: + kfree(bo); + return ERR_PTR(ret); +} + +static void ivpu_bo_free(struct drm_gem_object *obj) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + + if (bo->ctx) + ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", + bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); + else + ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n", + (bool)bo->sgt, bo->mmu_mapped); + + drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + + vunmap(bo->kvaddr); + + if (bo->ctx) + ivpu_bo_free_vpu_addr(bo); + + if (bo->sgt) + ivpu_bo_unmap_and_free_pages(bo); + + if (bo->base.import_attach) + drm_prime_gem_destroy(&bo->base, bo->sgt); + + drm_gem_object_release(&bo->base); + + mutex_destroy(&bo->lock); + kfree(bo); +} + +static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + + ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s", + bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name); + + if (obj->import_attach) { + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + drm_gem_object_put(obj); + vma->vm_private_data = NULL; + return dma_buf_mmap(obj->dma_buf, vma, 0); + } + + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND; + vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags)); + + return 0; +} + +static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + loff_t npages = obj->size >> PAGE_SHIFT; + int ret = 0; + + mutex_lock(&bo->lock); + + if (!bo->sgt) + ret = ivpu_bo_alloc_and_map_pages_locked(bo); + + mutex_unlock(&bo->lock); + + if (ret) + return ERR_PTR(ret); + + return drm_prime_pages_to_sg(obj->dev, bo->pages, npages); +} + +static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct ivpu_bo *bo = to_ivpu_bo(obj); + loff_t npages = obj->size >> PAGE_SHIFT; + pgoff_t page_offset; + struct page *page; + vm_fault_t ret; + int err; + + mutex_lock(&bo->lock); + + if (!bo->sgt) { + err = ivpu_bo_alloc_and_map_pages_locked(bo); + if (err) { + ret = vmf_error(err); + goto unlock; + } + } + + /* We don't use vmf->pgoff since that has the fake offset */ + page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + if (page_offset >= npages) { + ret = VM_FAULT_SIGBUS; + } else { + page = bo->pages[page_offset]; + ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); + } + +unlock: + mutex_unlock(&bo->lock); + + return ret; +} + +static const struct vm_operations_struct ivpu_vm_ops = { + .fault = ivpu_vm_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static const struct drm_gem_object_funcs ivpu_gem_funcs = { + .free = ivpu_bo_free, + .mmap = ivpu_bo_mmap, + .vm_ops = &ivpu_vm_ops, + .get_sg_table = ivpu_bo_get_sg_table, +}; + +int +ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_bo_create *args = data; + u64 size = PAGE_ALIGN(args->size); + struct ivpu_bo *bo; + int ret; + + if (args->flags & ~DRM_IVPU_BO_FLAGS) + return -EINVAL; + + if (size == 0) + return -EINVAL; + + bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0); + if (IS_ERR(bo)) { + ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)", + bo, file_priv->ctx.id, args->size, args->flags); + return PTR_ERR(bo); + } + + ret = drm_gem_handle_create(file, &bo->base, &bo->handle); + if (!ret) { + args->vpu_addr = bo->vpu_addr; + args->handle = bo->handle; + } + + drm_gem_object_put(&bo->base); + + ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n", + file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags); + + return ret; +} + +struct ivpu_bo * +ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags) +{ + const struct ivpu_addr_range *range; + struct ivpu_addr_range fixed_range; + struct ivpu_bo *bo; + pgprot_t prot; + int ret; + + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); + + if (vpu_addr) { + fixed_range.start = vpu_addr; + fixed_range.end = vpu_addr + size; + range = &fixed_range; + } else { + range = &vdev->hw->ranges.global_low; + } + + bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0); + if (IS_ERR(bo)) { + ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", + bo, vpu_addr, size, flags); + return NULL; + } + + ret = ivpu_bo_pin(bo); + if (ret) + goto err_put; + + if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) + drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT); + + prot = ivpu_bo_pgprot(bo, PAGE_KERNEL); + bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot); + if (!bo->kvaddr) { + ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n"); + goto err_put; + } + + ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n", + bo->vpu_addr, bo->base.size, flags); + + return bo; + +err_put: + drm_gem_object_put(&bo->base); + return NULL; +} + +void ivpu_bo_free_internal(struct ivpu_bo *bo) +{ + drm_gem_object_put(&bo->base); +} + +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct dma_buf_attachment *attach; + struct ivpu_bo *bo; + + attach = dma_buf_attach(buf, dev->dev); + if (IS_ERR(attach)) + return ERR_CAST(attach); + + get_dma_buf(buf); + + bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0); + if (IS_ERR(bo)) { + ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size); + goto err_detach; + } + + lockdep_set_class(&bo->lock, &prime_bo_lock_class_key); + + bo->base.import_attach = attach; + + return &bo->base; + +err_detach: + dma_buf_detach(buf, attach); + dma_buf_put(buf); + return ERR_CAST(bo); +} + +int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = to_ivpu_device(dev); + struct drm_ivpu_bo_info *args = data; + struct drm_gem_object *obj; + struct ivpu_bo *bo; + int ret = 0; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + bo = to_ivpu_bo(obj); + + mutex_lock(&bo->lock); + + if (!bo->ctx) { + ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL); + if (ret) { + ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret); + goto unlock; + } + } + + args->flags = bo->flags; + args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node); + args->vpu_addr = bo->vpu_addr; + args->size = obj->size; +unlock: + mutex_unlock(&bo->lock); + drm_gem_object_put(obj); + return ret; +} + +static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) +{ + unsigned long dma_refcount = 0; + + if (bo->base.dma_buf && bo->base.dma_buf->file) + dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count); + + drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n", + bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, + kref_read(&bo->base.refcount), dma_refcount, bo->ops->name); +} + +void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + struct ivpu_bo *bo; + + drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n", + "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type"); + + mutex_lock(&vdev->gctx.lock); + list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node) + ivpu_bo_print_info(bo, p); + mutex_unlock(&vdev->gctx.lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); + if (!file_priv) + continue; + + mutex_lock(&file_priv->ctx.lock); + list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node) + ivpu_bo_print_info(bo, p); + mutex_unlock(&file_priv->ctx.lock); + + ivpu_file_priv_put(&file_priv); + } +} + +void ivpu_bo_list_print(struct drm_device *dev) +{ + struct drm_printer p = drm_info_printer(dev->dev); + + ivpu_bo_list(dev, &p); +} diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h new file mode 100644 index 000000000000..1891c90702c2 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ +#ifndef __IVPU_GEM_H__ +#define __IVPU_GEM_H__ + +#include +#include + +struct dma_buf; +struct ivpu_bo_ops; +struct ivpu_file_priv; + +struct ivpu_bo { + struct drm_gem_object base; + const struct ivpu_bo_ops *ops; + + struct ivpu_mmu_context *ctx; + struct list_head ctx_node; + struct drm_mm_node mm_node; + + struct mutex lock; /* Protects: pages, sgt, mmu_mapped */ + struct sg_table *sgt; + struct page **pages; + bool mmu_mapped; + + void *kvaddr; + u64 vpu_addr; + u32 handle; + u32 flags; + uintptr_t user_ptr; +}; + +enum ivpu_bo_type { + IVPU_BO_TYPE_SHMEM = 1, + IVPU_BO_TYPE_INTERNAL, + IVPU_BO_TYPE_PRIME, +}; + +struct ivpu_bo_ops { + enum ivpu_bo_type type; + const char *name; + int (*alloc_pages)(struct ivpu_bo *bo); + void (*free_pages)(struct ivpu_bo *bo); + int (*map_pages)(struct ivpu_bo *bo); + void (*unmap_pages)(struct ivpu_bo *bo); +}; + +int ivpu_bo_pin(struct ivpu_bo *bo); +void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx); +void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); +void ivpu_bo_list_print(struct drm_device *dev); + +struct ivpu_bo * +ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); +void ivpu_bo_free_internal(struct ivpu_bo *bo); +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); +void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo); + +int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj) +{ + return container_of(obj, struct ivpu_bo, base); +} + +static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset) +{ + if (offset > bo->base.size || !bo->pages) + return NULL; + + return bo->pages[offset / PAGE_SIZE]; +} + +static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) +{ + return bo->flags & DRM_IVPU_BO_CACHE_MASK; +} + +static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) +{ + return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; +} + +static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot) +{ + if (bo->flags & DRM_IVPU_BO_WC) + return pgprot_writecombine(prot); + + if (bo->flags & DRM_IVPU_BO_UNCACHED) + return pgprot_noncached(prot); + + return prot; +} + +static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) +{ + return to_ivpu_device(bo->base.dev); +} + +static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) +{ + if (vpu_addr < bo->vpu_addr) + return NULL; + + if (vpu_addr >= (bo->vpu_addr + bo->base.size)) + return NULL; + + return bo->kvaddr + (vpu_addr - bo->vpu_addr); +} + +static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr) +{ + if (cpu_addr < bo->kvaddr) + return 0; + + if (cpu_addr >= (bo->kvaddr + bo->base.size)) + return 0; + + return bo->vpu_addr + (cpu_addr - bo->kvaddr); +} + +#endif /* __IVPU_GEM_H__ */ diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 543347df51a1..093b83c5697e 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -17,6 +17,8 @@ extern "C" { #define DRM_IVPU_GET_PARAM 0x00 #define DRM_IVPU_SET_PARAM 0x01 +#define DRM_IVPU_BO_CREATE 0x02 +#define DRM_IVPU_BO_INFO 0x03 #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -24,6 +26,12 @@ extern "C" { #define DRM_IOCTL_IVPU_SET_PARAM \ DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param) +#define DRM_IOCTL_IVPU_BO_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE, struct drm_ivpu_bo_create) + +#define DRM_IOCTL_IVPU_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) + /** * DOC: contexts * @@ -92,6 +100,92 @@ struct drm_ivpu_param { __u64 value; }; +#define DRM_IVPU_BO_HIGH_MEM 0x00000001 +#define DRM_IVPU_BO_MAPPABLE 0x00000002 + +#define DRM_IVPU_BO_CACHED 0x00000000 +#define DRM_IVPU_BO_UNCACHED 0x00010000 +#define DRM_IVPU_BO_WC 0x00020000 +#define DRM_IVPU_BO_CACHE_MASK 0x00030000 + +#define DRM_IVPU_BO_FLAGS \ + (DRM_IVPU_BO_HIGH_MEM | \ + DRM_IVPU_BO_MAPPABLE | \ + DRM_IVPU_BO_CACHE_MASK) + +/** + * struct drm_ivpu_bo_create - Create BO backed by SHMEM + * + * Create GEM buffer object allocated in SHMEM memory. + */ +struct drm_ivpu_bo_create { + /** @size: The size in bytes of the allocated memory */ + __u64 size; + + /** + * @flags: + * + * Supported flags: + * + * %DRM_IVPU_BO_HIGH_MEM: + * + * Allocate VPU address from >4GB range. + * Buffer object with vpu address >4GB can be always accessed by the + * VPU DMA engine, but some HW generation may not be able to access + * this memory from then firmware running on the VPU management processor. + * Suitable for input, output and some scratch buffers. + * + * %DRM_IVPU_BO_MAPPABLE: + * + * Buffer object can be mapped using mmap(). + * + * %DRM_IVPU_BO_CACHED: + * + * Allocated BO will be cached on host side (WB) and snooped on the VPU side. + * This is the default caching mode. + * + * %DRM_IVPU_BO_UNCACHED: + * + * Allocated BO will not be cached on host side nor snooped on the VPU side. + * + * %DRM_IVPU_BO_WC: + * + * Allocated BO will use write combining buffer for writes but reads will be + * uncached. + */ + __u32 flags; + + /** @handle: Returned GEM object handle */ + __u32 handle; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; +}; + +/** + * struct drm_ivpu_bo_info - Query buffer object info + */ +struct drm_ivpu_bo_info { + /** @handle: Handle of the queried BO */ + __u32 handle; + + /** @flags: Returned flags used to create the BO */ + __u32 flags; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; + + /** + * @mmap_offset: + * + * Returned offset to be used in mmap(). 0 in case the BO is not mappable. + */ + __u64 mmap_offset; + + /** @size: Returned GEM object size, aligned to PAGE_SIZE */ + __u64 size; +}; + #if defined(__cplusplus) } #endif From 5d7422cfb498bf25c4a9ea6b9d82253cb5236364 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:20 +0100 Subject: [PATCH 17/45] accel/ivpu: Add IPC driver and JSM messages The IPC driver is used to send and receive messages to/from firmware running on the VPU. The only supported IPC message format is Job Submission Model (JSM) defined in vpu_jsm_api.h header. Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-5-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 2 + drivers/accel/ivpu/ivpu_drv.c | 13 + drivers/accel/ivpu/ivpu_drv.h | 2 + drivers/accel/ivpu/ivpu_hw_mtl.c | 4 + drivers/accel/ivpu/ivpu_ipc.c | 500 +++++++++++++++ drivers/accel/ivpu/ivpu_ipc.h | 93 +++ drivers/accel/ivpu/ivpu_jsm_msg.c | 169 +++++ drivers/accel/ivpu/ivpu_jsm_msg.h | 23 + drivers/accel/ivpu/vpu_jsm_api.h | 999 ++++++++++++++++++++++++++++++ 9 files changed, 1805 insertions(+) create mode 100644 drivers/accel/ivpu/ivpu_ipc.c create mode 100644 drivers/accel/ivpu/ivpu_ipc.h create mode 100644 drivers/accel/ivpu/ivpu_jsm_msg.c create mode 100644 drivers/accel/ivpu/ivpu_jsm_msg.h create mode 100644 drivers/accel/ivpu/vpu_jsm_api.h diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 5d7c5862399c..46595f0112e3 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -5,6 +5,8 @@ intel_vpu-y := \ ivpu_drv.o \ ivpu_gem.o \ ivpu_hw_mtl.o \ + ivpu_ipc.o \ + ivpu_jsm_msg.o \ ivpu_mmu.o \ ivpu_mmu_context.o diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 53d92d06814b..1eb12e157acd 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -17,6 +17,7 @@ #include "ivpu_drv.h" #include "ivpu_gem.h" #include "ivpu_hw.h" +#include "ivpu_ipc.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -229,6 +230,7 @@ int ivpu_shutdown(struct ivpu_device *vdev) int ret; ivpu_hw_irq_disable(vdev); + ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); ret = ivpu_hw_power_down(vdev); @@ -339,6 +341,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (!vdev->mmu) return -ENOMEM; + vdev->ipc = drmm_kzalloc(&vdev->drm, sizeof(*vdev->ipc), GFP_KERNEL); + if (!vdev->ipc) + return -ENOMEM; + vdev->hw->ops = &ivpu_hw_mtl_ops; vdev->platform = IVPU_PLATFORM_INVALID; vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; @@ -383,6 +389,12 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_mmu_gctx_fini; } + ret = ivpu_ipc_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret); + goto err_mmu_gctx_fini; + } + return 0; err_mmu_gctx_fini: @@ -397,6 +409,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_shutdown(vdev); + ivpu_ipc_fini(vdev); ivpu_mmu_global_context_fini(vdev); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 972f71cfaa12..39b706d82e68 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -74,6 +74,7 @@ struct ivpu_wa_table { struct ivpu_hw_info; struct ivpu_mmu_info; +struct ivpu_ipc_info; struct ivpu_device { struct drm_device drm; @@ -85,6 +86,7 @@ struct ivpu_device { struct ivpu_wa_table wa; struct ivpu_hw_info *hw; struct ivpu_mmu_info *mmu; + struct ivpu_ipc_info *ipc; struct ivpu_mmu_context gctx; struct xarray context_xa; diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index eaba2b8248b8..0e9ef4c40901 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -7,6 +7,7 @@ #include "ivpu_hw_mtl_reg.h" #include "ivpu_hw_reg_io.h" #include "ivpu_hw.h" +#include "ivpu_ipc.h" #include "ivpu_mmu.h" #define TILE_FUSE_ENABLE_BOTH 0x0 @@ -934,6 +935,9 @@ static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) ivpu_mmu_irq_evtq_handler(vdev); + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c new file mode 100644 index 000000000000..c756476bfc5b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -0,0 +1,500 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_ipc.h" +#include "ivpu_jsm_msg.h" + +#define IPC_MAX_RX_MSG 128 +#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD) + +struct ivpu_ipc_tx_buf { + struct ivpu_ipc_hdr ipc; + struct vpu_jsm_msg jsm; +}; + +struct ivpu_ipc_rx_msg { + struct list_head link; + struct ivpu_ipc_hdr *ipc_hdr; + struct vpu_jsm_msg *jsm_msg; +}; + +static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c, + struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr) +{ + ivpu_dbg(vdev, IPC, + "%s: vpu:0x%x (data_addr:0x%08x, data_size:0x%x, channel:0x%x, src_node:0x%x, dst_node:0x%x, status:0x%x)", + c, vpu_addr, ipc_hdr->data_addr, ipc_hdr->data_size, ipc_hdr->channel, + ipc_hdr->src_node, ipc_hdr->dst_node, ipc_hdr->status); +} + +static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c, + struct vpu_jsm_msg *jsm_msg, u32 vpu_addr) +{ + u32 *payload = (u32 *)&jsm_msg->payload; + + ivpu_dbg(vdev, JSM, + "%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n", + c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result, + payload[0], payload[1], payload[2], payload[3], payload[4]); +} + +static void +ivpu_ipc_rx_mark_free(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, + struct vpu_jsm_msg *jsm_msg) +{ + ipc_hdr->status = IVPU_IPC_HDR_FREE; + if (jsm_msg) + jsm_msg->status = VPU_JSM_MSG_FREE; + wmb(); /* Flush WC buffers for message statuses */ +} + +static void ivpu_ipc_mem_fini(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + ivpu_bo_free_internal(ipc->mem_rx); + ivpu_bo_free_internal(ipc->mem_tx); +} + +static int +ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct vpu_jsm_msg *req) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_tx_buf *tx_buf; + u32 tx_buf_vpu_addr; + u32 jsm_vpu_addr; + + tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf)); + if (!tx_buf_vpu_addr) { + ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n", + sizeof(*tx_buf)); + return -ENOMEM; + } + + tx_buf = ivpu_to_cpu_addr(ipc->mem_tx, tx_buf_vpu_addr); + if (drm_WARN_ON(&vdev->drm, !tx_buf)) { + gen_pool_free(ipc->mm_tx, tx_buf_vpu_addr, sizeof(*tx_buf)); + return -EIO; + } + + jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm); + + if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE) + ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n", + tx_buf_vpu_addr); + + if (tx_buf->jsm.status != VPU_JSM_MSG_FREE) + ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n", + jsm_vpu_addr); + + memset(tx_buf, 0, sizeof(*tx_buf)); + tx_buf->ipc.data_addr = jsm_vpu_addr; + /* TODO: Set data_size to actual JSM message size, not union of all messages */ + tx_buf->ipc.data_size = sizeof(*req); + tx_buf->ipc.channel = cons->channel; + tx_buf->ipc.src_node = 0; + tx_buf->ipc.dst_node = 1; + tx_buf->ipc.status = IVPU_IPC_HDR_ALLOCATED; + tx_buf->jsm.type = req->type; + tx_buf->jsm.status = VPU_JSM_MSG_ALLOCATED; + tx_buf->jsm.payload = req->payload; + + req->request_id = atomic_inc_return(&ipc->request_id); + tx_buf->jsm.request_id = req->request_id; + cons->request_id = req->request_id; + wmb(); /* Flush WC buffers for IPC, JSM msgs */ + + cons->tx_vpu_addr = tx_buf_vpu_addr; + + ivpu_jsm_msg_dump(vdev, "TX", &tx_buf->jsm, jsm_vpu_addr); + ivpu_ipc_msg_dump(vdev, "TX", &tx_buf->ipc, tx_buf_vpu_addr); + + return 0; +} + +static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + if (vpu_addr) + gen_pool_free(ipc->mm_tx, vpu_addr, sizeof(struct ivpu_ipc_tx_buf)); +} + +static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr) +{ + ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr); +} + +void +ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + INIT_LIST_HEAD(&cons->link); + cons->channel = channel; + cons->tx_vpu_addr = 0; + cons->request_id = 0; + spin_lock_init(&cons->rx_msg_lock); + INIT_LIST_HEAD(&cons->rx_msg_list); + init_waitqueue_head(&cons->rx_msg_wq); + + spin_lock_irq(&ipc->cons_list_lock); + list_add_tail(&cons->link, &ipc->cons_list); + spin_unlock_irq(&ipc->cons_list_lock); +} + +void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg, *r; + + spin_lock_irq(&ipc->cons_list_lock); + list_del(&cons->link); + spin_unlock_irq(&ipc->cons_list_lock); + + spin_lock_irq(&cons->rx_msg_lock); + list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) { + list_del(&rx_msg->link); + ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + atomic_dec(&ipc->rx_msg_count); + kfree(rx_msg); + } + spin_unlock_irq(&cons->rx_msg_lock); + + ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr); +} + +static int +ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + int ret; + + ret = mutex_lock_interruptible(&ipc->lock); + if (ret) + return ret; + + if (!ipc->on) { + ret = -EAGAIN; + goto unlock; + } + + ret = ivpu_ipc_tx_prepare(vdev, cons, req); + if (ret) + goto unlock; + + ivpu_ipc_tx(vdev, cons->tx_vpu_addr); + +unlock: + mutex_unlock(&ipc->lock); + return ret; +} + +int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_buf, + struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg; + int wait_ret, ret = 0; + + wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq, + (IS_KTHREAD() && kthread_should_stop()) || + !list_empty(&cons->rx_msg_list), + msecs_to_jiffies(timeout_ms)); + + if (IS_KTHREAD() && kthread_should_stop()) + return -EINTR; + + if (wait_ret == 0) + return -ETIMEDOUT; + + if (wait_ret < 0) + return -ERESTARTSYS; + + spin_lock_irq(&cons->rx_msg_lock); + rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link); + if (!rx_msg) { + spin_unlock_irq(&cons->rx_msg_lock); + return -EAGAIN; + } + list_del(&rx_msg->link); + spin_unlock_irq(&cons->rx_msg_lock); + + if (ipc_buf) + memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf)); + if (rx_msg->jsm_msg) { + u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload)); + + if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { + ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ret = -EBADMSG; + } + + if (ipc_payload) + memcpy(ipc_payload, rx_msg->jsm_msg, size); + } + + ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + atomic_dec(&ipc->rx_msg_count); + kfree(rx_msg); + + return ret; +} + +static int +ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, + unsigned long timeout_ms) +{ + struct ivpu_ipc_consumer cons; + int ret; + + ivpu_ipc_consumer_add(vdev, &cons, channel); + + ret = ivpu_ipc_send(vdev, &cons, req); + if (ret) { + ivpu_warn(vdev, "IPC send failed: %d\n", ret); + goto consumer_del; + } + + ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms); + if (ret) { + ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret); + goto consumer_del; + } + + if (resp->type != expected_resp_type) { + ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type); + ret = -EBADE; + } + +consumer_del: + ivpu_ipc_consumer_del(vdev, &cons); + return ret; +} + +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, + unsigned long timeout_ms) +{ + struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; + struct vpu_jsm_msg hb_resp; + int ret; + + ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp, + channel, timeout_ms); + if (ret != -ETIMEDOUT) + return ret; + + ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, + &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret == -ETIMEDOUT) + ivpu_hw_diagnose_failure(vdev); + + return ret; +} + +static bool +ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +{ + if (cons->channel != ipc_hdr->channel) + return false; + + if (!jsm_msg || jsm_msg->request_id == cons->request_id) + return true; + + return false; +} + +static void +ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg; + unsigned long flags; + + lockdep_assert_held(&ipc->cons_list_lock); + + rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); + if (!rx_msg) { + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + return; + } + + atomic_inc(&ipc->rx_msg_count); + + rx_msg->ipc_hdr = ipc_hdr; + rx_msg->jsm_msg = jsm_msg; + + spin_lock_irqsave(&cons->rx_msg_lock, flags); + list_add_tail(&rx_msg->link, &cons->rx_msg_list); + spin_unlock_irqrestore(&cons->rx_msg_lock, flags); + + wake_up(&cons->rx_msg_wq); +} + +int ivpu_ipc_irq_handler(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_consumer *cons; + struct ivpu_ipc_hdr *ipc_hdr; + struct vpu_jsm_msg *jsm_msg; + unsigned long flags; + bool dispatched; + u32 vpu_addr; + + /* + * Driver needs to purge all messages from IPC FIFO to clear IPC interrupt. + * Without purge IPC FIFO to 0 next IPC interrupts won't be generated. + */ + while (ivpu_hw_reg_ipc_rx_count_get(vdev)) { + vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); + if (vpu_addr == REG_IO_ERROR) { + ivpu_err(vdev, "Failed to read IPC rx addr register\n"); + return -EIO; + } + + ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr); + if (!ipc_hdr) { + ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr); + continue; + } + ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr); + + jsm_msg = NULL; + if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) { + jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr); + if (!jsm_msg) { + ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr); + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL); + continue; + } + ivpu_jsm_msg_dump(vdev, "RX", jsm_msg, ipc_hdr->data_addr); + } + + if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) { + ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG); + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + continue; + } + + dispatched = false; + spin_lock_irqsave(&ipc->cons_list_lock, flags); + list_for_each_entry(cons, &ipc->cons_list, link) { + if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) { + ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg); + dispatched = true; + break; + } + } + spin_unlock_irqrestore(&ipc->cons_list_lock, flags); + + if (!dispatched) { + ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr); + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + } + } + + return 0; +} + +int ivpu_ipc_init(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + int ret = -ENOMEM; + + ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + if (!ipc->mem_tx) + return ret; + + ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + if (!ipc->mem_rx) + goto err_free_tx; + + ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT), + -1, "TX_IPC_JSM"); + if (IS_ERR(ipc->mm_tx)) { + ret = PTR_ERR(ipc->mm_tx); + ivpu_err(vdev, "Failed to create gen pool, %pe\n", ipc->mm_tx); + goto err_free_rx; + } + + ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1); + if (ret) { + ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret); + goto err_free_rx; + } + + INIT_LIST_HEAD(&ipc->cons_list); + spin_lock_init(&ipc->cons_list_lock); + drmm_mutex_init(&vdev->drm, &ipc->lock); + + ivpu_ipc_reset(vdev); + return 0; + +err_free_rx: + ivpu_bo_free_internal(ipc->mem_rx); +err_free_tx: + ivpu_bo_free_internal(ipc->mem_tx); + return ret; +} + +void ivpu_ipc_fini(struct ivpu_device *vdev) +{ + ivpu_ipc_mem_fini(vdev); +} + +void ivpu_ipc_enable(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + mutex_lock(&ipc->lock); + ipc->on = true; + mutex_unlock(&ipc->lock); +} + +void ivpu_ipc_disable(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_consumer *cons, *c; + unsigned long flags; + + mutex_lock(&ipc->lock); + ipc->on = false; + mutex_unlock(&ipc->lock); + + spin_lock_irqsave(&ipc->cons_list_lock, flags); + list_for_each_entry_safe(cons, c, &ipc->cons_list, link) + wake_up(&cons->rx_msg_wq); + spin_unlock_irqrestore(&ipc->cons_list_lock, flags); +} + +void ivpu_ipc_reset(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + mutex_lock(&ipc->lock); + + memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size); + memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size); + wmb(); /* Flush WC buffers for TX and RX rings */ + + mutex_unlock(&ipc->lock); +} diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h new file mode 100644 index 000000000000..9838202ecfad --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_IPC_H__ +#define __IVPU_IPC_H__ + +#include +#include + +#include "vpu_jsm_api.h" + +struct ivpu_bo; + +/* VPU FW boot notification */ +#define IVPU_IPC_CHAN_BOOT_MSG 0x3ff +#define IVPU_IPC_BOOT_MSG_DATA_ADDR 0x424f4f54 + +/* The alignment to be used for IPC Buffers and IPC Data. */ +#define IVPU_IPC_ALIGNMENT 64 + +#define IVPU_IPC_HDR_FREE 0 +#define IVPU_IPC_HDR_ALLOCATED 0 + +/** + * struct ivpu_ipc_hdr - The IPC message header structure, exchanged + * with the VPU device firmware. + * @data_addr: The VPU address of the payload (JSM message) + * @data_size: The size of the payload. + * @channel: The channel used. + * @src_node: The Node ID of the sender. + * @dst_node: The Node ID of the intended receiver. + * @status: IPC buffer usage status + */ +struct ivpu_ipc_hdr { + u32 data_addr; + u32 data_size; + u16 channel; + u8 src_node; + u8 dst_node; + u8 status; +} __packed __aligned(IVPU_IPC_ALIGNMENT); + +struct ivpu_ipc_consumer { + struct list_head link; + u32 channel; + u32 tx_vpu_addr; + u32 request_id; + + spinlock_t rx_msg_lock; /* Protects rx_msg_list */ + struct list_head rx_msg_list; + wait_queue_head_t rx_msg_wq; +}; + +struct ivpu_ipc_info { + struct gen_pool *mm_tx; + struct ivpu_bo *mem_tx; + struct ivpu_bo *mem_rx; + + atomic_t rx_msg_count; + + spinlock_t cons_list_lock; /* Protects cons_list */ + struct list_head cons_list; + + atomic_t request_id; + struct mutex lock; /* Lock on status */ + bool on; +}; + +int ivpu_ipc_init(struct ivpu_device *vdev); +void ivpu_ipc_fini(struct ivpu_device *vdev); + +void ivpu_ipc_enable(struct ivpu_device *vdev); +void ivpu_ipc_disable(struct ivpu_device *vdev); +void ivpu_ipc_reset(struct ivpu_device *vdev); + +int ivpu_ipc_irq_handler(struct ivpu_device *vdev); + +void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + u32 channel); +void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons); + +int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload, + unsigned long timeout_ms); + +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, + unsigned long timeout_ms); + +#endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c new file mode 100644 index 000000000000..af77dafac97e --- /dev/null +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_ipc.h" +#include "ivpu_jsm_msg.h" + +int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, + u64 jobq_base, u32 jobq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_REGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.register_db.db_idx = db_id; + req.payload.register_db.jobq_base = jobq_base; + req.payload.register_db.jobq_size = jobq_size; + req.payload.register_db.host_ssid = ctx_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); + return ret; + } + + ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id); + + return 0; +} + +int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_UNREGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.unregister_db.db_idx = db_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); + return ret; + } + + ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id); + + return 0; +} + +int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; + struct vpu_jsm_msg resp; + int ret; + + if (engine > VPU_ENGINE_COPY) + return -EINVAL; + + req.payload.query_engine_hb.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, ret); + return ret; + } + + *heartbeat = resp.payload.query_engine_hb_done.heartbeat; + return ret; +} + +int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_RESET }; + struct vpu_jsm_msg resp; + int ret; + + if (engine > VPU_ENGINE_COPY) + return -EINVAL; + + req.payload.engine_reset.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, ret); + + return ret; +} + +int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_PREEMPT }; + struct vpu_jsm_msg resp; + int ret; + + if (engine > VPU_ENGINE_COPY) + return -EINVAL; + + req.payload.engine_preempt.engine_idx = engine; + req.payload.engine_preempt.preempt_id = preempt_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, ret); + + return ret; +} + +int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DYNDBG_CONTROL }; + struct vpu_jsm_msg resp; + int ret; + + if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1)) + return -ENOMEM; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); + + return ret; +} + +int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask, + u64 *trace_hw_component_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_GET_CAPABILITY }; + struct vpu_jsm_msg resp; + int ret; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn(vdev, "Failed to get trace capability: %d\n", ret); + return ret; + } + + *trace_destination_mask = resp.payload.trace_capability.trace_destination_mask; + *trace_hw_component_mask = resp.payload.trace_capability.trace_hw_component_mask; + + return ret; +} + +int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask, + u64 trace_hw_component_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_SET_CONFIG }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.trace_config.trace_level = trace_level; + req.payload.trace_config.trace_destination_mask = trace_destination_mask; + req.payload.trace_config.trace_hw_component_mask = trace_hw_component_mask; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn(vdev, "Failed to set config: %d\n", ret); + + return ret; +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h new file mode 100644 index 000000000000..1a3e2e2740bd --- /dev/null +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_JSM_MSG_H__ +#define __IVPU_JSM_MSG_H__ + +#include "vpu_jsm_api.h" + +int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, + u64 jobq_base, u32 jobq_size); +int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id); +int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat); +int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine); +int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id); +int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size); +int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask, + u64 *trace_hw_component_mask); +int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask, + u64 trace_hw_component_mask); + +#endif diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h new file mode 100644 index 000000000000..1096cab0334e --- /dev/null +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -0,0 +1,999 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +/** + * @file + * @brief JSM shared definitions + * + * @ingroup Jsm + * @brief JSM shared definitions + * @{ + */ +#ifndef VPU_JSM_API_H +#define VPU_JSM_API_H + +/* + * Major version changes that break backward compatibility + */ +#define VPU_JSM_API_VER_MAJOR 2 + +/* + * Minor version changes when API backward compatibility is preserved. + */ +#define VPU_JSM_API_VER_MINOR 10 + +/* + * API header changed (field names, documentation, formatting) but API itself has not been changed + */ +#define VPU_JSM_API_VER_PATCH 1 + +/* + * Index in the API version table + */ +#define VPU_JSM_API_VER_INDEX 4 + +/* + * Number of Priority Bands for Hardware Scheduling + * Bands: RealTime, Focus, Normal, Idle + */ +#define VPU_HWS_NUM_PRIORITY_BANDS 4 + +/* Max number of impacted contexts that can be dealt with the engine reset command */ +#define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3 + +/** Pack the API structures for now, once alignment issues are fixed this can be removed */ +#pragma pack(push, 1) + +/* + * Engine indexes. + */ +#define VPU_ENGINE_COMPUTE 0 +#define VPU_ENGINE_COPY 1 +#define VPU_ENGINE_NB 2 + +/* + * VPU status values. + */ +#define VPU_JSM_STATUS_SUCCESS 0x0U +#define VPU_JSM_STATUS_PARSING_ERR 0x1U +#define VPU_JSM_STATUS_PROCESSING_ERR 0x2U +#define VPU_JSM_STATUS_PREEMPTED 0x3U +#define VPU_JSM_STATUS_ABORTED 0x4U +#define VPU_JSM_STATUS_USER_CTX_VIOL_ERR 0x5U +#define VPU_JSM_STATUS_GLOBAL_CTX_VIOL_ERR 0x6U +#define VPU_JSM_STATUS_MVNCI_WRONG_INPUT_FORMAT 0x7U +#define VPU_JSM_STATUS_MVNCI_UNSUPPORTED_NETWORK_ELEMENT 0x8U +#define VPU_JSM_STATUS_MVNCI_INVALID_HANDLE 0x9U +#define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES 0xAU +#define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED 0xBU +#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU +/* Job status returned when the job was preempted mid-inference */ +#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU + +/* + * Host <-> VPU IPC channels. + * ASYNC commands use a high priority channel, other messages use low-priority ones. + */ +#define VPU_IPC_CHAN_ASYNC_CMD 0 +#define VPU_IPC_CHAN_GEN_CMD 10 +#define VPU_IPC_CHAN_JOB_RET 11 + +/* + * Job flags bit masks. + */ +#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 + +/* + * Sizes of the reserved areas in jobs, in bytes. + */ +#define VPU_JOB_RESERVED_BYTES 16 +/* + * Sizes of the reserved areas in job queues, in bytes. + */ +#define VPU_JOB_QUEUE_RESERVED_BYTES 52 + +/* + * Max length (including trailing NULL char) of trace entity name (e.g., the + * name of a logging destination or a loggable HW component). + */ +#define VPU_TRACE_ENTITY_NAME_MAX_LEN 32 + +/* + * Max length (including trailing NULL char) of a dyndbg command. + * + * NOTE: 112 is used so that the size of 'struct vpu_ipc_msg' in the JSM API is + * 128 bytes (multiple of 64 bytes, the cache line size). + */ +#define VPU_DYNDBG_CMD_MAX_LEN 112 + +/* + * Job format. + */ +struct vpu_job_queue_entry { + u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ + u32 job_id; /**< Job ID */ + u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u64 root_page_table_addr; /**< Address of root page table to use for this job */ + u64 root_page_table_update_counter; /**< Page tables update events counter */ + u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */ + u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */ + u8 reserved[VPU_JOB_RESERVED_BYTES]; +}; + +/* + * Job queue control registers. + */ +struct vpu_job_queue_header { + u32 engine_idx; + u32 head; + u32 tail; + u8 reserved[VPU_JOB_QUEUE_RESERVED_BYTES]; +}; + +/* + * Job queue format. + */ +struct vpu_job_queue { + struct vpu_job_queue_header header; + struct vpu_job_queue_entry job[]; +}; + +/** + * Logging entity types. + * + * This enum defines the different types of entities involved in logging. + */ +enum vpu_trace_entity_type { + /** Logging destination (entity where logs can be stored / printed). */ + VPU_TRACE_ENTITY_TYPE_DESTINATION = 1, + /** Loggable HW component (HW entity that can be logged). */ + VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2, +}; + +/* + * Host <-> VPU IPC messages types. + */ +enum vpu_ipc_msg_type { + VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, + /* IPC Host -> Device, Async commands */ + VPU_JSM_MSG_ASYNC_CMD = 0x1100, + VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, + VPU_JSM_MSG_REGISTER_DB = 0x1102, + VPU_JSM_MSG_UNREGISTER_DB = 0x1103, + VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104, + VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105, + VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106, + VPU_JSM_MSG_SET_POWER_LEVEL = 0x1107, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_OPEN = 0x1108, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_CLOSE = 0x1109, + /** Configure logging (used to modify configuration passed in boot params). */ + VPU_JSM_MSG_TRACE_SET_CONFIG = 0x110a, + /** Return current logging configuration. */ + VPU_JSM_MSG_TRACE_GET_CONFIG = 0x110b, + /** + * Get masks of destinations and HW components supported by the firmware + * (may vary between HW generations and FW compile + * time configurations) + */ + VPU_JSM_MSG_TRACE_GET_CAPABILITY = 0x110c, + /** Get the name of a destination or HW component. */ + VPU_JSM_MSG_TRACE_GET_NAME = 0x110d, + /** + * Release resource associated with host ssid . All jobs that belong to the host_ssid + * aborted and removed from internal scheduling queues. All doorbells assigned + * to the host_ssid are unregistered and any internal FW resources belonging to + * the host_ssid are released. + */ + VPU_JSM_MSG_SSID_RELEASE = 0x110e, + /** + * Start collecting metric data. + * @see vpu_jsm_metric_streamer_start + */ + VPU_JSM_MSG_METRIC_STREAMER_START = 0x110f, + /** + * Stop collecting metric data. This command will return success if it is called + * for a metric stream that has already been stopped or was never started. + * @see vpu_jsm_metric_streamer_stop + */ + VPU_JSM_MSG_METRIC_STREAMER_STOP = 0x1110, + /** + * Update current and next buffer for metric data collection. This command can + * also be used to request information about the number of collected samples + * and the amount of data written to the buffer. + * @see vpu_jsm_metric_streamer_update + */ + VPU_JSM_MSG_METRIC_STREAMER_UPDATE = 0x1111, + /** + * Request description of selected metric groups and metric counters within + * each group. The VPU will write the description of groups and counters to + * the buffer specified in the command structure. + * @see vpu_jsm_metric_streamer_start + */ + VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112, + /** Control command: Priority band setup */ + VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113, + /** Control command: Create command queue */ + VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114, + /** Control command: Destroy command queue */ + VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115, + /** Control command: Set context scheduling properties */ + VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116, + /* + * Register a doorbell to notify VPU of new work. The doorbell may later be + * deallocated or reassigned to another context. + */ + VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117, + /* IPC Host -> Device, General commands */ + VPU_JSM_MSG_GENERAL_CMD = 0x1200, + VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + /** + * Control dyndbg behavior by executing a dyndbg command; equivalent to + * Linux command: `echo '' > /dynamic_debug/control`. + */ + VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201, + /* IPC Device -> Host, Job completion */ + VPU_JSM_MSG_JOB_DONE = 0x2100, + /* IPC Device -> Host, Async command completion */ + VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, + VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, + VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201, + VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202, + VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203, + VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204, + VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205, + VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206, + VPU_JSM_MSG_SET_POWER_LEVEL_DONE = 0x2207, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE = 0x2208, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE = 0x2209, + /** Response to VPU_JSM_MSG_TRACE_SET_CONFIG. */ + VPU_JSM_MSG_TRACE_SET_CONFIG_RSP = 0x220a, + /** Response to VPU_JSM_MSG_TRACE_GET_CONFIG. */ + VPU_JSM_MSG_TRACE_GET_CONFIG_RSP = 0x220b, + /** Response to VPU_JSM_MSG_TRACE_GET_CAPABILITY. */ + VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c, + /** Response to VPU_JSM_MSG_TRACE_GET_NAME. */ + VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d, + /** Response to VPU_JSM_MSG_SSID_RELEASE. */ + VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_START. + * VPU will return an error result if metric collection cannot be started, + * e.g. when the specified metric mask is invalid. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_START_DONE = 0x220f, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_STOP. + * Returns information about collected metric data. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE = 0x2210, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_UPDATE. + * Returns information about collected metric data. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE = 0x2211, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_INFO. + * Returns a description of the metric groups and metric counters. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE = 0x2212, + /** + * Asynchronous event sent from the VPU to the host either when the current + * metric buffer is full or when the VPU has collected a multiple of + * @notify_sample_count samples as indicated through the start command + * (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected + * metric data. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213, + /** Response to control command: Priority band setup */ + VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214, + /** Response to control command: Create command queue */ + VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215, + /** Response to control command: Destroy command queue */ + VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216, + /** Response to control command: Set context scheduling properties */ + VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217, + /* IPC Device -> Host, General command completion */ + VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, + VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, + /** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */ + VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301, +}; + +enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED }; + +/* + * Host <-> LRT IPC message payload definitions + */ +struct vpu_ipc_msg_payload_engine_reset { + /* Engine to be reset. */ + u32 engine_idx; +}; + +struct vpu_ipc_msg_payload_engine_preempt { + /* Engine to be preempted. */ + u32 engine_idx; + /* ID of the preemption request. */ + u32 preempt_id; +}; + +/* + * @brief Register doorbell command structure. + * This structure supports doorbell registration for only OS scheduling. + * @see VPU_JSM_MSG_REGISTER_DB + */ +struct vpu_ipc_msg_payload_register_db { + /* Index of the doorbell to register. */ + u32 db_idx; + /* Virtual address in Global GTT pointing to the start of job queue. */ + u64 jobq_base; + /* Size of the job queue in bytes. */ + u32 jobq_size; + /* Host sub-stream ID for the context assigned to the doorbell. */ + u32 host_ssid; +}; + +/** + * @brief Unregister doorbell command structure. + * Request structure to unregister a doorbell for both HW and OS scheduling. + * @see VPU_JSM_MSG_UNREGISTER_DB + */ +struct vpu_ipc_msg_payload_unregister_db { + /* Index of the doorbell to unregister. */ + u32 db_idx; +}; + +struct vpu_ipc_msg_payload_query_engine_hb { + /* Engine to return heartbeat value. */ + u32 engine_idx; +}; + +struct vpu_ipc_msg_payload_power_level { + /** + * Requested power level. The power level value is in the + * range [0, power_level_count-1] where power_level_count + * is the number of available power levels as returned by + * the get power level count command. A power level of 0 + * corresponds to the maximum possible power level, while + * power_level_count-1 corresponds to the minimum possible + * power level. Values outside of this range are not + * considered to be valid. + */ + u32 power_level; +}; + +struct vpu_ipc_msg_payload_ssid_release { + /* Host sub-stream ID for the context to be released. */ + u32 host_ssid; +}; + +/** + * @brief Metric streamer start command structure. + * This structure is also used with VPU_JSM_MSG_METRIC_STREAMER_INFO to request metric + * groups and metric counters description from the firmware. + * @see VPU_JSM_MSG_METRIC_STREAMER_START + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO + */ +struct vpu_jsm_metric_streamer_start { + /** + * Bitmask to select the desired metric groups. + * A metric group can belong only to one metric streamer instance at a time. + * Since each metric streamer instance has a unique set of metric groups, it + * can also identify a metric streamer instance if more than one instance was + * started. If the VPU device does not support multiple metric streamer instances, + * then VPU_JSM_MSG_METRIC_STREAMER_START will return an error even if the second + * instance has different groups to the first. + */ + u64 metric_group_mask; + /** Sampling rate in nanoseconds. */ + u64 sampling_rate; + /** + * If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message + * after every @notify_sample_count samples is collected or dropped by the VPU. + * If set to UINT_MAX the VPU will only generate a notification when the metric + * buffer is full. If set to 0 the VPU will never generate a notification. + */ + u32 notify_sample_count; + u32 reserved_0; + /** + * Address and size of the buffer where the VPU will write metric data. The + * VPU writes all counters from enabled metric groups one after another. If + * there is no space left to write data at the next sample period the VPU + * will switch to the next buffer (@see next_buffer_addr) and will optionally + * send a notification to the host driver if @notify_sample_count is non-zero. + * If @next_buffer_addr is NULL the VPU will stop collecting metric data. + */ + u64 buffer_addr; + u64 buffer_size; + /** + * Address and size of the next buffer to write metric data to after the initial + * buffer is full. If the address is NULL the VPU will stop collecting metric + * data. + */ + u64 next_buffer_addr; + u64 next_buffer_size; +}; + +static_assert(sizeof(struct vpu_jsm_metric_streamer_start) % 8 == 0, + "vpu_jsm_metric_streamer_start is misaligned"); + +/** + * @brief Metric streamer stop command structure. + * @see VPU_JSM_MSG_METRIC_STREAMER_STOP + */ +struct vpu_jsm_metric_streamer_stop { + /** Bitmask to select the desired metric groups. */ + u64 metric_group_mask; +}; + +static_assert(sizeof(struct vpu_jsm_metric_streamer_stop) % 8 == 0, + "vpu_jsm_metric_streamer_stop is misaligned"); + +/** + * Provide VPU FW with buffers to write metric data. + * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE + */ +struct vpu_jsm_metric_streamer_update { + /** Metric group mask that identifies metric streamer instance. */ + u64 metric_group_mask; + /** + * Address and size of the buffer where the VPU will write metric data. If + * the buffer address is 0 or same as the currently used buffer the VPU will + * continue writing metric data to the current buffer. In this case the + * buffer size is ignored and the size of the current buffer is unchanged. + * If the address is non-zero and differs from the current buffer address the + * VPU will immediately switch data collection to the new buffer. + */ + u64 buffer_addr; + u64 buffer_size; + /** + * Address and size of the next buffer to write metric data after the initial + * buffer is full. If the address is NULL the VPU will stop collecting metric + * data but will continue to record dropped samples. + * + * Note that there is a hazard possible if both buffer_addr and the next_buffer_addr + * are non-zero in same update request. It is the host's responsibility to ensure + * that both addresses make sense even if the VPU just switched to writing samples + * from the current to the next buffer. + */ + u64 next_buffer_addr; + u64 next_buffer_size; +}; + +static_assert(sizeof(struct vpu_jsm_metric_streamer_update) % 8 == 0, + "vpu_jsm_metric_streamer_update is misaligned"); + +struct vpu_ipc_msg_payload_blob_deinit { + /* 64-bit unique ID for the blob to be de-initialized. */ + u64 blob_id; +}; + +struct vpu_ipc_msg_payload_job_done { + /* Engine to which the job was submitted. */ + u32 engine_idx; + /* Index of the doorbell to which the job was submitted */ + u32 db_idx; + /* ID of the completed job */ + u32 job_id; + /* Status of the completed job */ + u32 job_status; + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; +}; + +struct vpu_jsm_engine_reset_context { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; + /* Flags: 0: cause of hang; 1: collateral damage of reset */ + u64 flags; +}; + +struct vpu_ipc_msg_payload_engine_reset_done { + /* Engine ordinal */ + u32 engine_idx; + /* Number of impacted contexts */ + u32 num_impacted_contexts; + /* Array of impacted command queue ids and their flags */ + struct vpu_jsm_engine_reset_context + impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS]; +}; + +struct vpu_ipc_msg_payload_engine_preempt_done { + /* Engine preempted. */ + u32 engine_idx; + /* ID of the preemption request. */ + u32 preempt_id; +}; + +/** + * Response structure for register doorbell command for both OS + * and HW scheduling. + * @see VPU_JSM_MSG_REGISTER_DB + * @see VPU_JSM_MSG_HWS_REGISTER_DB + */ +struct vpu_ipc_msg_payload_register_db_done { + /* Index of the registered doorbell. */ + u32 db_idx; +}; + +/** + * Response structure for unregister doorbell command for both OS + * and HW scheduling. + * @see VPU_JSM_MSG_UNREGISTER_DB + */ +struct vpu_ipc_msg_payload_unregister_db_done { + /* Index of the unregistered doorbell. */ + u32 db_idx; +}; + +struct vpu_ipc_msg_payload_query_engine_hb_done { + /* Engine returning heartbeat value. */ + u32 engine_idx; + /* Heartbeat value. */ + u64 heartbeat; +}; + +struct vpu_ipc_msg_payload_get_power_level_count_done { + /** + * Number of supported power levels. The maximum possible + * value of power_level_count is 16 but this may vary across + * implementations. + */ + u32 power_level_count; + /** + * Power consumption limit for each supported power level in + * [0-100%] range relative to power level 0. + */ + u8 power_limit[16]; +}; + +struct vpu_ipc_msg_payload_blob_deinit_done { + /* 64-bit unique ID for the blob de-initialized. */ + u64 blob_id; +}; + +/* HWS priority band setup request / response */ +struct vpu_ipc_msg_payload_hws_priority_band_setup { + /* + * Grace period in 100ns units when preempting another priority band for + * this priority band + */ + u64 grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + /* + * Default quantum in 100ns units for scheduling across processes + * within a priority band + */ + u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; + /* + * Default grace period in 100ns units for processes that preempt each + * other within a priority band + */ + u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + /* + * For normal priority band, specifies the target VPU percentage + * in situations when it's starved by the focus band. + */ + u32 normal_band_percentage; +}; + +/* HWS create command queue request */ +struct vpu_ipc_msg_payload_hws_create_cmdq { + /* Process id */ + u64 process_id; + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; + /* Command queue base */ + u64 cmdq_base; + /* Command queue size */ + u32 cmdq_size; +}; + +/* HWS create command queue response */ +struct vpu_ipc_msg_payload_hws_create_cmdq_rsp { + /* Process id */ + u64 process_id; + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; +}; + +/* HWS destroy command queue request / response */ +struct vpu_ipc_msg_payload_hws_destroy_cmdq { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; +}; + +/* HWS set context scheduling properties request / response */ +struct vpu_ipc_msg_payload_hws_set_context_sched_properties { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved_0; + /* Command queue id */ + u64 cmdq_id; + /* Priority band to assign to work of this context */ + u32 priority_band; + /* Inside realtime band assigns a further priority */ + u32 realtime_priority_level; + /* Priority relative to other contexts in the same process */ + u32 in_process_priority; + /* Zero padding / Reserved */ + u32 reserved_1; + /* Context quantum relative to other contexts of same priority in the same process */ + u64 context_quantum; + /* Grace period when preempting context of the same priority within the same process */ + u64 grace_period_same_priority; + /* Grace period when preempting context of a lower priority within the same process */ + u64 grace_period_lower_priority; +}; + +/* + * @brief Register doorbell command structure. + * This structure supports doorbell registration for both HW and OS scheduling. + * Note: Queue base and size are added here so that the same structure can be used for + * OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored + * and cmdq_base and cmdq_size will be used. For HW scheduling, cmdq_base and cmdq_size will be + * ignored and cmdq_id is used. + * @see VPU_JSM_MSG_HWS_REGISTER_DB + */ +struct vpu_jsm_hws_register_db { + /* Index of the doorbell to register. */ + u32 db_id; + /* Host sub-stream ID for the context assigned to the doorbell. */ + u32 host_ssid; + /* ID of the command queue associated with the doorbell. */ + u64 cmdq_id; + /* Virtual address pointing to the start of command queue. */ + u64 cmdq_base; + /* Size of the command queue in bytes. */ + u64 cmdq_size; +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and + * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages. + * + * The payload is interpreted differently depending on the type of message: + * + * - For VPU_JSM_MSG_TRACE_SET_CONFIG, the payload specifies the desired + * logging configuration to be set. + * + * - For VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, the payload reports the logging + * configuration that was set after a VPU_JSM_MSG_TRACE_SET_CONFIG request. + * The host can compare this payload with the one it sent in the + * VPU_JSM_MSG_TRACE_SET_CONFIG request to check whether or not the + * configuration was set as desired. + * + * - VPU_JSM_MSG_TRACE_GET_CONFIG_RSP, the payload reports the current logging + * configuration. + */ +struct vpu_ipc_msg_payload_trace_config { + /** + * Logging level (currently set or to be set); see 'mvLog_t' enum for + * acceptable values. The specified logging level applies to all + * destinations and HW components + */ + u32 trace_level; + /** + * Bitmask of logging destinations (currently enabled or to be enabled); + * bitwise OR of values defined in logging_destination enum. + */ + u32 trace_destination_mask; + /** + * Bitmask of loggable HW components (currently enabled or to be enabled); + * bitwise OR of values defined in loggable_hw_component enum. + */ + u64 trace_hw_component_mask; + u64 reserved_0; /**< Reserved for future extensions. */ +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP messages. + */ +struct vpu_ipc_msg_payload_trace_capability_rsp { + u32 trace_destination_mask; /**< Bitmask of supported logging destinations. */ + u32 reserved_0; + u64 trace_hw_component_mask; /**< Bitmask of supported loggable HW components. */ + u64 reserved_1; /**< Reserved for future extensions. */ +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_GET_NAME requests. + */ +struct vpu_ipc_msg_payload_trace_get_name { + /** + * The type of the entity to query name for; see logging_entity_type for + * possible values. + */ + u32 entity_type; + u32 reserved_0; + /** + * The ID of the entity to query name for; possible values depends on the + * entity type. + */ + u64 entity_id; +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_GET_NAME_RSP responses. + */ +struct vpu_ipc_msg_payload_trace_get_name_rsp { + /** + * The type of the entity whose name was queried; see logging_entity_type + * for possible values. + */ + u32 entity_type; + u32 reserved_0; + /** + * The ID of the entity whose name was queried; possible values depends on + * the entity type. + */ + u64 entity_id; + /** Reserved for future extensions. */ + u64 reserved_1; + /** The name of the entity. */ + char entity_name[VPU_TRACE_ENTITY_NAME_MAX_LEN]; +}; + +/** + * Data sent from the VPU to the host in all metric streamer response messages + * and in asynchronous notification. + * @see VPU_JSM_MSG_METRIC_STREAMER_START_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION + */ +struct vpu_jsm_metric_streamer_done { + /** Metric group mask that identifies metric streamer instance. */ + u64 metric_group_mask; + /** + * Size in bytes of single sample - total size of all enabled counters. + * Some VPU implementations may align sample_size to more than 8 bytes. + */ + u32 sample_size; + u32 reserved_0; + /** + * Number of samples collected since the metric streamer was started. + * This will be 0 if the metric streamer was not started. + */ + u32 samples_collected; + /** + * Number of samples dropped since the metric streamer was started. This + * is incremented every time the metric streamer is not able to write + * collected samples because the current buffer is full and there is no + * next buffer to switch to. + */ + u32 samples_dropped; + /** Address of the buffer that contains the latest metric data. */ + u64 buffer_addr; + /** + * Number of bytes written into the metric data buffer. In response to the + * VPU_JSM_MSG_METRIC_STREAMER_INFO request this field contains the size of + * all group and counter descriptors. The size is updated even if the buffer + * in the request was NULL or too small to hold descriptors of all counters + */ + u64 bytes_written; +}; + +static_assert(sizeof(struct vpu_jsm_metric_streamer_done) % 8 == 0, + "vpu_jsm_metric_streamer_done is misaligned"); + +/** + * Metric group description placed in the metric buffer after successful completion + * of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more + * @vpu_jsm_metric_counter_descriptor records. + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO + */ +struct vpu_jsm_metric_group_descriptor { + /** + * Offset to the next metric group (8-byte aligned). If this offset is 0 this + * is the last descriptor. The value of metric_info_size must be greater than + * or equal to sizeof(struct vpu_jsm_metric_group_descriptor) + name_string_size + * + description_string_size and must be 8-byte aligned. + */ + u32 next_metric_group_info_offset; + /** + * Offset to the first metric counter description record (8-byte aligned). + * @see vpu_jsm_metric_counter_descriptor + */ + u32 next_metric_counter_info_offset; + /** Index of the group. This corresponds to bit index in metric_group_mask. */ + u32 group_id; + /** Number of counters in the metric group. */ + u32 num_counters; + /** Data size for all counters, must be a multiple of 8 bytes.*/ + u32 metric_group_data_size; + /** + * Metric group domain number. Cannot use multiple, simultaneous metric groups + * from the same domain. + */ + u32 domain; + /** + * Counter name string size. The string must include a null termination character. + * The FW may use a fixed size name or send a different name for each counter. + * If the VPU uses fixed size strings, all characters from the end of the name + * to the of the fixed size character array must be zeroed. + */ + u32 name_string_size; + /** Counter description string size, @see name_string_size */ + u32 description_string_size; + u32 reserved_0[2]; + /** + * Right after this structure, the VPU writes name and description of + * the metric group. + */ +}; + +static_assert(sizeof(struct vpu_jsm_metric_group_descriptor) % 8 == 0, + "vpu_jsm_metric_group_descriptor is misaligned"); + +/** + * Metric counter description, placed in the buffer after vpu_jsm_metric_group_descriptor. + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO + */ +struct vpu_jsm_metric_counter_descriptor { + /** + * Offset to the next counter in a group (8-byte aligned). If this offset is + * 0 this is the last counter in the group. + */ + u32 next_metric_counter_info_offset; + /** + * Offset to the counter data from the start of samples in this metric group. + * Note that metric_data_offset % metric_data_size must be 0. + */ + u32 metric_data_offset; + /** Size of the metric counter data in bytes. */ + u32 metric_data_size; + /** Metric type, see Level Zero API for definitions. */ + u32 tier; + /** Metric type, see set_metric_type_t for definitions. */ + u32 metric_type; + /** Metric type, see set_value_type_t for definitions. */ + u32 metric_value_type; + /** + * Counter name string size. The string must include a null termination character. + * The FW may use a fixed size name or send a different name for each counter. + * If the VPU uses fixed size strings, all characters from the end of the name + * to the of the fixed size character array must be zeroed. + */ + u32 name_string_size; + /** Counter description string size, @see name_string_size */ + u32 description_string_size; + /** Counter component name string size, @see name_string_size */ + u32 component_string_size; + /** Counter string size, @see name_string_size */ + u32 units_string_size; + u32 reserved_0[2]; + /** + * Right after this structure, the VPU writes name, description + * component and unit strings. + */ +}; + +static_assert(sizeof(struct vpu_jsm_metric_counter_descriptor) % 8 == 0, + "vpu_jsm_metric_counter_descriptor is misaligned"); + +/** + * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests. + * + * VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug + * feature, which allows developers to selectively enable / disable MVLOG_DEBUG + * messages. This is equivalent to the Dynamic Debug functionality provided by + * Linux + * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html) + * The host can control Dynamic Debug behavior by sending dyndbg commands, which + * have the same syntax as Linux + * dyndbg commands. + * + * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host + * still has to set the logging level to MVLOG_DEBUG, using the + * VPU_JSM_MSG_TRACE_SET_CONFIG command. + * + * The host can see the current dynamic debug configuration by executing a + * special 'show' command. The dyndbg configuration will be printed to the + * configured logging destination using MVLOG_INFO logging level. + */ +struct vpu_ipc_msg_payload_dyndbg_control { + /** + * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated + * string. + */ + char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN]; +}; + +/* + * Payloads union, used to define complete message format. + */ +union vpu_ipc_msg_payload { + struct vpu_ipc_msg_payload_engine_reset engine_reset; + struct vpu_ipc_msg_payload_engine_preempt engine_preempt; + struct vpu_ipc_msg_payload_register_db register_db; + struct vpu_ipc_msg_payload_unregister_db unregister_db; + struct vpu_ipc_msg_payload_query_engine_hb query_engine_hb; + struct vpu_ipc_msg_payload_power_level power_level; + struct vpu_jsm_metric_streamer_start metric_streamer_start; + struct vpu_jsm_metric_streamer_stop metric_streamer_stop; + struct vpu_jsm_metric_streamer_update metric_streamer_update; + struct vpu_ipc_msg_payload_blob_deinit blob_deinit; + struct vpu_ipc_msg_payload_ssid_release ssid_release; + struct vpu_jsm_hws_register_db hws_register_db; + struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; + struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; + struct vpu_ipc_msg_payload_register_db_done register_db_done; + struct vpu_ipc_msg_payload_unregister_db_done unregister_db_done; + struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; + struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; + struct vpu_jsm_metric_streamer_done metric_streamer_done; + struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; + struct vpu_ipc_msg_payload_trace_config trace_config; + struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; + struct vpu_ipc_msg_payload_trace_get_name trace_get_name; + struct vpu_ipc_msg_payload_trace_get_name_rsp trace_get_name_rsp; + struct vpu_ipc_msg_payload_dyndbg_control dyndbg_control; + struct vpu_ipc_msg_payload_hws_priority_band_setup hws_priority_band_setup; + struct vpu_ipc_msg_payload_hws_create_cmdq hws_create_cmdq; + struct vpu_ipc_msg_payload_hws_create_cmdq_rsp hws_create_cmdq_rsp; + struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq; + struct vpu_ipc_msg_payload_hws_set_context_sched_properties + hws_set_context_sched_properties; +}; + +/* + * Host <-> LRT IPC message base structure. + * + * NOTE: All instances of this object must be aligned on a 64B boundary + * to allow proper handling of VPU cache operations. + */ +struct vpu_jsm_msg { + /* Message type, see vpu_ipc_msg_type enum. */ + u32 type; + /* Buffer status, see vpu_ipc_msg_status enum. */ + u32 status; + /* + * Request ID, provided by the host in a request message and passed + * back by VPU in the response message. + */ + u32 request_id; + /* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */ + u32 result; + /* Message payload depending on message type, see vpu_ipc_msg_payload union. */ + union vpu_ipc_msg_payload payload; +}; + +#pragma pack(pop) + +#endif + +///@} From 02d5b0aacd0590dbaf25f35834631e5bc11002e3 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:21 +0100 Subject: [PATCH 18/45] accel/ivpu: Implement firmware parsing and booting Read, parse and boot VPU firmware image. Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-6-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 1 + drivers/accel/ivpu/ivpu_drv.c | 131 +++++++++- drivers/accel/ivpu/ivpu_drv.h | 11 + drivers/accel/ivpu/ivpu_fw.c | 419 ++++++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_fw.h | 38 +++ drivers/accel/ivpu/ivpu_hw_mtl.c | 10 + drivers/accel/ivpu/vpu_boot_api.h | 349 +++++++++++++++++++++++++ include/uapi/drm/ivpu_accel.h | 21 ++ 8 files changed, 979 insertions(+), 1 deletion(-) create mode 100644 drivers/accel/ivpu/ivpu_fw.c create mode 100644 drivers/accel/ivpu/ivpu_fw.h create mode 100644 drivers/accel/ivpu/vpu_boot_api.h diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 46595f0112e3..9fa6a76e9d79 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -3,6 +3,7 @@ intel_vpu-y := \ ivpu_drv.o \ + ivpu_fw.o \ ivpu_gem.o \ ivpu_hw_mtl.o \ ivpu_ipc.o \ diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 1eb12e157acd..0b43e880c99c 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -14,10 +14,13 @@ #include #include +#include "vpu_boot_api.h" #include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" +#include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -32,6 +35,10 @@ int ivpu_dbg_mask; module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); +int ivpu_test_mode; +module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); +MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw"); + u8 ivpu_pll_min_ratio; module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency"); @@ -129,6 +136,28 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f case DRM_IVPU_PARAM_CONTEXT_ID: args->value = file_priv->ctx.id; break; + case DRM_IVPU_PARAM_FW_API_VERSION: + if (args->index < VPU_FW_API_VER_NUM) { + struct vpu_firmware_header *fw_hdr; + + fw_hdr = (struct vpu_firmware_header *)vdev->fw->file->data; + args->value = fw_hdr->api_version[args->index]; + } else { + ret = -EINVAL; + } + break; + case DRM_IVPU_PARAM_ENGINE_HEARTBEAT: + ret = ivpu_jsm_get_heartbeat(vdev, args->index, &args->value); + break; + case DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID: + args->value = (u64)atomic64_inc_return(&vdev->unique_id_counter); + break; + case DRM_IVPU_PARAM_TILE_CONFIG: + args->value = vdev->hw->tile_fuse; + break; + case DRM_IVPU_PARAM_SKU: + args->value = vdev->hw->sku; + break; default: ret = -EINVAL; break; @@ -225,11 +254,85 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), }; +static int ivpu_wait_for_ready(struct ivpu_device *vdev) +{ + struct ivpu_ipc_consumer cons; + struct ivpu_ipc_hdr ipc_hdr; + unsigned long timeout; + int ret; + + if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST) + return 0; + + ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG); + + timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot); + while (1) { + ret = ivpu_ipc_irq_handler(vdev); + if (ret) + break; + ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0); + if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout)) + break; + + cond_resched(); + } + + ivpu_ipc_consumer_del(vdev, &cons); + + if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) { + ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n", + ipc_hdr.data_addr); + return -EIO; + } + + if (!ret) + ivpu_info(vdev, "VPU ready message received successfully\n"); + else + ivpu_hw_diagnose_failure(vdev); + + return ret; +} + +/** + * ivpu_boot() - Start VPU firmware + * @vdev: VPU device + * + * This function is paired with ivpu_shutdown() but it doesn't power up the + * VPU because power up has to be called very early in ivpu_probe(). + */ +int ivpu_boot(struct ivpu_device *vdev) +{ + int ret; + + /* Update boot params located at first 4KB of FW memory */ + ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr); + + ret = ivpu_hw_boot_fw(vdev); + if (ret) { + ivpu_err(vdev, "Failed to start the firmware: %d\n", ret); + return ret; + } + + ret = ivpu_wait_for_ready(vdev); + if (ret) { + ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + return ret; + } + + ivpu_hw_irq_clear(vdev); + enable_irq(vdev->irq); + ivpu_hw_irq_enable(vdev); + ivpu_ipc_enable(vdev); + return 0; +} + int ivpu_shutdown(struct ivpu_device *vdev) { int ret; ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); @@ -341,6 +444,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (!vdev->mmu) return -ENOMEM; + vdev->fw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->fw), GFP_KERNEL); + if (!vdev->fw) + return -ENOMEM; + vdev->ipc = drmm_kzalloc(&vdev->drm, sizeof(*vdev->ipc), GFP_KERNEL); if (!vdev->ipc) return -ENOMEM; @@ -349,6 +456,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->platform = IVPU_PLATFORM_INVALID; vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; + atomic64_set(&vdev->unique_id_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); ret = ivpu_pci_init(vdev); @@ -389,14 +497,34 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_mmu_gctx_fini; } + ret = ivpu_fw_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret); + goto err_mmu_gctx_fini; + } + ret = ivpu_ipc_init(vdev); if (ret) { ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret); - goto err_mmu_gctx_fini; + goto err_fw_fini; + } + + ret = ivpu_fw_load(vdev); + if (ret) { + ivpu_err(vdev, "Failed to load firmware: %d\n", ret); + goto err_fw_fini; + } + + ret = ivpu_boot(vdev); + if (ret) { + ivpu_err(vdev, "Failed to boot: %d\n", ret); + goto err_fw_fini; } return 0; +err_fw_fini: + ivpu_fw_fini(vdev); err_mmu_gctx_fini: ivpu_mmu_global_context_fini(vdev); err_power_down: @@ -410,6 +538,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_shutdown(vdev); ivpu_ipc_fini(vdev); + ivpu_fw_fini(vdev); ivpu_mmu_global_context_fini(vdev); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 39b706d82e68..4f2c4268f1dd 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -74,6 +74,7 @@ struct ivpu_wa_table { struct ivpu_hw_info; struct ivpu_mmu_info; +struct ivpu_fw_info; struct ivpu_ipc_info; struct ivpu_device { @@ -86,12 +87,15 @@ struct ivpu_device { struct ivpu_wa_table wa; struct ivpu_hw_info *hw; struct ivpu_mmu_info *mmu; + struct ivpu_fw_info *fw; struct ivpu_ipc_info *ipc; struct ivpu_mmu_context gctx; struct xarray context_xa; struct xa_limit context_xa_limit; + atomic64_t unique_id_counter; + struct { int boot; int jsm; @@ -116,9 +120,16 @@ extern int ivpu_dbg_mask; extern u8 ivpu_pll_min_ratio; extern u8 ivpu_pll_max_ratio; +#define IVPU_TEST_MODE_DISABLED 0 +#define IVPU_TEST_MODE_FW_TEST 1 +#define IVPU_TEST_MODE_NULL_HW 2 +extern int ivpu_test_mode; + struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id); void ivpu_file_priv_put(struct ivpu_file_priv **link); + +int ivpu_boot(struct ivpu_device *vdev); int ivpu_shutdown(struct ivpu_device *vdev); static inline bool ivpu_is_mtl(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c new file mode 100644 index 000000000000..4baa0767a10d --- /dev/null +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include +#include +#include + +#include "vpu_boot_api.h" +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" + +#define FW_GLOBAL_MEM_START (2ull * SZ_1G) +#define FW_GLOBAL_MEM_END (3ull * SZ_1G) +#define FW_SHARED_MEM_SIZE SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */ +#define FW_SHARED_MEM_ALIGNMENT SZ_128K /* VPU MTRR limitation */ +#define FW_RUNTIME_MAX_SIZE SZ_512M +#define FW_SHAVE_NN_MAX_SIZE SZ_2M +#define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START) +#define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE) +#define FW_VERSION_HEADER_SIZE SZ_4K +#define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE) + +#define WATCHDOG_MSS_REDIRECT 32 +#define WATCHDOG_NCE_REDIRECT 33 + +#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31) + +#define IVPU_FW_CHECK_API(vdev, fw_hdr, name) ivpu_fw_check_api(vdev, fw_hdr, #name, \ + VPU_##name##_API_VER_INDEX, \ + VPU_##name##_API_VER_MAJOR, \ + VPU_##name##_API_VER_MINOR) + +static char *ivpu_firmware; +module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); +MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/.."); + +static int ivpu_fw_request(struct ivpu_device *vdev) +{ + static const char * const fw_names[] = { + "mtl_vpu.bin", + "intel/vpu/mtl_vpu_v0.0.bin" + }; + int ret = -ENOENT; + int i; + + if (ivpu_firmware) + return request_firmware(&vdev->fw->file, ivpu_firmware, vdev->drm.dev); + + for (i = 0; i < ARRAY_SIZE(fw_names); i++) { + ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i], vdev->drm.dev); + if (!ret) + return 0; + } + + ivpu_err(vdev, "Failed to request firmware: %d\n", ret); + return ret; +} + +static void +ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr, + const char *str, int index, u16 expected_major, u16 expected_minor) +{ + u16 major = (u16)(fw_hdr->api_version[index] >> 16); + u16 minor = (u16)(fw_hdr->api_version[index]); + + if (major != expected_major) { + ivpu_warn(vdev, "Incompatible FW %s API version: %d.%d (expected %d.%d)\n", + str, major, minor, expected_major, expected_minor); + } + ivpu_dbg(vdev, FW_BOOT, "FW %s API version: %d.%d (expected %d.%d)\n", + str, major, minor, expected_major, expected_minor); +} + +static int ivpu_fw_parse(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data; + u64 runtime_addr, image_load_addr, runtime_size, image_size; + + if (fw->file->size <= FW_FILE_IMAGE_OFFSET) { + ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size); + return -EINVAL; + } + + if (fw_hdr->header_version != VPU_FW_HEADER_VERSION) { + ivpu_err(vdev, "Invalid firmware header version: %u\n", fw_hdr->header_version); + return -EINVAL; + } + + runtime_addr = fw_hdr->boot_params_load_address; + runtime_size = fw_hdr->runtime_size; + image_load_addr = fw_hdr->image_load_address; + image_size = fw_hdr->image_size; + + if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) { + ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr); + return -EINVAL; + } + + if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) { + ivpu_err(vdev, "Invalid firmware runtime size: %llu\n", runtime_size); + return -EINVAL; + } + + if (FW_FILE_IMAGE_OFFSET + image_size > fw->file->size) { + ivpu_err(vdev, "Invalid image size: %llu\n", image_size); + return -EINVAL; + } + + if (image_load_addr < runtime_addr || + image_load_addr + image_size > runtime_addr + runtime_size) { + ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n", + image_load_addr, image_size); + return -EINVAL; + } + + if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) { + ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size); + return -EINVAL; + } + + if (fw_hdr->entry_point < image_load_addr || + fw_hdr->entry_point >= image_load_addr + image_size) { + ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point); + return -EINVAL; + } + + fw->runtime_addr = runtime_addr; + fw->runtime_size = runtime_size; + fw->image_load_offset = image_load_addr - runtime_addr; + fw->image_size = image_size; + fw->shave_nn_size = PAGE_ALIGN(fw_hdr->shave_nn_fw_size); + + fw->cold_boot_entry_point = fw_hdr->entry_point; + fw->entry_point = fw->cold_boot_entry_point; + + ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", + fw_hdr->header_version, fw_hdr->image_format); + ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n", + fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size); + ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n", + fw->runtime_addr, image_load_addr, fw->entry_point); + ivpu_dbg(vdev, FW_BOOT, "FW version: %s\n", (char *)fw_hdr + VPU_FW_HEADER_SIZE); + + IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT); + IVPU_FW_CHECK_API(vdev, fw_hdr, JSM); + + return 0; +} + +static void ivpu_fw_release(struct ivpu_device *vdev) +{ + release_firmware(vdev->fw->file); +} + +static int ivpu_fw_update_global_range(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT); + u64 size = FW_SHARED_MEM_SIZE; + + if (start + size > FW_GLOBAL_MEM_END) { + ivpu_err(vdev, "No space for shared region, start %lld, size %lld\n", start, size); + return -EINVAL; + } + + ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size); + return 0; +} + +static int ivpu_fw_mem_init(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + int ret; + + ret = ivpu_fw_update_global_range(vdev); + if (ret) + return ret; + + fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC); + if (!fw->mem) { + ivpu_err(vdev, "Failed to allocate firmware runtime memory\n"); + return -ENOMEM; + } + + if (fw->shave_nn_size) { + fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start, + fw->shave_nn_size, DRM_IVPU_BO_UNCACHED); + if (!fw->mem_shave_nn) { + ivpu_err(vdev, "Failed to allocate shavenn buffer\n"); + ivpu_bo_free_internal(fw->mem); + return -ENOMEM; + } + } + + return 0; +} + +static void ivpu_fw_mem_fini(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + + if (fw->mem_shave_nn) { + ivpu_bo_free_internal(fw->mem_shave_nn); + fw->mem_shave_nn = NULL; + } + + ivpu_bo_free_internal(fw->mem); + fw->mem = NULL; +} + +int ivpu_fw_init(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_fw_request(vdev); + if (ret) + return ret; + + ret = ivpu_fw_parse(vdev); + if (ret) + goto err_fw_release; + + ret = ivpu_fw_mem_init(vdev); + if (ret) + goto err_fw_release; + + return 0; + +err_fw_release: + ivpu_fw_release(vdev); + return ret; +} + +void ivpu_fw_fini(struct ivpu_device *vdev) +{ + ivpu_fw_mem_fini(vdev); + ivpu_fw_release(vdev); +} + +int ivpu_fw_load(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + u64 image_end_offset = fw->image_load_offset + fw->image_size; + + memset(fw->mem->kvaddr, 0, fw->image_load_offset); + memcpy(fw->mem->kvaddr + fw->image_load_offset, + fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size); + + if (IVPU_WA(clear_runtime_mem)) { + u8 *start = fw->mem->kvaddr + image_end_offset; + u64 size = fw->mem->base.size - image_end_offset; + + memset(start, 0, size); + } + + wmb(); /* Flush WC buffers after writing fw->mem */ + + return 0; +} + +static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) +{ + ivpu_dbg(vdev, FW_BOOT, "boot_params.magic = 0x%x\n", + boot_params->magic); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_id = 0x%x\n", + boot_params->vpu_id); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_count = 0x%x\n", + boot_params->vpu_count); + ivpu_dbg(vdev, FW_BOOT, "boot_params.frequency = %u\n", + boot_params->frequency); + ivpu_dbg(vdev, FW_BOOT, "boot_params.perf_clk_frequency = %u\n", + boot_params->perf_clk_frequency); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_start = 0x%llx\n", + boot_params->ipc_header_area_start); + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_size = 0x%x\n", + boot_params->ipc_header_area_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_base = 0x%llx\n", + boot_params->shared_region_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_size = 0x%x\n", + boot_params->shared_region_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_start = 0x%llx\n", + boot_params->ipc_payload_area_start); + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_size = 0x%x\n", + boot_params->ipc_payload_area_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_base = 0x%llx\n", + boot_params->global_aliased_pio_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_size = 0x%x\n", + boot_params->global_aliased_pio_size); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.autoconfig = 0x%x\n", + boot_params->autoconfig); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 0x%x\n", + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use); + ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 0x%x\n", + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n", + boot_params->global_memory_allocator_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n", + boot_params->global_memory_allocator_size); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n", + boot_params->shave_nn_fw_base); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_mss = 0x%x\n", + boot_params->watchdog_irq_mss); + ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n", + boot_params->watchdog_irq_nce); + ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n", + boot_params->host_to_vpu_irq); + ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n", + boot_params->job_done_irq); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n", + boot_params->host_version_id); + ivpu_dbg(vdev, FW_BOOT, "boot_params.si_stepping = 0x%x\n", + boot_params->si_stepping); + ivpu_dbg(vdev, FW_BOOT, "boot_params.device_id = 0x%llx\n", + boot_params->device_id); + ivpu_dbg(vdev, FW_BOOT, "boot_params.feature_exclusion = 0x%llx\n", + boot_params->feature_exclusion); + ivpu_dbg(vdev, FW_BOOT, "boot_params.sku = 0x%llx\n", + boot_params->sku); + ivpu_dbg(vdev, FW_BOOT, "boot_params.min_freq_pll_ratio = 0x%x\n", + boot_params->min_freq_pll_ratio); + ivpu_dbg(vdev, FW_BOOT, "boot_params.pn_freq_pll_ratio = 0x%x\n", + boot_params->pn_freq_pll_ratio); + ivpu_dbg(vdev, FW_BOOT, "boot_params.max_freq_pll_ratio = 0x%x\n", + boot_params->max_freq_pll_ratio); + ivpu_dbg(vdev, FW_BOOT, "boot_params.default_trace_level = 0x%x\n", + boot_params->default_trace_level); + ivpu_dbg(vdev, FW_BOOT, "boot_params.tracing_buff_message_format_mask = 0x%llx\n", + boot_params->tracing_buff_message_format_mask); + ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_destination_mask = 0x%x\n", + boot_params->trace_destination_mask); + ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_hw_component_mask = 0x%llx\n", + boot_params->trace_hw_component_mask); + ivpu_dbg(vdev, FW_BOOT, "boot_params.boot_type = 0x%x\n", + boot_params->boot_type); + ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_base = 0x%llx\n", + boot_params->punit_telemetry_sram_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_size = 0x%llx\n", + boot_params->punit_telemetry_sram_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n", + boot_params->vpu_telemetry_enable); +} + +void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) +{ + struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx; + + /* In case of warm boot we only have to reset the entrypoint addr */ + if (!ivpu_fw_is_cold_boot(vdev)) { + boot_params->save_restore_ret_address = 0; + return; + } + + boot_params->magic = VPU_BOOT_PARAMS_MAGIC; + boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; + boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev); + + /* + * Uncached region of VPU address space, covers IPC buffers, job queues + * and log buffers, programmable to L2$ Uncached by VPU MTRR + */ + boot_params->shared_region_base = vdev->hw->ranges.global_low.start; + boot_params->shared_region_size = vdev->hw->ranges.global_low.end - + vdev->hw->ranges.global_low.start; + + boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr; + boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2; + + boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2; + boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2; + + boot_params->global_aliased_pio_base = + vdev->hw->ranges.global_aliased_pio.start; + boot_params->global_aliased_pio_size = + ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio); + + /* Allow configuration for L2C_PAGE_TABLE with boot param value */ + boot_params->autoconfig = 1; + + /* Enable L2 cache for first 2GB of high memory */ + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1; + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = + ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start); + + if (vdev->fw->mem_shave_nn) + boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr; + + boot_params->watchdog_irq_mss = WATCHDOG_MSS_REDIRECT; + boot_params->watchdog_irq_nce = WATCHDOG_NCE_REDIRECT; + boot_params->si_stepping = ivpu_revision(vdev); + boot_params->device_id = ivpu_device_id(vdev); + boot_params->feature_exclusion = vdev->hw->tile_fuse; + boot_params->sku = vdev->hw->sku; + + boot_params->min_freq_pll_ratio = vdev->hw->pll.min_ratio; + boot_params->pn_freq_pll_ratio = vdev->hw->pll.pn_ratio; + boot_params->max_freq_pll_ratio = vdev->hw->pll.max_ratio; + + boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev); + boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); + boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); + + wmb(); /* Flush WC buffers after writing bootparams */ + + ivpu_fw_boot_params_print(vdev, boot_params); +} diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h new file mode 100644 index 000000000000..8d275c802d1c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_FW_H__ +#define __IVPU_FW_H__ + +struct ivpu_device; +struct ivpu_bo; +struct vpu_boot_params; + +struct ivpu_fw_info { + const struct firmware *file; + struct ivpu_bo *mem; + struct ivpu_bo *mem_shave_nn; + struct ivpu_bo *mem_log_crit; + struct ivpu_bo *mem_log_verb; + u64 runtime_addr; + u32 runtime_size; + u64 image_load_offset; + u32 image_size; + u32 shave_nn_size; + u64 entry_point; /* Cold or warm boot entry point for next boot */ + u64 cold_boot_entry_point; +}; + +int ivpu_fw_init(struct ivpu_device *vdev); +void ivpu_fw_fini(struct ivpu_device *vdev); +int ivpu_fw_load(struct ivpu_device *vdev); +void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp); + +static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev) +{ + return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point; +} + +#endif /* __IVPU_FW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 0e9ef4c40901..55caf5479f5f 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -4,6 +4,7 @@ */ #include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_hw_mtl_reg.h" #include "ivpu_hw_reg_io.h" #include "ivpu_hw.h" @@ -588,6 +589,15 @@ static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = vdev->fw->entry_point >> 9; + REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val); + + val = REG_SET_FLD(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, DONE, val); + REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume"); } static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h new file mode 100644 index 000000000000..6b71be92ba65 --- /dev/null +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -0,0 +1,349 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef VPU_BOOT_API_H +#define VPU_BOOT_API_H + +/* + * =========== FW API version information beginning ================ + * The bellow values will be used to construct the version info this way: + * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | + * VPU_BOOT_API_VER_MINOR; + * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes. + */ + +/* + * Major version changes that break backward compatibility. + * Major version must start from 1 and can only be incremented. + */ +#define VPU_BOOT_API_VER_MAJOR 3 + +/* + * Minor version changes when API backward compatibility is preserved. + * Resets to 0 if Major version is incremented. + */ +#define VPU_BOOT_API_VER_MINOR 12 + +/* + * API header changed (field names, documentation, formatting) but API itself has not been changed + */ +#define VPU_BOOT_API_VER_PATCH 2 + +/* + * Index in the API version table + * Must be unique for each API + */ +#define VPU_BOOT_API_VER_INDEX 0 +/* ------------ FW API version information end ---------------------*/ + +#pragma pack(push, 1) + +/* + * Firmware image header format + */ +#define VPU_FW_HEADER_SIZE 4096 +#define VPU_FW_HEADER_VERSION 0x1 +#define VPU_FW_VERSION_SIZE 32 +#define VPU_FW_API_VER_NUM 16 + +struct vpu_firmware_header { + u32 header_version; + u32 image_format; + u64 image_load_address; + u32 image_size; + u64 entry_point; + u8 vpu_version[VPU_FW_VERSION_SIZE]; + u32 compression_type; + u64 firmware_version_load_address; + u32 firmware_version_size; + u64 boot_params_load_address; + u32 api_version[VPU_FW_API_VER_NUM]; + /* Size of memory require for firmware execution */ + u32 runtime_size; + u32 shave_nn_fw_size; +}; + +/* + * Firmware boot parameters format + */ + +#define VPU_BOOT_PLL_COUNT 3 +#define VPU_BOOT_PLL_OUT_COUNT 4 + +/** Values for boot_type field */ +#define VPU_BOOT_TYPE_COLDBOOT 0 +#define VPU_BOOT_TYPE_WARMBOOT 1 + +/** Value for magic filed */ +#define VPU_BOOT_PARAMS_MAGIC 0x10000 + +/** VPU scheduling mode. By default, OS scheduling is used. */ +#define VPU_SCHEDULING_MODE_OS 0 +#define VPU_SCHEDULING_MODE_HW 1 + +enum VPU_BOOT_L2_CACHE_CFG_TYPE { + VPU_BOOT_L2_CACHE_CFG_UPA = 0, + VPU_BOOT_L2_CACHE_CFG_NN = 1, + VPU_BOOT_L2_CACHE_CFG_NUM = 2 +}; + +/** + * Logging destinations. + * + * Logging output can be directed to different logging destinations. This enum + * defines the list of logging destinations supported by the VPU firmware (NOTE: + * a specific VPU FW binary may support only a subset of such output + * destinations, depending on the target platform and compile options). + */ +enum vpu_trace_destination { + VPU_TRACE_DESTINATION_PIPEPRINT = 0x1, + VPU_TRACE_DESTINATION_VERBOSE_TRACING = 0x2, + VPU_TRACE_DESTINATION_NORTH_PEAK = 0x4, +}; + +/* + * Processor bit shifts (for loggable HW components). + */ +#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_LRT 1 +#define VPU_TRACE_PROC_BIT_LNN 2 +#define VPU_TRACE_PROC_BIT_SHV_0 3 +#define VPU_TRACE_PROC_BIT_SHV_1 4 +#define VPU_TRACE_PROC_BIT_SHV_2 5 +#define VPU_TRACE_PROC_BIT_SHV_3 6 +#define VPU_TRACE_PROC_BIT_SHV_4 7 +#define VPU_TRACE_PROC_BIT_SHV_5 8 +#define VPU_TRACE_PROC_BIT_SHV_6 9 +#define VPU_TRACE_PROC_BIT_SHV_7 10 +#define VPU_TRACE_PROC_BIT_SHV_8 11 +#define VPU_TRACE_PROC_BIT_SHV_9 12 +#define VPU_TRACE_PROC_BIT_SHV_10 13 +#define VPU_TRACE_PROC_BIT_SHV_11 14 +#define VPU_TRACE_PROC_BIT_SHV_12 15 +#define VPU_TRACE_PROC_BIT_SHV_13 16 +#define VPU_TRACE_PROC_BIT_SHV_14 17 +#define VPU_TRACE_PROC_BIT_SHV_15 18 +#define VPU_TRACE_PROC_BIT_ACT_SHV_0 19 +#define VPU_TRACE_PROC_BIT_ACT_SHV_1 20 +#define VPU_TRACE_PROC_BIT_ACT_SHV_2 21 +#define VPU_TRACE_PROC_BIT_ACT_SHV_3 22 +#define VPU_TRACE_PROC_NO_OF_HW_DEVS 23 + +/* KMB HW component IDs are sequential, so define first and last IDs. */ +#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT +#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15 + +struct vpu_boot_l2_cache_config { + u8 use; + u8 cfg; +}; + +struct vpu_warm_boot_section { + u32 src; + u32 dst; + u32 size; + u32 core_id; + u32 is_clear_op; +}; + +struct vpu_boot_params { + u32 magic; + u32 vpu_id; + u32 vpu_count; + u32 pad0[5]; + /* Clock frequencies: 0x20 - 0xFF */ + u32 frequency; + u32 pll[VPU_BOOT_PLL_COUNT][VPU_BOOT_PLL_OUT_COUNT]; + u32 perf_clk_frequency; + u32 pad1[42]; + /* Memory regions: 0x100 - 0x1FF */ + u64 ipc_header_area_start; + u32 ipc_header_area_size; + u64 shared_region_base; + u32 shared_region_size; + u64 ipc_payload_area_start; + u32 ipc_payload_area_size; + u64 global_aliased_pio_base; + u32 global_aliased_pio_size; + u32 autoconfig; + struct vpu_boot_l2_cache_config cache_defaults[VPU_BOOT_L2_CACHE_CFG_NUM]; + u64 global_memory_allocator_base; + u32 global_memory_allocator_size; + /** + * ShaveNN FW section VPU base address + * On VPU2.7 HW this address must be within 2GB range starting from L2C_PAGE_TABLE base + */ + u64 shave_nn_fw_base; + u64 save_restore_ret_address; /* stores the address of FW's restore entry point */ + u32 pad2[43]; + /* IRQ re-direct numbers: 0x200 - 0x2FF */ + s32 watchdog_irq_mss; + s32 watchdog_irq_nce; + /* ARM -> VPU doorbell interrupt. ARM is notifying VPU of async command or compute job. */ + u32 host_to_vpu_irq; + /* VPU -> ARM job done interrupt. VPU is notifying ARM of compute job completion. */ + u32 job_done_irq; + /* VPU -> ARM IRQ line to use to request MMU update. */ + u32 mmu_update_request_irq; + /* ARM -> VPU IRQ line to use to notify of MMU update completion. */ + u32 mmu_update_done_irq; + /* ARM -> VPU IRQ line to use to request power level change. */ + u32 set_power_level_irq; + /* VPU -> ARM IRQ line to use to notify of power level change completion. */ + u32 set_power_level_done_irq; + /* VPU -> ARM IRQ line to use to notify of VPU idle state change */ + u32 set_vpu_idle_update_irq; + /* VPU -> ARM IRQ line to use to request counter reset. */ + u32 metric_query_event_irq; + /* ARM -> VPU IRQ line to use to notify of counter reset completion. */ + u32 metric_query_event_done_irq; + /* VPU -> ARM IRQ line to use to notify of preemption completion. */ + u32 preemption_done_irq; + /* Padding. */ + u32 pad3[52]; + /* Silicon information: 0x300 - 0x3FF */ + u32 host_version_id; + u32 si_stepping; + u64 device_id; + u64 feature_exclusion; + u64 sku; + /** PLL ratio for minimum clock frequency */ + u32 min_freq_pll_ratio; + /** PLL ratio for maximum clock frequency */ + u32 max_freq_pll_ratio; + /** + * Initial log level threshold (messages with log level severity less than + * the threshold will not be logged); applies to every enabled logging + * destination and loggable HW component. See 'mvLog_t' enum for acceptable + * values. + */ + u32 default_trace_level; + u32 boot_type; + u64 punit_telemetry_sram_base; + u64 punit_telemetry_sram_size; + u32 vpu_telemetry_enable; + u64 crit_tracing_buff_addr; + u32 crit_tracing_buff_size; + u64 verbose_tracing_buff_addr; + u32 verbose_tracing_buff_size; + u64 verbose_tracing_sw_component_mask; /* TO BE REMOVED */ + /** + * Mask of destinations to which logging messages are delivered; bitwise OR + * of values defined in vpu_trace_destination enum. + */ + u32 trace_destination_mask; + /** + * Mask of hardware components for which logging is enabled; bitwise OR of + * bits defined by the VPU_TRACE_PROC_BIT_* macros. + */ + u64 trace_hw_component_mask; + /** Mask of trace message formats supported by the driver */ + u64 tracing_buff_message_format_mask; + u64 trace_reserved_1[2]; + /** + * Period at which the VPU reads the temp sensor values into MMIO, on + * platforms where that is necessary (in ms). 0 to disable reads. + */ + u32 temp_sensor_period_ms; + /** PLL ratio for efficient clock frequency */ + u32 pn_freq_pll_ratio; + u32 pad4[28]; + /* Warm boot information: 0x400 - 0x43F */ + u32 warm_boot_sections_count; + u32 warm_boot_start_address_reference; + u32 warm_boot_section_info_address_offset; + u32 pad5[13]; + /* Power States transitions timestamps: 0x440 - 0x46F*/ + struct { + /* VPU_IDLE -> VPU_ACTIVE transition initiated timestamp */ + u64 vpu_active_state_requested; + /* VPU_IDLE -> VPU_ACTIVE transition completed timestamp */ + u64 vpu_active_state_achieved; + /* VPU_ACTIVE -> VPU_IDLE transition initiated timestamp */ + u64 vpu_idle_state_requested; + /* VPU_ACTIVE -> VPU_IDLE transition completed timestamp */ + u64 vpu_idle_state_achieved; + /* VPU_IDLE -> VPU_STANDBY transition initiated timestamp */ + u64 vpu_standby_state_requested; + /* VPU_IDLE -> VPU_STANDBY transition completed timestamp */ + u64 vpu_standby_state_achieved; + } power_states_timestamps; + /* VPU scheduling mode. Values defined by VPU_SCHEDULING_MODE_* macros. */ + u32 vpu_scheduling_mode; + /* Present call period in milliseconds. */ + u32 vpu_focus_present_timer_ms; + /* Unused/reserved: 0x478 - 0xFFF */ + u32 pad6[738]; +}; + +/* + * Magic numbers set between host and vpu to detect corruptio of tracing init + */ + +#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) + +/* Tracing buffer message format definitions */ +#define VPU_TRACING_FORMAT_STRING 0 +#define VPU_TRACING_FORMAT_MIPI 2 +/* + * Header of the tracing buffer. + * The below defined header will be stored at the beginning of + * each allocated tracing buffer, followed by a series of 256b + * of ASCII trace message entries. + */ +struct vpu_tracing_buffer_header { + /** + * Magic number set by host to detect corruption + * @see VPU_TRACING_BUFFER_CANARY + */ + u32 host_canary_start; + /* offset from start of buffer for trace entries */ + u32 read_index; + u32 pad_to_cache_line_size_0[14]; + /* End of first cache line */ + + /** + * Magic number set by host to detect corruption + * @see VPU_TRACING_BUFFER_CANARY + */ + u32 vpu_canary_start; + /* offset from start of buffer from write start */ + u32 write_index; + /* counter for buffer wrapping */ + u32 wrap_count; + /* legacy field - do not use */ + u32 reserved_0; + /** + * Size of the log buffer include this header (@header_size) and space + * reserved for all messages. If @alignment` is greater that 0 the @Size + * must be multiple of @Alignment. + */ + u32 size; + /* Header version */ + u16 header_version; + /* Header size */ + u16 header_size; + /* + * Format of the messages in the trace buffer + * 0 - null terminated string + * 1 - size + null terminated string + * 2 - MIPI-SysT encoding + */ + u32 format; + /* + * Message alignment + * 0 - messages are place 1 after another + * n - every message starts and multiple on offset + */ + u32 alignment; /* 64, 128, 256 */ + /* Name of the logging entity, i.e "LRT", "LNN", "SHV0", etc */ + char name[16]; + u32 pad_to_cache_line_size_1[4]; + /* End of second cache line */ +}; + +#pragma pack(pop) + +#endif diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 093b83c5697e..f05f5e38ea6d 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -47,6 +47,11 @@ extern "C" { #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 #define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 #define DRM_IVPU_PARAM_CONTEXT_ID 7 +#define DRM_IVPU_PARAM_FW_API_VERSION 8 +#define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9 +#define DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID 10 +#define DRM_IVPU_PARAM_TILE_CONFIG 11 +#define DRM_IVPU_PARAM_SKU 12 #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 @@ -90,6 +95,22 @@ struct drm_ivpu_param { * %DRM_IVPU_PARAM_CONTEXT_ID: * Current context ID, always greater than 0 (read-only) * + * %DRM_IVPU_PARAM_FW_API_VERSION: + * Firmware API version array (read-only) + * + * %DRM_IVPU_PARAM_ENGINE_HEARTBEAT: + * Heartbeat value from an engine (read-only). + * Engine ID (i.e. DRM_IVPU_ENGINE_COMPUTE) is given via index. + * + * %DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID: + * Device-unique inference ID (read-only) + * + * %DRM_IVPU_PARAM_TILE_CONFIG: + * VPU tile configuration (read-only) + * + * %DRM_IVPU_PARAM_SKU: + * VPU SKU ID (read-only) + * */ __u32 param; From cd7272215c44676dba236491941c6c406701cc5e Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:22 +0100 Subject: [PATCH 19/45] accel/ivpu: Add command buffer submission logic Each of the user contexts has two command queues, one for compute engine and one for the copy engine. Command queues are allocated and registered in the device when the first job (command buffer) is submitted from the user space to the VPU device. The userspace provides a list of GEM buffer object handles to submit to the VPU, the driver resolves buffer handles, pins physical memory if needed, increments ref count for each buffer and stores pointers to buffer objects in the ivpu_job objects that track jobs submitted to the device. The VPU signals job completion with an asynchronous message that contains the job id passed to firmware when the job was submitted. Currently, the driver supports simple scheduling logic where jobs submitted from user space are immediately pushed to the VPU device command queues. In the future, it will be extended to use hardware base scheduling and/or drm_sched. Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-7-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 1 + drivers/accel/ivpu/ivpu_drv.c | 34 +- drivers/accel/ivpu/ivpu_drv.h | 5 + drivers/accel/ivpu/ivpu_gem.c | 26 ++ drivers/accel/ivpu/ivpu_gem.h | 1 + drivers/accel/ivpu/ivpu_job.c | 602 ++++++++++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_job.h | 67 ++++ include/uapi/drm/ivpu_accel.h | 92 ++++++ 8 files changed, 824 insertions(+), 4 deletions(-) create mode 100644 drivers/accel/ivpu/ivpu_job.c create mode 100644 drivers/accel/ivpu/ivpu_job.h diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 9fa6a76e9d79..e61ece53a9ae 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -7,6 +7,7 @@ intel_vpu-y := \ ivpu_gem.o \ ivpu_hw_mtl.o \ ivpu_ipc.o \ + ivpu_job.o \ ivpu_jsm_msg.o \ ivpu_mmu.o \ ivpu_mmu_context.o diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 0b43e880c99c..c893c8ac6bed 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -20,6 +20,7 @@ #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" +#include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -31,6 +32,8 @@ static const struct drm_driver driver; +static struct lock_class_key submitted_jobs_xa_lock_class_key; + int ivpu_dbg_mask; module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); @@ -84,8 +87,11 @@ static void file_priv_release(struct kref *ref) ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + ivpu_cmdq_release_all(file_priv); + ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); + mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -209,10 +215,11 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) file_priv->vdev = vdev; file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; kref_init(&file_priv->ref); + mutex_init(&file_priv->lock); ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); if (ret) - goto err_free_file_priv; + goto err_mutex_destroy; old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); if (xa_is_err(old)) { @@ -229,7 +236,8 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) err_ctx_fini: ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); -err_free_file_priv: +err_mutex_destroy: + mutex_destroy(&file_priv->lock); kfree(file_priv); err_xa_erase: xa_erase_irq(&vdev->context_xa, ctx_id); @@ -252,6 +260,8 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) @@ -458,6 +468,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; atomic64_set(&vdev->unique_id_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); + lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); ret = ivpu_pci_init(vdev); if (ret) { @@ -509,20 +521,30 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_fw_fini; } + ret = ivpu_job_done_thread_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); + goto err_ipc_fini; + } + ret = ivpu_fw_load(vdev); if (ret) { ivpu_err(vdev, "Failed to load firmware: %d\n", ret); - goto err_fw_fini; + goto err_job_done_thread_fini; } ret = ivpu_boot(vdev); if (ret) { ivpu_err(vdev, "Failed to boot: %d\n", ret); - goto err_fw_fini; + goto err_job_done_thread_fini; } return 0; +err_job_done_thread_fini: + ivpu_job_done_thread_fini(vdev); +err_ipc_fini: + ivpu_ipc_fini(vdev); err_fw_fini: ivpu_fw_fini(vdev); err_mmu_gctx_fini: @@ -530,6 +552,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) err_power_down: ivpu_hw_power_down(vdev); err_xa_destroy: + xa_destroy(&vdev->submitted_jobs_xa); xa_destroy(&vdev->context_xa); return ret; } @@ -537,10 +560,13 @@ static int ivpu_dev_init(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_shutdown(vdev); + ivpu_job_done_thread_fini(vdev); ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); ivpu_mmu_global_context_fini(vdev); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + xa_destroy(&vdev->submitted_jobs_xa); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); xa_destroy(&vdev->context_xa); } diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 4f2c4268f1dd..5e63c70c47b3 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -94,6 +94,9 @@ struct ivpu_device { struct xarray context_xa; struct xa_limit context_xa_limit; + struct xarray submitted_jobs_xa; + struct task_struct *job_done_thread; + atomic64_t unique_id_counter; struct { @@ -111,6 +114,8 @@ struct ivpu_device { struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; + struct mutex lock; /* Protects cmdq */ + struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; struct ivpu_mmu_context ctx; u32 priority; bool has_mmu_faults; diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index b938359b19af..d1f923971b4c 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -678,6 +678,32 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file return ret; } +int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_bo_wait *args = data; + struct drm_gem_object *obj; + unsigned long timeout; + long ret; + + timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -EINVAL; + + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout); + if (ret == 0) { + ret = -ETIMEDOUT; + } else if (ret > 0) { + ret = 0; + args->job_status = to_ivpu_bo(obj)->job_status; + } + + drm_gem_object_put(obj); + + return ret; +} + static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { unsigned long dma_refcount = 0; diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 1891c90702c2..6b0ceda5f253 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -30,6 +30,7 @@ struct ivpu_bo { u32 handle; u32 flags; uintptr_t user_ptr; + u32 job_status; }; enum ivpu_bo_type { diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c new file mode 100644 index 000000000000..23cdb3b3f1f3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_job.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" +#include "ivpu_job.h" +#include "ivpu_jsm_msg.h" + +#define CMD_BUF_IDX 0 +#define JOB_ID_JOB_MASK GENMASK(7, 0) +#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) +#define JOB_MAX_BUFFER_COUNT 65535 + +static unsigned int ivpu_tdr_timeout_ms; +module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); +MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); + +static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) +{ + ivpu_hw_reg_db_set(vdev, cmdq->db_id); +} + +static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct vpu_job_queue_header *jobq_header; + struct ivpu_cmdq *cmdq; + + cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); + if (!cmdq) + return NULL; + + cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); + if (!cmdq->mem) + goto cmdq_free; + + cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); + cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / + sizeof(struct vpu_job_queue_entry)); + + cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; + jobq_header = &cmdq->jobq->header; + jobq_header->engine_idx = engine; + jobq_header->head = 0; + jobq_header->tail = 0; + wmb(); /* Flush WC buffer for jobq->header */ + + return cmdq; + +cmdq_free: + kfree(cmdq); + return NULL; +} + +static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (!cmdq) + return; + + ivpu_bo_free_internal(cmdq->mem); + kfree(cmdq); +} + +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + int ret; + + lockdep_assert_held(&file_priv->lock); + + if (!cmdq) { + cmdq = ivpu_cmdq_alloc(file_priv, engine); + if (!cmdq) + return NULL; + file_priv->cmdq[engine] = cmdq; + } + + if (cmdq->db_registered) + return cmdq; + + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, + cmdq->mem->vpu_addr, cmdq->mem->base.size); + if (ret) + return NULL; + + cmdq->db_registered = true; + + return cmdq; +} + +static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + + lockdep_assert_held(&file_priv->lock); + + if (cmdq) { + file_priv->cmdq[engine] = NULL; + if (cmdq->db_registered) + ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); + + ivpu_cmdq_free(file_priv, cmdq); + } +} + +void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) +{ + int i; + + mutex_lock(&file_priv->lock); + + for (i = 0; i < IVPU_NUM_ENGINES; i++) + ivpu_cmdq_release_locked(file_priv, i); + + mutex_unlock(&file_priv->lock); +} + +/* + * Mark the doorbell as unregistered and reset job queue pointers. + * This function needs to be called when the VPU hardware is restarted + * and FW looses job queue state. The next time job queue is used it + * will be registered again. + */ +static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + + lockdep_assert_held(&file_priv->lock); + + if (cmdq) { + cmdq->db_registered = false; + cmdq->jobq->header.head = 0; + cmdq->jobq->header.tail = 0; + wmb(); /* Flush WC buffer for jobq header */ + } +} + +static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) +{ + int i; + + mutex_lock(&file_priv->lock); + + for (i = 0; i < IVPU_NUM_ENGINES; i++) + ivpu_cmdq_reset_locked(file_priv, i); + + mutex_unlock(&file_priv->lock); +} + +void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); + if (!file_priv) + continue; + + ivpu_cmdq_reset_all(file_priv); + + ivpu_file_priv_put(&file_priv); + } +} + +static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) +{ + struct ivpu_device *vdev = job->vdev; + struct vpu_job_queue_header *header = &cmdq->jobq->header; + struct vpu_job_queue_entry *entry; + u32 tail = READ_ONCE(header->tail); + u32 next_entry = (tail + 1) % cmdq->entry_count; + + /* Check if there is space left in job queue */ + if (next_entry == header->head) { + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", + job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + return -EBUSY; + } + + entry = &cmdq->jobq->job[tail]; + entry->batch_buf_addr = job->cmd_buf_vpu_addr; + entry->job_id = job->job_id; + entry->flags = 0; + wmb(); /* Ensure that tail is updated after filling entry */ + header->tail = next_entry; + wmb(); /* Flush WC buffer for jobq header */ + + return 0; +} + +struct ivpu_fence { + struct dma_fence base; + spinlock_t lock; /* protects base */ + struct ivpu_device *vdev; +}; + +static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence) +{ + return container_of(fence, struct ivpu_fence, base); +} + +static const char *ivpu_fence_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence) +{ + struct ivpu_fence *ivpu_fence = to_vpu_fence(fence); + + return dev_name(ivpu_fence->vdev->drm.dev); +} + +static const struct dma_fence_ops ivpu_fence_ops = { + .get_driver_name = ivpu_fence_get_driver_name, + .get_timeline_name = ivpu_fence_get_timeline_name, +}; + +static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) +{ + struct ivpu_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + fence->vdev = vdev; + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); + + return &fence->base; +} + +static void job_get(struct ivpu_job *job, struct ivpu_job **link) +{ + struct ivpu_device *vdev = job->vdev; + + kref_get(&job->ref); + *link = job; + + ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); +} + +static void job_release(struct kref *ref) +{ + struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); + struct ivpu_device *vdev = job->vdev; + u32 i; + + for (i = 0; i < job->bo_count; i++) + if (job->bos[i]) + drm_gem_object_put(&job->bos[i]->base); + + dma_fence_put(job->done_fence); + ivpu_file_priv_put(&job->file_priv); + + ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); + kfree(job); +} + +static void job_put(struct ivpu_job *job) +{ + struct ivpu_device *vdev = job->vdev; + + ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); + kref_put(&job->ref, job_release); +} + +static struct ivpu_job * +ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_job *job; + size_t buf_size; + + buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); + job = kzalloc(buf_size, GFP_KERNEL); + if (!job) + return NULL; + + kref_init(&job->ref); + + job->vdev = vdev; + job->engine_idx = engine_idx; + job->bo_count = bo_count; + job->done_fence = ivpu_fence_create(vdev); + if (!job->done_fence) { + ivpu_warn_ratelimited(vdev, "Failed to create a fence\n"); + goto err_free_job; + } + + job->file_priv = ivpu_file_priv_get(file_priv); + + ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); + + return job; + +err_free_job: + kfree(job); + return NULL; +} + +static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + struct ivpu_job *job; + + job = xa_erase(&vdev->submitted_jobs_xa, job_id); + if (!job) + return -ENOENT; + + if (job->file_priv->has_mmu_faults) + job_status = VPU_JSM_STATUS_ABORTED; + + job->bos[CMD_BUF_IDX]->job_status = job_status; + dma_fence_signal(job->done_fence); + + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + + job_put(job); + return 0; +} + +static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg) +{ + struct vpu_ipc_msg_payload_job_done *payload; + struct vpu_jsm_msg *job_ret_msg = msg; + int ret; + + payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; + + ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); + if (ret) + ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); +} + +void ivpu_jobs_abort_all(struct ivpu_device *vdev) +{ + struct ivpu_job *job; + unsigned long id; + + xa_for_each(&vdev->submitted_jobs_xa, id, job) + ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); +} + +static int ivpu_direct_job_submission(struct ivpu_job *job) +{ + struct ivpu_file_priv *file_priv = job->file_priv; + struct ivpu_device *vdev = job->vdev; + struct xa_limit job_id_range; + struct ivpu_cmdq *cmdq; + int ret; + + mutex_lock(&file_priv->lock); + + cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); + if (!cmdq) { + ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", + file_priv->ctx.id, job->engine_idx); + ret = -EINVAL; + goto err_unlock; + } + + job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; + + job_get(job, &job); + ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); + goto err_job_put; + } + + ret = ivpu_cmdq_push_job(cmdq, job); + if (ret) + goto err_xa_erase; + + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n", + job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail); + + if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { + ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + cmdq->jobq->header.head = cmdq->jobq->header.tail; + wmb(); /* Flush WC buffer for jobq header */ + } else { + ivpu_cmdq_ring_db(vdev, cmdq); + } + + mutex_unlock(&file_priv->lock); + return 0; + +err_xa_erase: + xa_erase(&vdev->submitted_jobs_xa, job->job_id); +err_job_put: + job_put(job); +err_unlock: + mutex_unlock(&file_priv->lock); + return ret; +} + +static int +ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, + u32 buf_count, u32 commands_offset) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ww_acquire_ctx acquire_ctx; + struct ivpu_bo *bo; + int ret; + u32 i; + + for (i = 0; i < buf_count; i++) { + struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]); + + if (!obj) + return -ENOENT; + + job->bos[i] = to_ivpu_bo(obj); + + ret = ivpu_bo_pin(job->bos[i]); + if (ret) + return ret; + } + + bo = job->bos[CMD_BUF_IDX]; + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { + ivpu_warn(vdev, "Buffer is already in use\n"); + return -EBUSY; + } + + if (commands_offset >= bo->base.size) { + ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); + return -EINVAL; + } + + job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; + + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, + &acquire_ctx); + if (ret) { + ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); + return ret; + } + + for (i = 0; i < buf_count; i++) { + ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); + if (ret) { + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + goto unlock_reservations; + } + } + + for (i = 0; i < buf_count; i++) + dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + +unlock_reservations: + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); + + wmb(); /* Flush write combining buffers */ + + return ret; +} + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + int ret = 0; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_submit *params = data; + struct ivpu_job *job; + u32 *buf_handles; + + if (params->engine > DRM_IVPU_ENGINE_COPY) + return -EINVAL; + + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) + return -EINVAL; + + if (!IS_ALIGNED(params->commands_offset, 8)) + return -EINVAL; + + if (!file_priv->ctx.id) + return -EINVAL; + + if (file_priv->has_mmu_faults) + return -EBADFD; + + buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); + if (!buf_handles) + return -ENOMEM; + + ret = copy_from_user(buf_handles, + (void __user *)params->buffers_ptr, + params->buffer_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_handles; + } + + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", + file_priv->ctx.id, params->buffer_count); + + job = ivpu_create_job(file_priv, params->engine, params->buffer_count); + if (!job) { + ivpu_err(vdev, "Failed to create job\n"); + ret = -ENOMEM; + goto free_handles; + } + + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, + params->commands_offset); + if (ret) { + ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); + goto job_put; + } + + ret = ivpu_direct_job_submission(job); + if (ret) { + dma_fence_signal(job->done_fence); + ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); + } + +job_put: + job_put(job); +free_handles: + kfree(buf_handles); + + return ret; +} + +static int ivpu_job_done_thread(void *arg) +{ + struct ivpu_device *vdev = (struct ivpu_device *)arg; + struct ivpu_ipc_consumer cons; + struct vpu_jsm_msg jsm_msg; + bool jobs_submitted; + unsigned int timeout; + int ret; + + ivpu_dbg(vdev, JOB, "Started %s\n", __func__); + + ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); + + while (!kthread_should_stop()) { + timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); + ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); + if (!ret) { + ivpu_job_done_message(vdev, &jsm_msg); + } else if (ret == -ETIMEDOUT) { + if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { + ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); + ivpu_hw_diagnose_failure(vdev); + } + } + } + + ivpu_ipc_consumer_del(vdev, &cons); + + ivpu_jobs_abort_all(vdev); + + ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); + return 0; +} + +int ivpu_job_done_thread_init(struct ivpu_device *vdev) +{ + struct task_struct *thread; + + thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); + if (IS_ERR(thread)) { + ivpu_err(vdev, "Failed to start job completion thread\n"); + return -EIO; + } + + get_task_struct(thread); + wake_up_process(thread); + + vdev->job_done_thread = thread; + + return 0; +} + +void ivpu_job_done_thread_fini(struct ivpu_device *vdev) +{ + kthread_stop(vdev->job_done_thread); + put_task_struct(vdev->job_done_thread); +} diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h new file mode 100644 index 000000000000..aa1f0b9479b0 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_job.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_JOB_H__ +#define __IVPU_JOB_H__ + +#include +#include + +#include "ivpu_gem.h" + +struct ivpu_device; +struct ivpu_file_priv; + +/** + * struct ivpu_cmdq - Object representing device queue used to send jobs. + * @jobq: Pointer to job queue memory shared with the device + * @mem: Memory allocated for the job queue, shared with device + * @entry_count Number of job entries in the queue + * @db_id: Doorbell assigned to this job queue + * @db_registered: True if doorbell is registered in device + */ +struct ivpu_cmdq { + struct vpu_job_queue *jobq; + struct ivpu_bo *mem; + u32 entry_count; + u32 db_id; + bool db_registered; +}; + +/** + * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer. + * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW. + * This is a unit of execution, and be tracked by the job_id for + * any status reporting from VPU FW through IPC JOB RET/DONE message. + * @file_priv: The client that submitted this job + * @job_id: Job ID for KMD tracking and job status reporting from VPU FW + * @status: Status of the Job from IPC JOB RET/DONE message + * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job + * @submit_status_offset: Offset within batch buffer where job completion handler + will update the job status + */ +struct ivpu_job { + struct kref ref; + struct ivpu_device *vdev; + struct ivpu_file_priv *file_priv; + struct dma_fence *done_fence; + u64 cmd_buf_vpu_addr; + u32 job_id; + u32 engine_idx; + size_t bo_count; + struct ivpu_bo *bos[]; +}; + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); +void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); + +int ivpu_job_done_thread_init(struct ivpu_device *vdev); +void ivpu_job_done_thread_fini(struct ivpu_device *vdev); + +void ivpu_jobs_abort_all(struct ivpu_device *vdev); + +#endif /* __IVPU_JOB_H__ */ diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index f05f5e38ea6d..839820aed87e 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -19,6 +19,8 @@ extern "C" { #define DRM_IVPU_SET_PARAM 0x01 #define DRM_IVPU_BO_CREATE 0x02 #define DRM_IVPU_BO_INFO 0x03 +#define DRM_IVPU_SUBMIT 0x05 +#define DRM_IVPU_BO_WAIT 0x06 #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -32,6 +34,12 @@ extern "C" { #define DRM_IOCTL_IVPU_BO_INFO \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) +#define DRM_IOCTL_IVPU_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit) + +#define DRM_IOCTL_IVPU_BO_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) + /** * DOC: contexts * @@ -207,6 +215,90 @@ struct drm_ivpu_bo_info { __u64 size; }; +/* drm_ivpu_submit engines */ +#define DRM_IVPU_ENGINE_COMPUTE 0 +#define DRM_IVPU_ENGINE_COPY 1 + +/** + * struct drm_ivpu_submit - Submit commands to the VPU + * + * Execute a single command buffer on a given VPU engine. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this job. + * The number of elements in the array must be equal to the value given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain all + * BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** + * @engine: Select the engine this job should be executed on + * + * %DRM_IVPU_ENGINE_COMPUTE: + * + * Performs Deep Learning Neural Compute Inference Operations + * + * %DRM_IVPU_ENGINE_COPY: + * + * Performs memory copy operations to/from system memory allocated for VPU + */ + __u32 engine; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; +}; + +/* drm_ivpu_bo_wait job status codes */ +#define DRM_IVPU_JOB_STATUS_SUCCESS 0 + +/** + * struct drm_ivpu_bo_wait - Wait for BO to become inactive + * + * Blocks until a given buffer object becomes inactive. + * With @timeout_ms set to 0 returns immediately. + */ +struct drm_ivpu_bo_wait { + /** @handle: Handle to the buffer object to be waited on */ + __u32 handle; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */ + __s64 timeout_ns; + + /** + * @job_status: + * + * Job status code which is updated after the job is completed. + * &DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise. + * Valid only if @handle points to a command buffer. + */ + __u32 job_status; + + /** @pad: Padding - must be zero */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif From 852be13f3bd32c1eab808840cfac41b1fea25991 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:23 +0100 Subject: [PATCH 20/45] accel/ivpu: Add PM support - Implement cold and warm firmware boot flows - Add hang recovery support - Add runtime power management support Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-8-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 3 +- drivers/accel/ivpu/ivpu_drv.c | 28 +++ drivers/accel/ivpu/ivpu_drv.h | 2 + drivers/accel/ivpu/ivpu_fw.c | 4 + drivers/accel/ivpu/ivpu_hw_mtl.c | 12 ++ drivers/accel/ivpu/ivpu_ipc.c | 20 +- drivers/accel/ivpu/ivpu_job.c | 14 +- drivers/accel/ivpu/ivpu_mmu.c | 9 +- drivers/accel/ivpu/ivpu_pm.c | 329 +++++++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_pm.h | 38 ++++ 10 files changed, 451 insertions(+), 8 deletions(-) create mode 100644 drivers/accel/ivpu/ivpu_pm.c create mode 100644 drivers/accel/ivpu/ivpu_pm.h diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index e61ece53a9ae..80f1fb3548ae 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -10,6 +10,7 @@ intel_vpu-y := \ ivpu_job.o \ ivpu_jsm_msg.o \ ivpu_mmu.o \ - ivpu_mmu_context.o + ivpu_mmu_context.o \ + ivpu_pm.o obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o \ No newline at end of file diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index c893c8ac6bed..2bc2f1b90671 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -24,6 +24,7 @@ #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +#include "ivpu_pm.h" #ifndef DRIVER_VERSION_STR #define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ @@ -462,6 +463,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (!vdev->ipc) return -ENOMEM; + vdev->pm = drmm_kzalloc(&vdev->drm, sizeof(*vdev->pm), GFP_KERNEL); + if (!vdev->pm) + return -ENOMEM; + vdev->hw->ops = &ivpu_hw_mtl_ops; vdev->platform = IVPU_PLATFORM_INVALID; vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; @@ -521,6 +526,12 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_fw_fini; } + ret = ivpu_pm_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize PM: %d\n", ret); + goto err_ipc_fini; + } + ret = ivpu_job_done_thread_init(vdev); if (ret) { ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); @@ -539,6 +550,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_job_done_thread_fini; } + ivpu_pm_enable(vdev); + return 0; err_job_done_thread_fini: @@ -559,6 +572,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { + ivpu_pm_disable(vdev); ivpu_shutdown(vdev); ivpu_job_done_thread_fini(vdev); ivpu_ipc_fini(vdev); @@ -611,11 +625,25 @@ static void ivpu_remove(struct pci_dev *pdev) ivpu_dev_fini(vdev); } +static const struct dev_pm_ops ivpu_drv_pci_pm = { + SET_SYSTEM_SLEEP_PM_OPS(ivpu_pm_suspend_cb, ivpu_pm_resume_cb) + SET_RUNTIME_PM_OPS(ivpu_pm_runtime_suspend_cb, ivpu_pm_runtime_resume_cb, NULL) +}; + +static const struct pci_error_handlers ivpu_drv_pci_err = { + .reset_prepare = ivpu_pm_reset_prepare_cb, + .reset_done = ivpu_pm_reset_done_cb, +}; + static struct pci_driver ivpu_pci_driver = { .name = KBUILD_MODNAME, .id_table = ivpu_pci_ids, .probe = ivpu_probe, .remove = ivpu_remove, + .driver = { + .pm = &ivpu_drv_pci_pm, + }, + .err_handler = &ivpu_drv_pci_err, }; module_pci_driver(ivpu_pci_driver); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 5e63c70c47b3..f47b4965db2e 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -76,6 +76,7 @@ struct ivpu_hw_info; struct ivpu_mmu_info; struct ivpu_fw_info; struct ivpu_ipc_info; +struct ivpu_pm_info; struct ivpu_device { struct drm_device drm; @@ -89,6 +90,7 @@ struct ivpu_device { struct ivpu_mmu_info *mmu; struct ivpu_fw_info *fw; struct ivpu_ipc_info *ipc; + struct ivpu_pm_info *pm; struct ivpu_mmu_context gctx; struct xarray context_xa; diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 4baa0767a10d..b463c24adb70 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -14,6 +14,7 @@ #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" +#include "ivpu_pm.h" #define FW_GLOBAL_MEM_START (2ull * SZ_1G) #define FW_GLOBAL_MEM_END (3ull * SZ_1G) @@ -361,9 +362,12 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params /* In case of warm boot we only have to reset the entrypoint addr */ if (!ivpu_fw_is_cold_boot(vdev)) { boot_params->save_restore_ret_address = 0; + vdev->pm->is_warmboot = true; return; } + vdev->pm->is_warmboot = false; + boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev); diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 55caf5479f5f..b59b1f472b40 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -10,6 +10,7 @@ #include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_mmu.h" +#include "ivpu_pm.h" #define TILE_FUSE_ENABLE_BOTH 0x0 #define TILE_FUSE_ENABLE_UPPER 0x1 @@ -921,6 +922,8 @@ static void ivpu_hw_mtl_irq_disable(struct ivpu_device *vdev) static void ivpu_hw_mtl_irq_wdt_nce_handler(struct ivpu_device *vdev) { ivpu_err_ratelimited(vdev, "WDT NCE irq\n"); + + ivpu_pm_schedule_recovery(vdev); } static void ivpu_hw_mtl_irq_wdt_mss_handler(struct ivpu_device *vdev) @@ -928,11 +931,14 @@ static void ivpu_hw_mtl_irq_wdt_mss_handler(struct ivpu_device *vdev) ivpu_err_ratelimited(vdev, "WDT MSS irq\n"); ivpu_hw_wdt_disable(vdev); + ivpu_pm_schedule_recovery(vdev); } static void ivpu_hw_mtl_irq_noc_firewall_handler(struct ivpu_device *vdev) { ivpu_err_ratelimited(vdev, "NOC Firewall irq\n"); + + ivpu_pm_schedule_recovery(vdev); } /* Handler for IRQs from VPU core (irqV) */ @@ -970,6 +976,7 @@ static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) { u32 status = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; + bool schedule_recovery = false; if (status == 0) return 0; @@ -983,6 +990,7 @@ static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); REGB_WR32(MTL_BUTTRESS_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; } if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) { @@ -993,6 +1001,7 @@ static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); REGB_WR32(MTL_BUTTRESS_UFI_ERR_CLEAR, 0x1); + schedule_recovery = true; } /* @@ -1005,6 +1014,9 @@ static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) /* Re-enable global interrupt */ REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); + if (schedule_recovery) + ivpu_pm_schedule_recovery(vdev); + return status; } diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index c756476bfc5b..3adcfa80fc0e 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -14,6 +14,7 @@ #include "ivpu_hw_reg_io.h" #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" #define IPC_MAX_RX_MSG 128 #define IS_KTHREAD() (get_current()->flags & PF_KTHREAD) @@ -294,18 +295,27 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, { struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; struct vpu_jsm_msg hb_resp; - int ret; + int ret, hb_ret; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp, channel, timeout_ms); if (ret != -ETIMEDOUT) - return ret; + goto rpm_put; - ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, - &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (ret == -ETIMEDOUT) + hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, + &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, + vdev->timeout.jsm); + if (hb_ret == -ETIMEDOUT) { ivpu_hw_diagnose_failure(vdev); + ivpu_pm_schedule_recovery(vdev); + } +rpm_put: + ivpu_rpm_put(vdev); return ret; } diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 23cdb3b3f1f3..3276bd9107b4 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -17,6 +17,7 @@ #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" #define CMD_BUF_IDX 0 #define JOB_ID_JOB_MASK GENMASK(7, 0) @@ -270,6 +271,9 @@ static void job_release(struct kref *ref) ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); kfree(job); + + /* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */ + ivpu_rpm_put(vdev); } static void job_put(struct ivpu_job *job) @@ -286,11 +290,16 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) struct ivpu_device *vdev = file_priv->vdev; struct ivpu_job *job; size_t buf_size; + int ret; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return NULL; buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); job = kzalloc(buf_size, GFP_KERNEL); if (!job) - return NULL; + goto err_rpm_put; kref_init(&job->ref); @@ -311,6 +320,8 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) err_free_job: kfree(job); +err_rpm_put: + ivpu_rpm_put(vdev); return NULL; } @@ -565,6 +576,7 @@ static int ivpu_job_done_thread(void *arg) if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); ivpu_hw_diagnose_failure(vdev); + ivpu_pm_schedule_recovery(vdev); } } } diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index ff5ef6da1fd3..694e978aba66 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -11,6 +11,7 @@ #include "ivpu_hw_reg_io.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +#include "ivpu_pm.h" #define IVPU_MMU_IDR0_REF 0x080f3e0f #define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f @@ -814,6 +815,7 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) { + bool schedule_recovery = false; u32 *event; u32 ssid; @@ -823,9 +825,14 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_mmu_dump_event(vdev, event); ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); - if (ssid != IVPU_GLOBAL_CONTEXT_MMU_SSID) + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + schedule_recovery = true; + else ivpu_mmu_user_context_mark_invalid(vdev, ssid); } + + if (schedule_recovery) + ivpu_pm_schedule_recovery(vdev); } void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c new file mode 100644 index 000000000000..553bcbd787b3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include "vpu_boot_api.h" +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_fw.h" +#include "ivpu_ipc.h" +#include "ivpu_job.h" +#include "ivpu_mmu.h" +#include "ivpu_pm.h" + +static bool ivpu_disable_recovery; +module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); +MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected"); + +#define PM_RESCHEDULE_LIMIT 5 + +static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + + ivpu_cmdq_reset_all_contexts(vdev); + ivpu_ipc_reset(vdev); + ivpu_fw_load(vdev); + fw->entry_point = fw->cold_boot_entry_point; +} + +static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + struct vpu_boot_params *bp = fw->mem->kvaddr; + + if (!bp->save_restore_ret_address) { + ivpu_pm_prepare_cold_boot(vdev); + return; + } + + ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address); + fw->entry_point = bp->save_restore_ret_address; +} + +static int ivpu_suspend(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_shutdown(vdev); + if (ret) { + ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); + return ret; + } + + return ret; +} + +static int ivpu_resume(struct ivpu_device *vdev) +{ + int ret; + +retry: + ret = ivpu_hw_power_up(vdev); + if (ret) { + ivpu_err(vdev, "Failed to power up HW: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); + ivpu_hw_power_down(vdev); + return ret; + } + + ret = ivpu_boot(vdev); + if (ret) { + ivpu_mmu_disable(vdev); + ivpu_hw_power_down(vdev); + if (!ivpu_fw_is_cold_boot(vdev)) { + ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret); + ivpu_pm_prepare_cold_boot(vdev); + goto retry; + } else { + ivpu_err(vdev, "Failed to resume the FW: %d\n", ret); + } + } + + return ret; +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + int ret; + + ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); +} + +void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) +{ + struct ivpu_pm_info *pm = vdev->pm; + + if (ivpu_disable_recovery) { + ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n"); + return; + } + + if (ivpu_is_fpga(vdev)) { + ivpu_err(vdev, "Recovery not available on FPGA\n"); + return; + } + + /* Schedule recovery if it's not in progress */ + if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) { + ivpu_hw_irq_disable(vdev); + queue_work(system_long_wq, &pm->recovery_work); + } +} + +int ivpu_pm_suspend_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Suspend..\n"); + + ret = ivpu_suspend(vdev); + if (ret && vdev->pm->suspend_reschedule_counter) { + ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", + vdev->pm->suspend_reschedule_counter); + pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); + vdev->pm->suspend_reschedule_counter--; + return -EBUSY; + } else if (!vdev->pm->suspend_reschedule_counter) { + ivpu_warn(vdev, "Failed to enter idle, force suspend\n"); + ivpu_pm_prepare_cold_boot(vdev); + } else { + ivpu_pm_prepare_warm_boot(vdev); + } + + vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + pci_save_state(to_pci_dev(dev)); + pci_set_power_state(to_pci_dev(dev), PCI_D3hot); + + ivpu_dbg(vdev, PM, "Suspend done.\n"); + + return ret; +} + +int ivpu_pm_resume_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Resume..\n"); + + pci_set_power_state(to_pci_dev(dev), PCI_D0); + pci_restore_state(to_pci_dev(dev)); + + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to resume: %d\n", ret); + + ivpu_dbg(vdev, PM, "Resume done.\n"); + + return ret; +} + +int ivpu_pm_runtime_suspend_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Runtime suspend..\n"); + + if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { + ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", + vdev->pm->suspend_reschedule_counter); + pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); + vdev->pm->suspend_reschedule_counter--; + return -EAGAIN; + } + + ret = ivpu_suspend(vdev); + if (ret) + ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); + + if (!vdev->pm->suspend_reschedule_counter) { + ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n"); + ivpu_pm_prepare_cold_boot(vdev); + } else { + ivpu_pm_prepare_warm_boot(vdev); + } + + vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); + + return 0; +} + +int ivpu_pm_runtime_resume_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Runtime resume..\n"); + + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); + + ivpu_dbg(vdev, PM, "Runtime resume done.\n"); + + return ret; +} + +int ivpu_rpm_get(struct ivpu_device *vdev) +{ + int ret; + + ivpu_dbg(vdev, RPM, "rpm_get count %d\n", atomic_read(&vdev->drm.dev->power.usage_count)); + + ret = pm_runtime_resume_and_get(vdev->drm.dev); + if (!drm_WARN_ON(&vdev->drm, ret < 0)) + vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + return ret; +} + +void ivpu_rpm_put(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, RPM, "rpm_put count %d\n", atomic_read(&vdev->drm.dev->power.usage_count)); + + pm_runtime_mark_last_busy(vdev->drm.dev); + pm_runtime_put_autosuspend(vdev->drm.dev); +} + +void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + + pm_runtime_get_sync(vdev->drm.dev); + + ivpu_dbg(vdev, PM, "Pre-reset..\n"); + atomic_set(&vdev->pm->in_reset, 1); + ivpu_shutdown(vdev); + ivpu_pm_prepare_cold_boot(vdev); + ivpu_jobs_abort_all(vdev); + ivpu_dbg(vdev, PM, "Pre-reset done.\n"); +} + +void ivpu_pm_reset_done_cb(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + int ret; + + ivpu_dbg(vdev, PM, "Post-reset..\n"); + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); + atomic_set(&vdev->pm->in_reset, 0); + ivpu_dbg(vdev, PM, "Post-reset done.\n"); + + pm_runtime_put_autosuspend(vdev->drm.dev); +} + +int ivpu_pm_init(struct ivpu_device *vdev) +{ + struct device *dev = vdev->drm.dev; + struct ivpu_pm_info *pm = vdev->pm; + + pm->vdev = vdev; + pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + atomic_set(&pm->in_reset, 0); + INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); + + pm_runtime_use_autosuspend(dev); + + if (ivpu_disable_recovery) + pm_runtime_set_autosuspend_delay(dev, -1); + else if (ivpu_is_silicon(vdev)) + pm_runtime_set_autosuspend_delay(dev, 100); + else + pm_runtime_set_autosuspend_delay(dev, 60000); + + return 0; +} + +void ivpu_pm_enable(struct ivpu_device *vdev) +{ + struct device *dev = vdev->drm.dev; + + pm_runtime_set_active(dev); + pm_runtime_allow(dev); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + ivpu_dbg(vdev, RPM, "Enable RPM count %d\n", atomic_read(&dev->power.usage_count)); +} + +void ivpu_pm_disable(struct ivpu_device *vdev) +{ + struct device *dev = vdev->drm.dev; + + ivpu_dbg(vdev, RPM, "Disable RPM count %d\n", atomic_read(&dev->power.usage_count)); + + pm_runtime_get_noresume(vdev->drm.dev); + pm_runtime_forbid(vdev->drm.dev); +} diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h new file mode 100644 index 000000000000..dc1b3758e13f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_PM_H__ +#define __IVPU_PM_H__ + +#include + +struct ivpu_device; + +struct ivpu_pm_info { + struct ivpu_device *vdev; + struct work_struct recovery_work; + atomic_t in_reset; + bool is_warmboot; + u32 suspend_reschedule_counter; +}; + +int ivpu_pm_init(struct ivpu_device *vdev); +void ivpu_pm_enable(struct ivpu_device *vdev); +void ivpu_pm_disable(struct ivpu_device *vdev); + +int ivpu_pm_suspend_cb(struct device *dev); +int ivpu_pm_resume_cb(struct device *dev); +int ivpu_pm_runtime_suspend_cb(struct device *dev); +int ivpu_pm_runtime_resume_cb(struct device *dev); + +void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev); +void ivpu_pm_reset_done_cb(struct pci_dev *pdev); + +int __must_check ivpu_rpm_get(struct ivpu_device *vdev); +void ivpu_rpm_put(struct ivpu_device *vdev); + +void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); + +#endif /* __IVPU_PM_H__ */ From 783dedc535703ec6fa7c96754ac264b482821e2f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:27 +0200 Subject: [PATCH 21/45] drm/edid: store quirks in display info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the quirks are internal to EDID parsing, it'll be helpful to store them in display info to avoid having to pass them around. This will also help separate adding probed modes (which needs the quirks) from updating display info. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/819b908f64ad2d158245917f436f24d33a65b95d.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 42 ++++++++++++++++++------------------- include/drm/drm_connector.h | 5 +++++ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 93067b8dd9f6..41458c80d2af 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6428,18 +6428,20 @@ static void drm_reset_display_info(struct drm_connector *connector) kfree(info->vics); info->vics = NULL; info->vics_len = 0; + + info->quirks = 0; } -static u32 update_display_info(struct drm_connector *connector, - const struct drm_edid *drm_edid) +static void update_display_info(struct drm_connector *connector, + const struct drm_edid *drm_edid) { struct drm_display_info *info = &connector->display_info; const struct edid *edid = drm_edid->edid; - u32 quirks = edid_get_quirks(drm_edid); - drm_reset_display_info(connector); + info->quirks = edid_get_quirks(drm_edid); + info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; @@ -6510,17 +6512,15 @@ static u32 update_display_info(struct drm_connector *connector, drm_update_mso(connector, drm_edid); out: - if (quirks & EDID_QUIRK_NON_DESKTOP) { + if (info->quirks & EDID_QUIRK_NON_DESKTOP) { drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] Non-desktop display%s\n", connector->base.id, connector->name, info->non_desktop ? " (redundant quirk)" : ""); info->non_desktop = true; } - if (quirks & EDID_QUIRK_CAP_DSC_15BPP) + if (info->quirks & EDID_QUIRK_CAP_DSC_15BPP) info->max_dsc_bpp = 15; - - return quirks; } static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *dev, @@ -6618,8 +6618,8 @@ static int add_displayid_detailed_modes(struct drm_connector *connector, static int _drm_edid_connector_update(struct drm_connector *connector, const struct drm_edid *drm_edid) { + struct drm_display_info *info = &connector->display_info; int num_modes = 0; - u32 quirks; if (!drm_edid) { drm_reset_display_info(connector); @@ -6632,7 +6632,7 @@ static int _drm_edid_connector_update(struct drm_connector *connector, * To avoid multiple parsing of same block, lets parse that map * from sink info, before parsing CEA modes. */ - quirks = update_display_info(connector, drm_edid); + update_display_info(connector, drm_edid); /* Depends on info->cea_rev set by update_display_info() above */ drm_edid_to_eld(connector, drm_edid); @@ -6651,7 +6651,7 @@ static int _drm_edid_connector_update(struct drm_connector *connector, * * XXX order for additional mode types in extension blocks? */ - num_modes += add_detailed_modes(connector, drm_edid, quirks); + num_modes += add_detailed_modes(connector, drm_edid, info->quirks); num_modes += add_cvt_modes(connector, drm_edid); num_modes += add_standard_modes(connector, drm_edid); num_modes += add_established_modes(connector, drm_edid); @@ -6661,20 +6661,20 @@ static int _drm_edid_connector_update(struct drm_connector *connector, if (drm_edid->edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) num_modes += add_inferred_modes(connector, drm_edid); - if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75)) - edid_fixup_preferred(connector, quirks); + if (info->quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75)) + edid_fixup_preferred(connector, info->quirks); - if (quirks & EDID_QUIRK_FORCE_6BPC) - connector->display_info.bpc = 6; + if (info->quirks & EDID_QUIRK_FORCE_6BPC) + info->bpc = 6; - if (quirks & EDID_QUIRK_FORCE_8BPC) - connector->display_info.bpc = 8; + if (info->quirks & EDID_QUIRK_FORCE_8BPC) + info->bpc = 8; - if (quirks & EDID_QUIRK_FORCE_10BPC) - connector->display_info.bpc = 10; + if (info->quirks & EDID_QUIRK_FORCE_10BPC) + info->bpc = 10; - if (quirks & EDID_QUIRK_FORCE_12BPC) - connector->display_info.bpc = 12; + if (info->quirks & EDID_QUIRK_FORCE_12BPC) + info->bpc = 12; return num_modes; } diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 1c26c4e72c62..7b5048516185 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -728,6 +728,11 @@ struct drm_display_info { * @vics_len: Number of elements in vics. Internal to EDID parsing. */ int vics_len; + + /** + * @quirks: EDID based quirks. Internal to EDID parsing. + */ + u32 quirks; }; int drm_display_info_set_bus_formats(struct drm_display_info *info, From 4959b693d72d61e4c98bb16bca00c36c9faf524a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:28 +0200 Subject: [PATCH 22/45] drm/edid: stop passing quirks around MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that quirks are stored in display info, we can just look them up using the connector instead of having to pass them around. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/d55049dd9b2e48e63103f2dfa49bc9b25dd57f82.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 41458c80d2af..ffc5a2f0781f 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -96,7 +96,6 @@ struct detailed_mode_closure { struct drm_connector *connector; const struct drm_edid *drm_edid; bool preferred; - u32 quirks; int modes; }; @@ -2887,9 +2886,9 @@ static u32 edid_get_quirks(const struct drm_edid *drm_edid) * Walk the mode list for connector, clearing the preferred status on existing * modes and setting it anew for the right mode ala quirks. */ -static void edid_fixup_preferred(struct drm_connector *connector, - u32 quirks) +static void edid_fixup_preferred(struct drm_connector *connector) { + const struct drm_display_info *info = &connector->display_info; struct drm_display_mode *t, *cur_mode, *preferred_mode; int target_refresh = 0; int cur_vrefresh, preferred_vrefresh; @@ -2897,9 +2896,9 @@ static void edid_fixup_preferred(struct drm_connector *connector, if (list_empty(&connector->probed_modes)) return; - if (quirks & EDID_QUIRK_PREFER_LARGE_60) + if (info->quirks & EDID_QUIRK_PREFER_LARGE_60) target_refresh = 60; - if (quirks & EDID_QUIRK_PREFER_LARGE_75) + if (info->quirks & EDID_QUIRK_PREFER_LARGE_75) target_refresh = 75; preferred_mode = list_first_entry(&connector->probed_modes, @@ -3401,9 +3400,9 @@ drm_mode_do_interlace_quirk(struct drm_display_mode *mode, */ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connector, const struct drm_edid *drm_edid, - const struct detailed_timing *timing, - u32 quirks) + const struct detailed_timing *timing) { + const struct drm_display_info *info = &connector->display_info; struct drm_device *dev = connector->dev; struct drm_display_mode *mode; const struct detailed_pixel_timing *pt = &timing->data.pixel_data; @@ -3437,7 +3436,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto return NULL; } - if (quirks & EDID_QUIRK_FORCE_REDUCED_BLANKING) { + if (info->quirks & EDID_QUIRK_FORCE_REDUCED_BLANKING) { mode = drm_cvt_mode(dev, hactive, vactive, 60, true, false, false); if (!mode) return NULL; @@ -3449,7 +3448,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto if (!mode) return NULL; - if (quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH) + if (info->quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH) mode->clock = 1088 * 10; else mode->clock = le16_to_cpu(timing->pixel_clock) * 10; @@ -3472,7 +3471,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto drm_mode_do_interlace_quirk(mode, pt); - if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) { + if (info->quirks & EDID_QUIRK_DETAILED_SYNC_PP) { mode->flags |= DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC; } else { mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ? @@ -3485,12 +3484,12 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto mode->width_mm = pt->width_mm_lo | (pt->width_height_mm_hi & 0xf0) << 4; mode->height_mm = pt->height_mm_lo | (pt->width_height_mm_hi & 0xf) << 8; - if (quirks & EDID_QUIRK_DETAILED_IN_CM) { + if (info->quirks & EDID_QUIRK_DETAILED_IN_CM) { mode->width_mm *= 10; mode->height_mm *= 10; } - if (quirks & EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE) { + if (info->quirks & EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE) { mode->width_mm = drm_edid->edid->width_cm * 10; mode->height_mm = drm_edid->edid->height_cm * 10; } @@ -4003,8 +4002,7 @@ do_detailed_mode(const struct detailed_timing *timing, void *c) return; newmode = drm_mode_detailed(closure->connector, - closure->drm_edid, timing, - closure->quirks); + closure->drm_edid, timing); if (!newmode) return; @@ -4027,15 +4025,13 @@ do_detailed_mode(const struct detailed_timing *timing, void *c) * add_detailed_modes - Add modes from detailed timings * @connector: attached connector * @drm_edid: EDID block to scan - * @quirks: quirks to apply */ static int add_detailed_modes(struct drm_connector *connector, - const struct drm_edid *drm_edid, u32 quirks) + const struct drm_edid *drm_edid) { struct detailed_mode_closure closure = { .connector = connector, .drm_edid = drm_edid, - .quirks = quirks, }; if (drm_edid->edid->revision >= 4) @@ -6651,7 +6647,7 @@ static int _drm_edid_connector_update(struct drm_connector *connector, * * XXX order for additional mode types in extension blocks? */ - num_modes += add_detailed_modes(connector, drm_edid, info->quirks); + num_modes += add_detailed_modes(connector, drm_edid); num_modes += add_cvt_modes(connector, drm_edid); num_modes += add_standard_modes(connector, drm_edid); num_modes += add_established_modes(connector, drm_edid); @@ -6662,7 +6658,7 @@ static int _drm_edid_connector_update(struct drm_connector *connector, num_modes += add_inferred_modes(connector, drm_edid); if (info->quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75)) - edid_fixup_preferred(connector, info->quirks); + edid_fixup_preferred(connector); if (info->quirks & EDID_QUIRK_FORCE_6BPC) info->bpc = 6; From 45ea02d1ca3b18fa3479a561ea4f6d03737f2d30 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:29 +0200 Subject: [PATCH 23/45] drm/edid: merge ELD handling to update_display_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify display info update by merging ELD handling as well as clearing of the data in update_display_info(). The connector->eld really should be moved under display_info altogether, but that's for another time. Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1f2e7424b998fbcdd9cea488e7d6d7cbb26c460f.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ffc5a2f0781f..f58b0fdd582b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5485,8 +5485,6 @@ static void drm_edid_to_eld(struct drm_connector *connector, int total_sad_count = 0; int mnl; - clear_eld(connector); - if (!drm_edid) return; @@ -6432,9 +6430,15 @@ static void update_display_info(struct drm_connector *connector, const struct drm_edid *drm_edid) { struct drm_display_info *info = &connector->display_info; - const struct edid *edid = drm_edid->edid; + const struct edid *edid; drm_reset_display_info(connector); + clear_eld(connector); + + if (!drm_edid) + return; + + edid = drm_edid->edid; info->quirks = edid_get_quirks(drm_edid); @@ -6517,6 +6521,9 @@ static void update_display_info(struct drm_connector *connector, if (info->quirks & EDID_QUIRK_CAP_DSC_15BPP) info->max_dsc_bpp = 15; + + /* Depends on info->cea_rev set by drm_parse_cea_ext() above */ + drm_edid_to_eld(connector, drm_edid); } static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *dev, @@ -6617,12 +6624,6 @@ static int _drm_edid_connector_update(struct drm_connector *connector, struct drm_display_info *info = &connector->display_info; int num_modes = 0; - if (!drm_edid) { - drm_reset_display_info(connector); - clear_eld(connector); - return 0; - } - /* * CEA-861-F adds ycbcr capability map block, for HDMI 2.0 sinks. * To avoid multiple parsing of same block, lets parse that map @@ -6630,8 +6631,8 @@ static int _drm_edid_connector_update(struct drm_connector *connector, */ update_display_info(connector, drm_edid); - /* Depends on info->cea_rev set by update_display_info() above */ - drm_edid_to_eld(connector, drm_edid); + if (!drm_edid) + return 0; /* * EDID spec says modes should be preferred in this order: @@ -6768,10 +6769,7 @@ static int _drm_connector_update_edid_property(struct drm_connector *connector, * that it seems better to duplicate it rather than attempt to ensure * some arbitrary ordering of calls. */ - if (drm_edid) - update_display_info(connector, drm_edid); - else - drm_reset_display_info(connector); + update_display_info(connector, drm_edid); _drm_update_tile_info(connector, drm_edid); From 43bde505d66a41c2ad706d603e97b2c8aa2fbe4a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Jan 2023 12:05:30 +0200 Subject: [PATCH 24/45] drm/edid: move EDID BPC quirk application to update_display_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BPC quirks are closer to home in update_display_info(). Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/8997e0fa3b0fd03c920e72d1dff24c0d96ff4dd0.1672826282.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f58b0fdd582b..e8b67f3f5c91 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6522,6 +6522,18 @@ static void update_display_info(struct drm_connector *connector, if (info->quirks & EDID_QUIRK_CAP_DSC_15BPP) info->max_dsc_bpp = 15; + if (info->quirks & EDID_QUIRK_FORCE_6BPC) + info->bpc = 6; + + if (info->quirks & EDID_QUIRK_FORCE_8BPC) + info->bpc = 8; + + if (info->quirks & EDID_QUIRK_FORCE_10BPC) + info->bpc = 10; + + if (info->quirks & EDID_QUIRK_FORCE_12BPC) + info->bpc = 12; + /* Depends on info->cea_rev set by drm_parse_cea_ext() above */ drm_edid_to_eld(connector, drm_edid); } @@ -6621,7 +6633,7 @@ static int add_displayid_detailed_modes(struct drm_connector *connector, static int _drm_edid_connector_update(struct drm_connector *connector, const struct drm_edid *drm_edid) { - struct drm_display_info *info = &connector->display_info; + const struct drm_display_info *info = &connector->display_info; int num_modes = 0; /* @@ -6661,18 +6673,6 @@ static int _drm_edid_connector_update(struct drm_connector *connector, if (info->quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75)) edid_fixup_preferred(connector); - if (info->quirks & EDID_QUIRK_FORCE_6BPC) - info->bpc = 6; - - if (info->quirks & EDID_QUIRK_FORCE_8BPC) - info->bpc = 8; - - if (info->quirks & EDID_QUIRK_FORCE_10BPC) - info->bpc = 10; - - if (info->quirks & EDID_QUIRK_FORCE_12BPC) - info->bpc = 12; - return num_modes; } From ebd05c0e12edc568243056711ce00f79e19eab68 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 19 Jan 2023 15:04:12 +0100 Subject: [PATCH 25/45] drm/panel: vtdr6130: fix unused ret in visionox_vtdr6130_bl_update_status Fix the following warning: panel-visionox-vtdr6130.c:249:12: warning: 'ret' is used uninitialized [-Wuninitialized] Fixes: 9402cde9347e ("drm/panel: vtdr6130: Use 16-bit brightness function") Reported-by: Daniel Vetter Reported-by: kernel test robot Reviewed-by: Sam Ravnborg Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230119-topic-sm8550-vtdr6130-fixup-v1-1-82c4fb008138@linaro.org --- drivers/gpu/drm/panel/panel-visionox-vtdr6130.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-visionox-vtdr6130.c b/drivers/gpu/drm/panel/panel-visionox-vtdr6130.c index 1092075b31a5..bb0dfd86ea67 100644 --- a/drivers/gpu/drm/panel/panel-visionox-vtdr6130.c +++ b/drivers/gpu/drm/panel/panel-visionox-vtdr6130.c @@ -243,13 +243,8 @@ static int visionox_vtdr6130_bl_update_status(struct backlight_device *bl) { struct mipi_dsi_device *dsi = bl_get_data(bl); u16 brightness = backlight_get_brightness(bl); - int ret; - mipi_dsi_dcs_set_display_brightness_large(dsi, brightness); - if (ret < 0) - return ret; - - return 0; + return mipi_dsi_dcs_set_display_brightness_large(dsi, brightness); } static const struct backlight_ops visionox_vtdr6130_bl_ops = { From ce1fc502de429c9b4dd30775e354bb29453256bb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 20 Jan 2023 06:09:56 +0000 Subject: [PATCH 26/45] MAINTAINERS: Add Sumit Semwal and Yongqin Liu as reviwers for kirin DRM driver I no longer have access to the HiKey boards, so while I'm happy to review code, I wanted to add Sumit and Yongqin to the reviewers list so they would get CC'ed on future changes and would be able to have a chance to validate and provide Tested-by: tags Cc: Xinliang Liu Cc: Tian Tao Cc: Yongqin Liu Cc: Sumit Semwal Cc: kernel-team@android.com Cc: dri-devel@lists.freedesktop.org Signed-off-by: John Stultz Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230120060956.1244187-1-jstultz@google.com --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 071f7b62a36e..2758e7a64fa5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6995,8 +6995,10 @@ F: drivers/gpu/drm/gma500/ DRM DRIVERS FOR HISILICON M: Xinliang Liu M: Tian Tao -R: John Stultz R: Xinwei Kong +R: Sumit Semwal +R: Yongqin Liu +R: John Stultz L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc From 996e1defca34485dd2bd70b173f069aab5f21a65 Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Mon, 12 Dec 2022 20:27:44 +0530 Subject: [PATCH 27/45] drm: exynos: dsi: Fix MIPI_DSI*_NO_* mode flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HFP/HBP/HSA/EOT_PACKET modes in Exynos DSI host specifies 0 = Enable and 1 = Disable. The logic for checking these mode flags was correct before the MIPI_DSI*_NO_* mode flag conversion. This patch is trying to fix this MIPI_DSI*_NO_* mode flags handling Exynos DSI host and update the mode_flags in relevant panel drivers. Fixes: 0f3b68b66a6d ("drm/dsi: Add _NO_ to MIPI_DSI_* flags disabling features") Reviewed-by: Marek Vasut Reviewed-by: Nicolas Boichat Reported-by: Sébastien Szymanski Signed-off-by: Jagan Teki Reviewed-by: Frieder Schrempf Signed-off-by: Marek Vasut Link: https://patchwork.freedesktop.org/patch/msgid/20221212145745.15387-1-jagan@amarulasolutions.com --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 8 ++++---- drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c | 4 +++- drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c | 3 ++- drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c | 2 -- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index ec673223d6b7..b5305b145ddb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -805,15 +805,15 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) reg |= DSIM_AUTO_MODE; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HSE) reg |= DSIM_HSE_MODE; - if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP)) + if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP) reg |= DSIM_HFP_MODE; - if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP)) + if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP) reg |= DSIM_HBP_MODE; - if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA)) + if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA) reg |= DSIM_HSA_MODE; } - if (!(dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)) + if (dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET) reg |= DSIM_EOT_DISABLE; switch (dsi->format) { diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c index 5c621b15e84c..439ef3073512 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c @@ -692,7 +692,9 @@ static int s6e3ha2_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_CLOCK_NON_CONTINUOUS; + dsi->mode_flags = MIPI_DSI_CLOCK_NON_CONTINUOUS | + MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP | + MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET; ctx->supplies[0].supply = "vdd3"; ctx->supplies[1].supply = "vci"; diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c index e06fd35de814..9c3e76171759 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c @@ -446,7 +446,8 @@ static int s6e63j0x03_probe(struct mipi_dsi_device *dsi) dsi->lanes = 1; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_MODE_NO_EOT_PACKET; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO_NO_HFP | + MIPI_DSI_MODE_VIDEO_NO_HBP | MIPI_DSI_MODE_VIDEO_NO_HSA; ctx->supplies[0].supply = "vdd3"; ctx->supplies[1].supply = "vci"; diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c index 54213beafaf5..ebf4c2d39ea8 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c @@ -990,8 +990,6 @@ static int s6e8aa0_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST - | MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP - | MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_AUTO_VERT; ret = s6e8aa0_parse_dt(ctx); From 2e337a8d14bd4b04913d52ccf076be29d846acd7 Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Mon, 12 Dec 2022 20:27:45 +0530 Subject: [PATCH 28/45] drm: exynos: dsi: Properly name HSA/HBP/HFP/HSE bits HSA/HBP/HFP/HSE mode bits in Processor Reference Manuals specify a naming conversion as 'disable mode bit' due to its bit definition, 0 = Enable and 1 = Disable. For HSE bit, the i.MX 8M Mini/Nano/Plus Applications Processor Reference Manual named this bit as 'HseDisableMode' but the bit definition is quite opposite like 0 = Disables transfer 1 = Enables transfer which clearly states that HSE is not a disable bit. HSE is named as per the manual even though it is not a disable bit however the driver logic for handling HSE is based on the MIPI_DSI_MODE_VIDEO_HSE flag itself. Cc: Nicolas Boichat Reviewed-by: Marek Vasut Signed-off-by: Jagan Teki Reviewed-by: Frieder Schrempf Signed-off-by: Marek Vasut Link: https://patchwork.freedesktop.org/patch/msgid/20221212145745.15387-2-jagan@amarulasolutions.com --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 33 +++++++++++++++++++------ 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index b5305b145ddb..320c370cfe24 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -75,10 +75,27 @@ #define DSIM_MAIN_PIX_FORMAT_RGB565 (0x4 << 12) #define DSIM_SUB_VC (((x) & 0x3) << 16) #define DSIM_MAIN_VC (((x) & 0x3) << 18) -#define DSIM_HSA_MODE (1 << 20) -#define DSIM_HBP_MODE (1 << 21) -#define DSIM_HFP_MODE (1 << 22) -#define DSIM_HSE_MODE (1 << 23) +#define DSIM_HSA_DISABLE_MODE (1 << 20) +#define DSIM_HBP_DISABLE_MODE (1 << 21) +#define DSIM_HFP_DISABLE_MODE (1 << 22) +/* + * The i.MX 8M Mini Applications Processor Reference Manual, + * Rev. 3, 11/2020 Page 4091 + * The i.MX 8M Nano Applications Processor Reference Manual, + * Rev. 2, 07/2022 Page 3058 + * The i.MX 8M Plus Applications Processor Reference Manual, + * Rev. 1, 06/2021 Page 5436 + * named this bit as 'HseDisableMode' but the bit definition + * is quite opposite like + * 0 = Disables transfer + * 1 = Enables transfer + * which clearly states that HSE is not a disable bit. + * + * This bit is named as per the manual even though it is not + * a disable bit however the driver logic for handling HSE + * is based on the MIPI_DSI_MODE_VIDEO_HSE flag itself. + */ +#define DSIM_HSE_DISABLE_MODE (1 << 23) #define DSIM_AUTO_MODE (1 << 24) #define DSIM_VIDEO_MODE (1 << 25) #define DSIM_BURST_MODE (1 << 26) @@ -804,13 +821,13 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_AUTO_VERT) reg |= DSIM_AUTO_MODE; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HSE) - reg |= DSIM_HSE_MODE; + reg |= DSIM_HSE_DISABLE_MODE; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP) - reg |= DSIM_HFP_MODE; + reg |= DSIM_HFP_DISABLE_MODE; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP) - reg |= DSIM_HBP_MODE; + reg |= DSIM_HBP_DISABLE_MODE; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA) - reg |= DSIM_HSA_MODE; + reg |= DSIM_HSA_DISABLE_MODE; } if (dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET) From 35b9dca5626207b0c3933d2fcd38f81bb02a17bf Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 21 Jan 2023 17:24:18 +0100 Subject: [PATCH 29/45] dt-bindings: display/panel: Add AUO A030JTN01 Add binding for the AUO A030JTN01 panel, which is a 320x480 3.0" 4:3 24-bit TFT LCD panel with non-square pixels and a delta-RGB 8-bit interface. Signed-off-by: Christophe Branchereau Signed-off-by: Paul Cercueil Reviewed-by: Krzysztof Kozlowski Link: https://patchwork.freedesktop.org/patch/msgid/20230121162419.284523-2-cbranchereau@gmail.com --- .../bindings/display/panel/auo,a030jtn01.yaml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/auo,a030jtn01.yaml diff --git a/Documentation/devicetree/bindings/display/panel/auo,a030jtn01.yaml b/Documentation/devicetree/bindings/display/panel/auo,a030jtn01.yaml new file mode 100644 index 000000000000..86c834eb4d98 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/auo,a030jtn01.yaml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/auo,a030jtn01.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: AUO A030JTN01 3.0" (320x480 pixels) 24-bit TFT LCD panel + +description: | + Delta RGB 8-bit panel found in some Retrogame handhelds + +maintainers: + - Paul Cercueil + - Christophe Branchereau + +allOf: + - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +properties: + compatible: + const: auo,a030jtn01 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - power-supply + - reset-gpios + +unevaluatedProperties: false + +examples: + - | + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "auo,a030jtn01"; + reg = <0>; + + spi-max-frequency = <10000000>; + + reset-gpios = <&gpe 4 GPIO_ACTIVE_LOW>; + power-supply = <&lcd_power>; + + backlight = <&backlight>; + + port { + panel_input: endpoint { + remote-endpoint = <&panel_output>; + }; + }; + }; + }; From 6febdb2491626d4ae8c54dff51716b2695bcd12d Mon Sep 17 00:00:00 2001 From: Christophe Branchereau Date: Sat, 21 Jan 2023 17:24:19 +0100 Subject: [PATCH 30/45] drm/panel: Add driver for the AUO A030JTN01 TFT LCD Add driver for the AUO A030JTN01 panel, which is a 320x480 3.0" 4:3 24-bit TFT LCD panel with non-square pixels and a delta-RGB 8-bit interface. Signed-off-by: Christophe Branchereau Signed-off-by: Paul Cercueil Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20230121162419.284523-3-cbranchereau@gmail.com --- drivers/gpu/drm/panel/Kconfig | 8 + drivers/gpu/drm/panel/Makefile | 1 + drivers/gpu/drm/panel/panel-auo-a030jtn01.c | 308 ++++++++++++++++++++ 3 files changed, 317 insertions(+) create mode 100644 drivers/gpu/drm/panel/panel-auo-a030jtn01.c diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 8f2046caaa6b..8eeee71c0000 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -37,6 +37,14 @@ config DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 NT35596 1080x1920 video mode panel as found in some Asus Zenfone 2 Laser Z00T devices. +config DRM_PANEL_AUO_A030JTN01 + tristate "AUO A030JTN01" + depends on SPI + select REGMAP_SPI + help + Say Y here to enable support for the AUO A030JTN01 320x480 3.0" panel + as found in the YLM RS-97 handheld gaming console. + config DRM_PANEL_BOE_BF060Y8M_AJ0 tristate "Boe BF060Y8M-AJ0 panel" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index d07e6fa28bdf..c05aa9e23907 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_DRM_PANEL_ABT_Y030XX067A) += panel-abt-y030xx067a.o obj-$(CONFIG_DRM_PANEL_ARM_VERSATILE) += panel-arm-versatile.o obj-$(CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596) += panel-asus-z00t-tm5p5-n35596.o +obj-$(CONFIG_DRM_PANEL_AUO_A030JTN01) += panel-auo-a030jtn01.o obj-$(CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0) += panel-boe-bf060y8m-aj0.o obj-$(CONFIG_DRM_PANEL_BOE_HIMAX8279D) += panel-boe-himax8279d.o obj-$(CONFIG_DRM_PANEL_BOE_TV101WUM_NL6) += panel-boe-tv101wum-nl6.o diff --git a/drivers/gpu/drm/panel/panel-auo-a030jtn01.c b/drivers/gpu/drm/panel/panel-auo-a030jtn01.c new file mode 100644 index 000000000000..3c976a98de6a --- /dev/null +++ b/drivers/gpu/drm/panel/panel-auo-a030jtn01.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AU Optronics A030JTN01.0 TFT LCD panel driver + * + * Copyright (C) 2023, Paul Cercueil + * Copyright (C) 2023, Christophe Branchereau + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define REG05 0x05 +#define REG06 0x06 +#define REG07 0x07 + +#define REG05_STDBY BIT(0) +#define REG06_VBLK GENMASK(4, 0) +#define REG07_HBLK GENMASK(7, 0) + + +struct a030jtn01_info { + const struct drm_display_mode *display_modes; + unsigned int num_modes; + u16 width_mm, height_mm; + u32 bus_format, bus_flags; +}; + +struct a030jtn01 { + struct drm_panel panel; + struct spi_device *spi; + struct regmap *map; + + const struct a030jtn01_info *panel_info; + + struct regulator *supply; + struct gpio_desc *reset_gpio; +}; + +static inline struct a030jtn01 *to_a030jtn01(struct drm_panel *panel) +{ + return container_of(panel, struct a030jtn01, panel); +} + +static int a030jtn01_prepare(struct drm_panel *panel) +{ + struct a030jtn01 *priv = to_a030jtn01(panel); + struct device *dev = &priv->spi->dev; + unsigned int dummy; + int err; + + err = regulator_enable(priv->supply); + if (err) { + dev_err(dev, "Failed to enable power supply: %d\n", err); + return err; + } + + usleep_range(1000, 8000); + + /* Reset the chip */ + gpiod_set_value_cansleep(priv->reset_gpio, 1); + usleep_range(100, 8000); + gpiod_set_value_cansleep(priv->reset_gpio, 0); + usleep_range(2000, 8000); + + /* + * No idea why, but a register read (doesn't matter which) is needed to + * properly initialize the chip after a reset; otherwise, the colors + * will be wrong. It doesn't seem to be timing-related as a msleep(200) + * doesn't fix it. + */ + err = regmap_read(priv->map, REG05, &dummy); + if (err) + goto err_disable_regulator; + + /* Use (24 + 6) == 0x1e as the vertical back porch */ + err = regmap_write(priv->map, REG06, FIELD_PREP(REG06_VBLK, 0x1e)); + if (err) + goto err_disable_regulator; + + /* Use (42 + 30) * 3 == 0xd8 as the horizontal back porch */ + err = regmap_write(priv->map, REG07, FIELD_PREP(REG07_HBLK, 0xd8)); + if (err) + goto err_disable_regulator; + + return 0; + +err_disable_regulator: + gpiod_set_value_cansleep(priv->reset_gpio, 1); + regulator_disable(priv->supply); + return err; +} + +static int a030jtn01_unprepare(struct drm_panel *panel) +{ + struct a030jtn01 *priv = to_a030jtn01(panel); + + gpiod_set_value_cansleep(priv->reset_gpio, 1); + regulator_disable(priv->supply); + + return 0; +} + +static int a030jtn01_enable(struct drm_panel *panel) +{ + struct a030jtn01 *priv = to_a030jtn01(panel); + int ret; + + ret = regmap_set_bits(priv->map, REG05, REG05_STDBY); + if (ret) + return ret; + + /* Wait for the picture to be stable */ + if (panel->backlight) + msleep(100); + + return 0; +} + +static int a030jtn01_disable(struct drm_panel *panel) +{ + struct a030jtn01 *priv = to_a030jtn01(panel); + + return regmap_clear_bits(priv->map, REG05, REG05_STDBY); +} + +static int a030jtn01_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct a030jtn01 *priv = to_a030jtn01(panel); + const struct a030jtn01_info *panel_info = priv->panel_info; + struct drm_display_mode *mode; + unsigned int i; + + for (i = 0; i < panel_info->num_modes; i++) { + mode = drm_mode_duplicate(connector->dev, + &panel_info->display_modes[i]); + if (!mode) + return -ENOMEM; + + drm_mode_set_name(mode); + + mode->type = DRM_MODE_TYPE_DRIVER; + if (panel_info->num_modes == 1) + mode->type |= DRM_MODE_TYPE_PREFERRED; + + drm_mode_probed_add(connector, mode); + } + + connector->display_info.bpc = 8; + connector->display_info.width_mm = panel_info->width_mm; + connector->display_info.height_mm = panel_info->height_mm; + + drm_display_info_set_bus_formats(&connector->display_info, + &panel_info->bus_format, 1); + connector->display_info.bus_flags = panel_info->bus_flags; + + return panel_info->num_modes; +} + +static const struct drm_panel_funcs a030jtn01_funcs = { + .prepare = a030jtn01_prepare, + .unprepare = a030jtn01_unprepare, + .enable = a030jtn01_enable, + .disable = a030jtn01_disable, + .get_modes = a030jtn01_get_modes, +}; + +static bool a030jtn01_has_reg(struct device *dev, unsigned int reg) +{ + static const u32 a030jtn01_regs_mask = 0x001823f1fb; + + return a030jtn01_regs_mask & BIT(reg); +}; + +static const struct regmap_config a030jtn01_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .read_flag_mask = 0x40, + .max_register = 0x1c, + .readable_reg = a030jtn01_has_reg, + .writeable_reg = a030jtn01_has_reg, +}; + +static int a030jtn01_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct a030jtn01 *priv; + int err; + + spi->mode |= SPI_MODE_3 | SPI_3WIRE; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->spi = spi; + spi_set_drvdata(spi, priv); + + priv->map = devm_regmap_init_spi(spi, &a030jtn01_regmap_config); + if (IS_ERR(priv->map)) + return dev_err_probe(dev, PTR_ERR(priv->map), "Unable to init regmap"); + + priv->panel_info = spi_get_device_match_data(spi); + if (!priv->panel_info) + return -EINVAL; + + priv->supply = devm_regulator_get(dev, "power"); + if (IS_ERR(priv->supply)) + return dev_err_probe(dev, PTR_ERR(priv->supply), "Failed to get power supply"); + + priv->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(priv->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(priv->reset_gpio), "Failed to get reset GPIO"); + + drm_panel_init(&priv->panel, dev, &a030jtn01_funcs, + DRM_MODE_CONNECTOR_DPI); + + err = drm_panel_of_backlight(&priv->panel); + if (err) + return err; + + drm_panel_add(&priv->panel); + + return 0; +} + +static void a030jtn01_remove(struct spi_device *spi) +{ + struct a030jtn01 *priv = spi_get_drvdata(spi); + + drm_panel_remove(&priv->panel); + drm_panel_disable(&priv->panel); + drm_panel_unprepare(&priv->panel); +} + +static const struct drm_display_mode a030jtn01_modes[] = { + { /* 60 Hz */ + .clock = 14400, + .hdisplay = 320, + .hsync_start = 320 + 8, + .hsync_end = 320 + 8 + 42, + .htotal = 320 + 8 + 42 + 30, + .vdisplay = 480, + .vsync_start = 480 + 90, + .vsync_end = 480 + 90 + 24, + .vtotal = 480 + 90 + 24 + 6, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + }, + { /* 50 Hz */ + .clock = 12000, + .hdisplay = 320, + .hsync_start = 320 + 8, + .hsync_end = 320 + 8 + 42, + .htotal = 320 + 8 + 42 + 30, + .vdisplay = 480, + .vsync_start = 480 + 90, + .vsync_end = 480 + 90 + 24, + .vtotal = 480 + 90 + 24 + 6, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + }, +}; + +static const struct a030jtn01_info a030jtn01_info = { + .display_modes = a030jtn01_modes, + .num_modes = ARRAY_SIZE(a030jtn01_modes), + .width_mm = 70, + .height_mm = 51, + .bus_format = MEDIA_BUS_FMT_RGB888_3X8_DELTA, + .bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, +}; + +static const struct spi_device_id a030jtn01_id[] = { + { "a030jtn01", (kernel_ulong_t) &a030jtn01_info }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(spi, a030jtn01_id); + +static const struct of_device_id a030jtn01_of_match[] = { + { .compatible = "auo,a030jtn01" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, a030jtn01_of_match); + +static struct spi_driver a030jtn01_driver = { + .driver = { + .name = "auo-a030jtn01", + .of_match_table = a030jtn01_of_match, + }, + .id_table = a030jtn01_id, + .probe = a030jtn01_probe, + .remove = a030jtn01_remove, +}; +module_spi_driver(a030jtn01_driver); + +MODULE_AUTHOR("Paul Cercueil "); +MODULE_AUTHOR("Christophe Branchereau "); +MODULE_LICENSE("GPL"); From 5e68a0ca59c0891d6b7da8ae7f6764565ff67c6d Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 23 Jan 2023 10:14:48 +0800 Subject: [PATCH 31/45] dt-bindings: display: bridge: ldb: Add i.MX93 LDB Same to i.MX8mp LDB, i.MX93 LDB is controlled by mediamix blk-ctrl through 'ldb' register and 'lvds' register. Also, the 'ldb' clock is required. i.MX93 LDB supports only one LVDS channel(channel 0, a.k.a, LVDS Channel-A in the device tree binding documentation), while i.MX8mp LDB supports at most two. Add i.MX93 LDB device tree binding in the existing i.MX8mp LDB device tree binding documentation. Acked-by: Krzysztof Kozlowski Signed-off-by: Liu Ying Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230123021449.969243-2-victor.liu@nxp.com --- .../bindings/display/bridge/fsl,ldb.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml index b19be0804abe..6e0e3ba9b49e 100644 --- a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml +++ b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml @@ -16,7 +16,9 @@ description: | properties: compatible: - const: fsl,imx8mp-ldb + enum: + - fsl,imx8mp-ldb + - fsl,imx93-ldb clocks: maxItems: 1 @@ -57,6 +59,18 @@ required: - clocks - ports +allOf: + - if: + properties: + compatible: + contains: + const: fsl,imx93-ldb + then: + properties: + ports: + properties: + port@2: false + additionalProperties: false examples: From 48865413c9ddab68a774b506b1940af0f6b6089d Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 23 Jan 2023 10:14:49 +0800 Subject: [PATCH 32/45] drm/bridge: fsl-ldb: Add i.MX93 LDB support Same to i.MX8mp LDB, i.MX93 LDB is controlled by mediamix blk-ctrl through LDB_CTRL and LVDS_CTRL registers. i.MX93 LDB supports only one LVDS channel(channel 0) and it's LVDS_CTRL register bit1 is used as LVDS_EN instead of CH1_EN. Add i.MX93 LDB support in the existing i.MX8mp LDB bridge driver by adding i.MX93 LDB compatible string and device data(to reflect different register offsets and LVDS_CTRL register bit1 definition). Reviewed-by: Marek Vasut Signed-off-by: Liu Ying Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230123021449.969243-3-victor.liu@nxp.com --- drivers/gpu/drm/bridge/fsl-ldb.c | 57 +++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index 9bcba8fc57e7..6bac160b395b 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -18,7 +18,6 @@ #include #include -#define LDB_CTRL 0x5c #define LDB_CTRL_CH0_ENABLE BIT(0) #define LDB_CTRL_CH0_DI_SELECT BIT(1) #define LDB_CTRL_CH1_ENABLE BIT(2) @@ -35,9 +34,13 @@ #define LDB_CTRL_ASYNC_FIFO_ENABLE BIT(24) #define LDB_CTRL_ASYNC_FIFO_THRESHOLD_MASK GENMASK(27, 25) -#define LVDS_CTRL 0x128 #define LVDS_CTRL_CH0_EN BIT(0) #define LVDS_CTRL_CH1_EN BIT(1) +/* + * LVDS_CTRL_LVDS_EN bit is poorly named in i.MX93 reference manual. + * Clear it to enable LVDS and set it to disable LVDS. + */ +#define LVDS_CTRL_LVDS_EN BIT(1) #define LVDS_CTRL_VBG_EN BIT(2) #define LVDS_CTRL_HS_EN BIT(3) #define LVDS_CTRL_PRE_EMPH_EN BIT(4) @@ -52,6 +55,29 @@ #define LVDS_CTRL_VBG_ADJ(n) (((n) & 0x7) << 17) #define LVDS_CTRL_VBG_ADJ_MASK GENMASK(19, 17) +enum fsl_ldb_devtype { + IMX8MP_LDB, + IMX93_LDB, +}; + +struct fsl_ldb_devdata { + u32 ldb_ctrl; + u32 lvds_ctrl; + bool lvds_en_bit; +}; + +static const struct fsl_ldb_devdata fsl_ldb_devdata[] = { + [IMX8MP_LDB] = { + .ldb_ctrl = 0x5c, + .lvds_ctrl = 0x128, + }, + [IMX93_LDB] = { + .ldb_ctrl = 0x20, + .lvds_ctrl = 0x24, + .lvds_en_bit = true, + }, +}; + struct fsl_ldb { struct device *dev; struct drm_bridge bridge; @@ -59,6 +85,7 @@ struct fsl_ldb { struct clk *clk; struct regmap *regmap; bool lvds_dual_link; + const struct fsl_ldb_devdata *devdata; }; static inline struct fsl_ldb *to_fsl_ldb(struct drm_bridge *bridge) @@ -173,12 +200,12 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, reg |= LDB_CTRL_DI1_VSYNC_POLARITY; } - regmap_write(fsl_ldb->regmap, LDB_CTRL, reg); + regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, reg); /* Program LVDS_CTRL */ reg = LVDS_CTRL_CC_ADJ(2) | LVDS_CTRL_PRE_EMPH_EN | LVDS_CTRL_PRE_EMPH_ADJ(3) | LVDS_CTRL_VBG_EN; - regmap_write(fsl_ldb->regmap, LVDS_CTRL, reg); + regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, reg); /* Wait for VBG to stabilize. */ usleep_range(15, 20); @@ -187,7 +214,7 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, if (fsl_ldb->lvds_dual_link) reg |= LVDS_CTRL_CH1_EN; - regmap_write(fsl_ldb->regmap, LVDS_CTRL, reg); + regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, reg); } static void fsl_ldb_atomic_disable(struct drm_bridge *bridge, @@ -195,9 +222,14 @@ static void fsl_ldb_atomic_disable(struct drm_bridge *bridge, { struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge); - /* Stop both channels. */ - regmap_write(fsl_ldb->regmap, LVDS_CTRL, 0); - regmap_write(fsl_ldb->regmap, LDB_CTRL, 0); + /* Stop channel(s). */ + if (fsl_ldb->devdata->lvds_en_bit) + /* Set LVDS_CTRL_LVDS_EN bit to disable. */ + regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, + LVDS_CTRL_LVDS_EN); + else + regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, 0); + regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, 0); clk_disable_unprepare(fsl_ldb->clk); } @@ -263,6 +295,10 @@ static int fsl_ldb_probe(struct platform_device *pdev) if (!fsl_ldb) return -ENOMEM; + fsl_ldb->devdata = of_device_get_match_data(dev); + if (!fsl_ldb->devdata) + return -EINVAL; + fsl_ldb->dev = &pdev->dev; fsl_ldb->bridge.funcs = &funcs; fsl_ldb->bridge.of_node = dev->of_node; @@ -321,7 +357,10 @@ static int fsl_ldb_remove(struct platform_device *pdev) } static const struct of_device_id fsl_ldb_match[] = { - { .compatible = "fsl,imx8mp-ldb", }, + { .compatible = "fsl,imx8mp-ldb", + .data = &fsl_ldb_devdata[IMX8MP_LDB], }, + { .compatible = "fsl,imx93-ldb", + .data = &fsl_ldb_devdata[IMX93_LDB], }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, fsl_ldb_match); From 919d320fb8c6a40669ef0eb64aa97326f192415d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 19 Jan 2023 18:18:54 +0200 Subject: [PATCH 33/45] drm/edid: split HDMI VSDB info and mode parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate the parsing of display info and modes from the HDMI VSDB. This is prerequisite work for overall better separation of the two parsing steps. The info parsing is about figuring out whether the sink supports HDMI infoframes. Since they were added in HDMI 1.4, assume the sink supports HDMI infoframes if it has the HDMI_Video_present bit set (introduced in HDMI 1.4). For details, see commit f1781e9bb2dd ("drm/edid: Allow HDMI infoframe without VIC or S3D"). The logic is not exactly the same, but since it was somewhat heuristic to begin with, assume this is close enough. v2: - Simplify to only check HDMI_Video_present bit (Ville) - Drop cea_db_raw_size() helper (Ville) Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/238e15f7ab15a86f7fd1812271dcaec9bc6e1506.1674144945.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index e8b67f3f5c91..ee453e39562a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4713,7 +4713,6 @@ static int hdmi_vsdb_latency_length(const u8 *db) static int do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len) { - struct drm_display_info *info = &connector->display_info; int modes = 0, offset = 0, i, multi_present = 0, multi_len; u8 vic_len, hdmi_3d_len = 0; u16 mask; @@ -4831,8 +4830,6 @@ do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len) } out: - if (modes > 0) - info->has_hdmi_infoframe = true; return modes; } @@ -6153,6 +6150,7 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, } } +/* HDMI Vendor-Specific Data Block (HDMI VSDB, H14b-VSDB) */ static void drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db) { @@ -6166,6 +6164,15 @@ drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db) if (len >= 7) info->max_tmds_clock = db[7] * 5000; + /* + * Try to infer whether the sink supports HDMI infoframes. + * + * HDMI infoframe support was first added in HDMI 1.4. Assume the sink + * supports infoframes if HDMI_Video_present is set. + */ + if (len >= 8 && db[8] & BIT(5)) + info->has_hdmi_infoframe = true; + drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] HDMI: DVI dual %d, max TMDS clock %d kHz\n", connector->base.id, connector->name, info->dvi_dual, info->max_tmds_clock); From e8b1f0d4b6ec09fe32a0f66b99a4951214d768ee Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 19 Jan 2023 18:18:55 +0200 Subject: [PATCH 34/45] drm/edid: refactor _drm_edid_connector_update() and rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By moving update_display_info() out of _drm_edid_connector_update() we make the function purely about adding modes. Rename accordingly. Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/e9880bbb2b5724d9aac88a90a31ba3ba9af9da3f.1674144945.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ee453e39562a..f9502df3dbca 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6637,19 +6637,12 @@ static int add_displayid_detailed_modes(struct drm_connector *connector, return num_modes; } -static int _drm_edid_connector_update(struct drm_connector *connector, - const struct drm_edid *drm_edid) +static int _drm_edid_connector_add_modes(struct drm_connector *connector, + const struct drm_edid *drm_edid) { const struct drm_display_info *info = &connector->display_info; int num_modes = 0; - /* - * CEA-861-F adds ycbcr capability map block, for HDMI 2.0 sinks. - * To avoid multiple parsing of same block, lets parse that map - * from sink info, before parsing CEA modes. - */ - update_display_info(connector, drm_edid); - if (!drm_edid) return 0; @@ -6754,7 +6747,9 @@ int drm_edid_connector_update(struct drm_connector *connector, { int count; - count = _drm_edid_connector_update(connector, drm_edid); + update_display_info(connector, drm_edid); + + count = _drm_edid_connector_add_modes(connector, drm_edid); _drm_update_tile_info(connector, drm_edid); @@ -6824,7 +6819,8 @@ EXPORT_SYMBOL(drm_connector_update_edid_property); */ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) { - struct drm_edid drm_edid; + struct drm_edid _drm_edid; + const struct drm_edid *drm_edid; if (edid && !drm_edid_is_valid(edid)) { drm_warn(connector->dev, "[CONNECTOR:%d:%s] EDID invalid.\n", @@ -6832,8 +6828,11 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) edid = NULL; } - return _drm_edid_connector_update(connector, - drm_edid_legacy_init(&drm_edid, edid)); + drm_edid = drm_edid_legacy_init(&_drm_edid, edid); + + update_display_info(connector, drm_edid); + + return _drm_edid_connector_add_modes(connector, drm_edid); } EXPORT_SYMBOL(drm_add_edid_modes); From c533b5167c7e40e82dd724f2a0ffa691f7f386d2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 19 Jan 2023 18:18:56 +0200 Subject: [PATCH 35/45] drm/edid: add separate drm_edid_connector_add_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original goal with drm_edid_connector_update() was to have a single call for updating the connector and adding probed modes, in this order, but that turned out to be problematic. Drivers that need to update the connector in the .detect() callback would end up updating the probed modes as well. Turns out the callback may be called so many times that the probed mode list fills up without bounds, and this is amplified by add_alternate_cea_modes() duplicating the CEA modes on every call, actually running out of memory on some machines. Kudos to Imre Deak for explaining this to me. Go back to having separate drm_edid_connector_update() and drm_edid_connector_add_modes() calls. The former may be called from .detect(), .force(), or .get_modes(), but the latter only from .get_modes(). Unlike drm_add_edid_modes(), have drm_edid_connector_add_modes() update the probed modes from the EDID property instead of the passed in EDID. This is mainly to enforce two things: 1) drm_edid_connector_update() must be called before drm_edid_connector_add_modes(). Display info and quirks are needed for parsing the modes, and we don't want to call update_display_info() again to ensure the info is available, like drm_add_edid_modes() does. 2) The same EDID is used for both updating the connector and adding the probed modes. Fortunately, the change is easy, because no driver has actually adopted drm_edid_connector_update(). Not even i915, and that's mainly because of the problem described above. Cc: Imre Deak Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/e86fff1579f14ebf6334692526c8f6831cd02cac.1674144945.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 46 +++++++++++++++++++++++------- drivers/gpu/drm/drm_probe_helper.c | 4 ++- include/drm/drm_edid.h | 2 ++ 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f9502df3dbca..d0c21d27b978 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6735,30 +6735,54 @@ static int _drm_edid_connector_property_update(struct drm_connector *connector, * @connector: Connector * @drm_edid: EDID * - * Update the connector mode list, display info, ELD, HDR metadata, relevant - * properties, etc. from the passed in EDID. + * Update the connector display info, ELD, HDR metadata, relevant properties, + * etc. from the passed in EDID. * * If EDID is NULL, reset the information. * - * Return: The number of modes added or 0 if we couldn't find any. + * Must be called before calling drm_edid_connector_add_modes(). + * + * Return: 0 on success, negative error on errors. */ int drm_edid_connector_update(struct drm_connector *connector, const struct drm_edid *drm_edid) { - int count; - update_display_info(connector, drm_edid); - count = _drm_edid_connector_add_modes(connector, drm_edid); - _drm_update_tile_info(connector, drm_edid); - /* Note: Ignore errors for now. */ - _drm_edid_connector_property_update(connector, drm_edid); + return _drm_edid_connector_property_update(connector, drm_edid); +} +EXPORT_SYMBOL(drm_edid_connector_update); + +/** + * drm_edid_connector_add_modes - Update probed modes from the EDID property + * @connector: Connector + * + * Add the modes from the previously updated EDID property to the connector + * probed modes list. + * + * drm_edid_connector_update() must have been called before this to update the + * EDID property. + * + * Return: The number of modes added, or 0 if we couldn't find any. + */ +int drm_edid_connector_add_modes(struct drm_connector *connector) +{ + const struct drm_edid *drm_edid = NULL; + int count; + + if (connector->edid_blob_ptr) + drm_edid = drm_edid_alloc(connector->edid_blob_ptr->data, + connector->edid_blob_ptr->length); + + count = _drm_edid_connector_add_modes(connector, drm_edid); + + drm_edid_free(drm_edid); return count; } -EXPORT_SYMBOL(drm_edid_connector_update); +EXPORT_SYMBOL(drm_edid_connector_add_modes); static int _drm_connector_update_edid_property(struct drm_connector *connector, const struct drm_edid *drm_edid) @@ -6813,7 +6837,7 @@ EXPORT_SYMBOL(drm_connector_update_edid_property); * &drm_display_info structure and ELD in @connector with any information which * can be derived from the edid. * - * This function is deprecated. Use drm_edid_connector_update() instead. + * This function is deprecated. Use drm_edid_connector_add_modes() instead. * * Return: The number of modes added or 0 if we couldn't find any. */ diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 7973f2589ced..95aeeed33cf5 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -1163,7 +1163,9 @@ int drm_connector_helper_get_modes(struct drm_connector *connector) * EDID. Otherwise, if the EDID is NULL, clear the connector * information. */ - count = drm_edid_connector_update(connector, drm_edid); + drm_edid_connector_update(connector, drm_edid); + + count = drm_edid_connector_add_modes(connector); drm_edid_free(drm_edid); diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 372963600f1d..70ae6c290bdc 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -609,6 +609,8 @@ const struct drm_edid *drm_edid_read_custom(struct drm_connector *connector, void *context); int drm_edid_connector_update(struct drm_connector *connector, const struct drm_edid *edid); +int drm_edid_connector_add_modes(struct drm_connector *connector); + const u8 *drm_find_edid_extension(const struct drm_edid *drm_edid, int ext_id, int *ext_index); From b494d6283deb97d18b527bbfe88e5c7be7db05ed Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 19 Jan 2023 18:18:57 +0200 Subject: [PATCH 36/45] drm/edid: remove redundant _drm_connector_update_edid_property() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Realize that drm_edid_connector_update() and _drm_connector_update_edid_property() are now the same thing. Drop the latter. Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/712cc299afe33d8f6279a15d5b0117aeeab88bb4.1674144945.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d0c21d27b978..3d0a4da661bc 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6784,24 +6784,6 @@ int drm_edid_connector_add_modes(struct drm_connector *connector) } EXPORT_SYMBOL(drm_edid_connector_add_modes); -static int _drm_connector_update_edid_property(struct drm_connector *connector, - const struct drm_edid *drm_edid) -{ - /* - * Set the display info, using edid if available, otherwise resetting - * the values to defaults. This duplicates the work done in - * drm_add_edid_modes, but that function is not consistently called - * before this one in all drivers and the computation is cheap enough - * that it seems better to duplicate it rather than attempt to ensure - * some arbitrary ordering of calls. - */ - update_display_info(connector, drm_edid); - - _drm_update_tile_info(connector, drm_edid); - - return _drm_edid_connector_property_update(connector, drm_edid); -} - /** * drm_connector_update_edid_property - update the edid property of a connector * @connector: drm connector @@ -6823,8 +6805,7 @@ int drm_connector_update_edid_property(struct drm_connector *connector, { struct drm_edid drm_edid; - return _drm_connector_update_edid_property(connector, - drm_edid_legacy_init(&drm_edid, edid)); + return drm_edid_connector_update(connector, drm_edid_legacy_init(&drm_edid, edid)); } EXPORT_SYMBOL(drm_connector_update_edid_property); From 241d5852468688e428236f9f3589ed0a35fb20de Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:30:56 +0100 Subject: [PATCH 37/45] dt-bindings: display: simple-framebuffer: Support system memory framebuffers In order to support framebuffers residing in system memory, allow the memory-region property to override the framebuffer memory specification in the "reg" property. Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-2-thierry.reding@gmail.com --- .../devicetree/bindings/display/simple-framebuffer.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml index dd64f70b5014..3e9857eb002e 100644 --- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml +++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml @@ -63,6 +63,11 @@ properties: reg: description: Location and size of the framebuffer memory + memory-region: + maxItems: 1 + description: Phandle to a node describing the memory to be used for the + framebuffer. If present, overrides the "reg" property (if one exists). + clocks: description: List of clocks used by the framebuffer. From ed1055394134674110dc51c972a09bf37bd4672e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:30:57 +0100 Subject: [PATCH 38/45] dt-bindings: display: simple-framebuffer: Document 32-bit BGR format This is a variant of the 32-bit RGB format where the red and blue components are swapped. Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-3-thierry.reding@gmail.com --- .../devicetree/bindings/display/simple-framebuffer.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml index 3e9857eb002e..3c9f29e428a4 100644 --- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml +++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml @@ -99,6 +99,7 @@ properties: * `x1r5g5b5` - 16-bit pixels, d[14:10]=r, d[9:5]=g, d[4:0]=b * `x2r10g10b10` - 32-bit pixels, d[29:20]=r, d[19:10]=g, d[9:0]=b * `x8r8g8b8` - 32-bit pixels, d[23:16]=r, d[15:8]=g, d[7:0]=b + * `x8b8g8r8` - 32-bit pixels, d[23:16]=b, d[15:8]=g, d[7:0]=r enum: - a1r5g5b5 - a2r10g10b10 @@ -110,6 +111,7 @@ properties: - x1r5g5b5 - x2r10g10b10 - x8r8g8b8 + - x8b8g8r8 display: $ref: /schemas/types.yaml#/definitions/phandle From 1cd1b5c2f6ec91d2843bc1ff42bdacef97218ec6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:30:58 +0100 Subject: [PATCH 39/45] dt-bindings: reserved-memory: Support framebuffer reserved memory Document the "framebuffer" compatible string for reserved memory nodes to annotate reserved memory regions used for framebuffer carveouts. Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-4-thierry.reding@gmail.com --- .../bindings/reserved-memory/framebuffer.yaml | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml diff --git a/Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml b/Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml new file mode 100644 index 000000000000..05b6648b3458 --- /dev/null +++ b/Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/reserved-memory/framebuffer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: /reserved-memory framebuffer node bindings + +maintainers: + - devicetree-spec@vger.kernel.org + +allOf: + - $ref: reserved-memory.yaml + +properties: + compatible: + const: framebuffer + description: > + This indicates a region of memory meant to be used as a framebuffer for + a set of display devices. It can be used by an operating system to keep + the framebuffer from being overwritten and use it as the backing memory + for a display device (such as simple-framebuffer). + +unevaluatedProperties: false + +examples: + - | + / { + compatible = "foo"; + model = "foo"; + #address-cells = <1>; + #size-cells = <1>; + + chosen { + framebuffer { + compatible = "simple-framebuffer"; + memory-region = <&fb>; + }; + }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + fb: framebuffer@80000000 { + compatible = "framebuffer"; + reg = <0x80000000 0x007e9000>; + }; + }; + }; +... From fa904b4cbc60afa042e59a07251f54aa208218ce Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:30:59 +0100 Subject: [PATCH 40/45] drm/simpledrm: Use struct iosys_map consistently The majority of the driver already uses struct iosys_map to encapsulate accesses to I/O remapped vs. system memory. Accesses via the screen base pointer still use __iomem annotations, which can lead to inconsistencies and conflicts with subsequent patches. Convert the screen base to a struct iosys_map as well for consistency and to avoid these issues. Reviewed-by: Thomas Zimmermann Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-5-thierry.reding@gmail.com --- drivers/gpu/drm/tiny/simpledrm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index f658b99c796a..c1ed6dd426ec 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -208,7 +208,7 @@ struct simpledrm_device { unsigned int pitch; /* memory management */ - void __iomem *screen_base; + struct iosys_map screen_base; /* modesetting */ uint32_t formats[8]; @@ -473,15 +473,15 @@ static void simpledrm_primary_plane_helper_atomic_update(struct drm_plane *plane drm_atomic_helper_damage_iter_init(&iter, old_plane_state, plane_state); drm_atomic_for_each_plane_damage(&iter, &damage) { - struct iosys_map dst = IOSYS_MAP_INIT_VADDR(sdev->screen_base); struct drm_rect dst_clip = plane_state->dst; + struct iosys_map dst = sdev->screen_base; if (!drm_rect_intersect(&dst_clip, &damage)) continue; iosys_map_incr(&dst, drm_fb_clip_offset(sdev->pitch, sdev->format, &dst_clip)); - drm_fb_blit(&dst, &sdev->pitch, sdev->format->format, shadow_plane_state->data, fb, - &damage); + drm_fb_blit(&dst, &sdev->pitch, sdev->format->format, shadow_plane_state->data, + fb, &damage); } drm_dev_exit(idx); @@ -500,7 +500,7 @@ static void simpledrm_primary_plane_helper_atomic_disable(struct drm_plane *plan return; /* Clear screen to black if disabled */ - memset_io(sdev->screen_base, 0, sdev->pitch * sdev->mode.vdisplay); + iosys_map_memset(&sdev->screen_base, 0, 0, sdev->pitch * sdev->mode.vdisplay); drm_dev_exit(idx); } @@ -703,7 +703,8 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv, screen_base = devm_ioremap_wc(&pdev->dev, mem->start, resource_size(mem)); if (!screen_base) return ERR_PTR(-ENOMEM); - sdev->screen_base = screen_base; + + iosys_map_set_vaddr_iomem(&sdev->screen_base, screen_base); /* * Modesetting From 9a10c7e6519b3d4c2006b20b1675525b0da07e85 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:31:00 +0100 Subject: [PATCH 41/45] drm/simpledrm: Add support for system memory framebuffers Simple framebuffers can be set up in system memory, which cannot be requested and/or I/O remapped using the I/O resource helpers. Add a separate code path that obtains system memory framebuffers from the reserved memory region referenced in the memory-region property. Reviewed-by: Thomas Zimmermann Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-6-thierry.reding@gmail.com --- drivers/gpu/drm/tiny/simpledrm.c | 103 +++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index c1ed6dd426ec..2acc0eb32489 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -184,6 +185,31 @@ simplefb_get_format_of(struct drm_device *dev, struct device_node *of_node) return simplefb_get_validated_format(dev, format); } +static struct resource * +simplefb_get_memory_of(struct drm_device *dev, struct device_node *of_node) +{ + struct device_node *np; + struct resource *res; + int err; + + np = of_parse_phandle(of_node, "memory-region", 0); + if (!np) + return NULL; + + res = devm_kzalloc(dev->dev, sizeof(*res), GFP_KERNEL); + if (!res) + return ERR_PTR(-ENOMEM); + + err = of_address_to_resource(np, 0, res); + if (err) + return ERR_PTR(err); + + if (of_get_property(of_node, "reg", NULL)) + drm_warn(dev, "preferring \"memory-region\" over \"reg\" property\n"); + + return res; +} + /* * Simple Framebuffer device */ @@ -604,8 +630,7 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv, struct drm_device *dev; int width, height, stride; const struct drm_format_info *format; - struct resource *res, *mem; - void __iomem *screen_base; + struct resource *res, *mem = NULL; struct drm_plane *primary_plane; struct drm_crtc *crtc; struct drm_encoder *encoder; @@ -657,6 +682,9 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv, format = simplefb_get_format_of(dev, of_node); if (IS_ERR(format)) return ERR_CAST(format); + mem = simplefb_get_memory_of(dev, of_node); + if (IS_ERR(mem)) + return ERR_CAST(mem); } else { drm_err(dev, "no simplefb configuration found\n"); return ERR_PTR(-ENODEV); @@ -679,33 +707,56 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv, * Memory management */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return ERR_PTR(-EINVAL); + if (mem) { + void *screen_base; - ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res)); - if (ret) { - drm_err(dev, "could not acquire memory range %pr: error %d\n", res, ret); - return ERR_PTR(ret); + ret = devm_aperture_acquire_from_firmware(dev, mem->start, resource_size(mem)); + if (ret) { + drm_err(dev, "could not acquire memory range %pr: %d\n", mem, ret); + return ERR_PTR(ret); + } + + drm_dbg(dev, "using system memory framebuffer at %pr\n", mem); + + screen_base = devm_memremap(dev->dev, mem->start, resource_size(mem), MEMREMAP_WC); + if (!screen_base) + return ERR_PTR(-ENOMEM); + + iosys_map_set_vaddr(&sdev->screen_base, screen_base); + } else { + void __iomem *screen_base; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return ERR_PTR(-EINVAL); + + ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res)); + if (ret) { + drm_err(dev, "could not acquire memory range %pr: %d\n", &res, ret); + return ERR_PTR(ret); + } + + drm_dbg(dev, "using I/O memory framebuffer at %pr\n", res); + + mem = devm_request_mem_region(&pdev->dev, res->start, resource_size(res), + drv->name); + if (!mem) { + /* + * We cannot make this fatal. Sometimes this comes from magic + * spaces our resource handlers simply don't know about. Use + * the I/O-memory resource as-is and try to map that instead. + */ + drm_warn(dev, "could not acquire memory region %pr\n", res); + mem = res; + } + + screen_base = devm_ioremap_wc(&pdev->dev, mem->start, resource_size(mem)); + if (!screen_base) + return ERR_PTR(-ENOMEM); + + iosys_map_set_vaddr_iomem(&sdev->screen_base, screen_base); } - mem = devm_request_mem_region(&pdev->dev, res->start, resource_size(res), drv->name); - if (!mem) { - /* - * We cannot make this fatal. Sometimes this comes from magic - * spaces our resource handlers simply don't know about. Use - * the I/O-memory resource as-is and try to map that instead. - */ - drm_warn(dev, "could not acquire memory region %pr\n", res); - mem = res; - } - - screen_base = devm_ioremap_wc(&pdev->dev, mem->start, resource_size(mem)); - if (!screen_base) - return ERR_PTR(-ENOMEM); - - iosys_map_set_vaddr_iomem(&sdev->screen_base, screen_base); - /* * Modesetting */ From 9abecb1d338c576bef90dd8c4f58485bc56b64ca Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:31:01 +0100 Subject: [PATCH 42/45] drm/format-helper: Support the AB24/XB24 formats Add a conversion helper for the AB24 and XB24 formats to use in drm_fb_blit(). Reviewed-by: Thomas Zimmermann Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-7-thierry.reding@gmail.com --- drivers/gpu/drm/drm_format_helper.c | 66 +++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 994f8fb71f45..f93a4efcee90 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -649,6 +649,66 @@ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_ } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888); +static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) +{ + __le32 *dbuf32 = dbuf; + const __le32 *sbuf32 = sbuf; + unsigned int x; + u32 pix; + + for (x = 0; x < pixels; x++) { + pix = le32_to_cpu(sbuf32[x]); + pix = ((pix & 0x00ff0000) >> 16) << 0 | + ((pix & 0x0000ff00) >> 8) << 8 | + ((pix & 0x000000ff) >> 0) << 16 | + GENMASK(31, 24); /* fill alpha bits */ + *dbuf32++ = cpu_to_le32(pix); + } +} + +static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, + const struct drm_framebuffer *fb, + const struct drm_rect *clip) +{ + static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { + 4, + }; + + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xrgb8888_to_abgr8888_line); +} + +static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) +{ + __le32 *dbuf32 = dbuf; + const __le32 *sbuf32 = sbuf; + unsigned int x; + u32 pix; + + for (x = 0; x < pixels; x++) { + pix = le32_to_cpu(sbuf32[x]); + pix = ((pix & 0x00ff0000) >> 16) << 0 | + ((pix & 0x0000ff00) >> 8) << 8 | + ((pix & 0x000000ff) >> 0) << 16 | + ((pix & 0xff000000) >> 24) << 24; + *dbuf32++ = cpu_to_le32(pix); + } +} + +static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, + const struct drm_framebuffer *fb, + const struct drm_rect *clip) +{ + static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { + 4, + }; + + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xrgb8888_to_xbgr8888_line); +} + static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le32 *dbuf32 = dbuf; @@ -868,6 +928,12 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d } else if (dst_format == DRM_FORMAT_ARGB8888) { drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip); return 0; + } else if (dst_format == DRM_FORMAT_XBGR8888) { + drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip); + return 0; + } else if (dst_format == DRM_FORMAT_ABGR8888) { + drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip); + return 0; } else if (dst_format == DRM_FORMAT_XRGB2101010) { drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip); return 0; From 8681e3663411ee6b64dd0714b23f5c86f64f7533 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:31:02 +0100 Subject: [PATCH 43/45] drm/simpledrm: Support the XB24/AB24 format Add XB24 and AB24 to the list of supported formats. The format helpers support conversion to these formats and they are documented in the simple-framebuffer device tree bindings. Reviewed-by: Thomas Zimmermann Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-8-thierry.reding@gmail.com --- include/linux/platform_data/simplefb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/platform_data/simplefb.h b/include/linux/platform_data/simplefb.h index 27ea99af6e1d..4f94d52ac99f 100644 --- a/include/linux/platform_data/simplefb.h +++ b/include/linux/platform_data/simplefb.h @@ -22,6 +22,7 @@ { "r8g8b8", 24, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_RGB888 }, \ { "x8r8g8b8", 32, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_XRGB8888 }, \ { "a8r8g8b8", 32, {16, 8}, {8, 8}, {0, 8}, {24, 8}, DRM_FORMAT_ARGB8888 }, \ + { "x8b8g8r8", 32, {0, 8}, {8, 8}, {16, 8}, {0, 0}, DRM_FORMAT_XBGR8888 }, \ { "a8b8g8r8", 32, {0, 8}, {8, 8}, {16, 8}, {24, 8}, DRM_FORMAT_ABGR8888 }, \ { "x2r10g10b10", 32, {20, 10}, {10, 10}, {0, 10}, {0, 0}, DRM_FORMAT_XRGB2101010 }, \ { "a2r10g10b10", 32, {20, 10}, {10, 10}, {0, 10}, {30, 2}, DRM_FORMAT_ARGB2101010 }, \ From 998101f2a78cf506022f2094461105cb3d00e19f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 21 Jan 2023 20:24:16 +0100 Subject: [PATCH 44/45] fbdev: Remove unused struct fb_deferred_io .first_io field This optional callback was added in the commit 1f45f9dbb392 ("fb_defio: add first_io callback") but it was never used by a driver. Let's remove it since it's unlikely that will be used after a decade that was added. Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230121192418.2814955-2-javierm@redhat.com --- drivers/video/fbdev/core/fb_defio.c | 4 ---- include/linux/fb.h | 1 - 2 files changed, 5 deletions(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index c730253ab85c..1b680742b7f3 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -157,10 +157,6 @@ static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long /* protect against the workqueue changing the page list */ mutex_lock(&fbdefio->lock); - /* first write in this cycle, notify the driver */ - if (fbdefio->first_io && list_empty(&fbdefio->pagereflist)) - fbdefio->first_io(info); - pageref = fb_deferred_io_pageref_get(info, offset, page); if (WARN_ON_ONCE(!pageref)) { ret = VM_FAULT_OOM; diff --git a/include/linux/fb.h b/include/linux/fb.h index 30183fd259ae..daf336385613 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -215,7 +215,6 @@ struct fb_deferred_io { struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ - void (*first_io)(struct fb_info *info); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif From 51affef35bb39f186aef7eeeb4a7f9ceccd3e65e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 21 Jan 2023 20:09:30 +0100 Subject: [PATCH 45/45] drm/ssd130x: Silence a `dubious: x & !y` warning The sparse tool complains with the following warning: $ make M=drivers/gpu/drm/solomon/ C=2 CC [M] drivers/gpu/drm/solomon/ssd130x.o CHECK drivers/gpu/drm/solomon/ssd130x.c drivers/gpu/drm/solomon/ssd130x.c:363:21: warning: dubious: x & !y This seems to be a false positive in my opinion but still we can silence the tool while making the code easier to read. Let's also add a comment, to explain why the "com_seq" logical not is used rather than its value. Reported-by: Thomas Zimmermann Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230121190930.2804224-1-javierm@redhat.com --- drivers/gpu/drm/solomon/ssd130x.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index c3bf3a18302e..b16330a8b624 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -81,7 +81,7 @@ #define SSD130X_SET_PRECHARGE_PERIOD2_MASK GENMASK(7, 4) #define SSD130X_SET_PRECHARGE_PERIOD2_SET(val) FIELD_PREP(SSD130X_SET_PRECHARGE_PERIOD2_MASK, (val)) #define SSD130X_SET_COM_PINS_CONFIG1_MASK GENMASK(4, 4) -#define SSD130X_SET_COM_PINS_CONFIG1_SET(val) FIELD_PREP(SSD130X_SET_COM_PINS_CONFIG1_MASK, !(val)) +#define SSD130X_SET_COM_PINS_CONFIG1_SET(val) FIELD_PREP(SSD130X_SET_COM_PINS_CONFIG1_MASK, (val)) #define SSD130X_SET_COM_PINS_CONFIG2_MASK GENMASK(5, 5) #define SSD130X_SET_COM_PINS_CONFIG2_SET(val) FIELD_PREP(SSD130X_SET_COM_PINS_CONFIG2_MASK, (val)) @@ -298,6 +298,7 @@ static void ssd130x_power_off(struct ssd130x_device *ssd130x) static int ssd130x_init(struct ssd130x_device *ssd130x) { u32 precharge, dclk, com_invdir, compins, chargepump, seg_remap; + bool scan_mode; int ret; /* Set initial contrast */ @@ -360,7 +361,13 @@ static int ssd130x_init(struct ssd130x_device *ssd130x) /* Set COM pins configuration */ compins = BIT(1); - compins |= (SSD130X_SET_COM_PINS_CONFIG1_SET(ssd130x->com_seq) | + /* + * The COM scan mode field values are the inverse of the boolean DT + * property "solomon,com-seq". The value 0b means scan from COM0 to + * COM[N - 1] while 1b means scan from COM[N - 1] to COM0. + */ + scan_mode = !ssd130x->com_seq; + compins |= (SSD130X_SET_COM_PINS_CONFIG1_SET(scan_mode) | SSD130X_SET_COM_PINS_CONFIG2_SET(ssd130x->com_lrremap)); ret = ssd130x_write_cmd(ssd130x, 2, SSD130X_SET_COM_PINS_CONFIG, compins); if (ret < 0)