From a5e400a985df8041ed4659ed1462aa9134318130 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Aug 2023 20:58:56 +0300 Subject: [PATCH 01/14] net/mlx5e: Honor user choice of IPsec replay window size Users can configure IPsec replay window size, but mlx5 driver didn't honor their choice and set always 32bits. Fix assignment logic to configure right size from the beginning. Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") Reviewed-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 21 +++++++++++++++++++ .../mlx5/core/en_accel/ipsec_offload.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 7 +++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 655496598c68..4028932d93ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -335,6 +335,27 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->replay_esn.esn = sa_entry->esn_state.esn; attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; attrs->replay_esn.overlap = sa_entry->esn_state.overlap; + switch (x->replay_esn->replay_window) { + case 32: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT; + break; + case 64: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT; + break; + case 128: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT; + break; + case 256: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT; + break; + default: + WARN_ON(true); + return; + } } attrs->dir = x->xso.dir; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index a91f772dc981..4e018fba2d5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -95,7 +95,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { MLX5_SET(ipsec_aso, aso_ctx, window_sz, - attrs->replay_esn.replay_window / 64); + attrs->replay_esn.replay_window); MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f3631425f38..90ca63f4bf63 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -12001,6 +12001,13 @@ enum { MLX5_IPSEC_ASO_INC_SN = 0x2, }; +enum { + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + struct mlx5_ifc_ipsec_aso_bits { u8 valid[0x1]; u8 reserved_at_201[0x1]; From 3d42c8cc67a8fcbff0181f9ed6d03d353edcee07 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 20 Sep 2023 10:07:13 +0300 Subject: [PATCH 02/14] net/mlx5e: Ensure that IPsec sequence packet number starts from 1 According to RFC4303, section "3.3.3. Sequence Number Generation", the first packet sent using a given SA will contain a sequence number of 1. However if user didn't set seq/oseq, the HW used zero as first sequence packet number. Such misconfiguration causes to drop of first packet if replay window protection was enabled in SA. To fix it, set sequence number to be at least 1. Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 4028932d93ce..914b9e6eb7db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -121,7 +121,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); - sa_entry->esn_state.esn = esn; + if (sa_entry->esn_state.esn_msb) + sa_entry->esn_state.esn = esn; + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = max_t(u32, esn, 1); sa_entry->esn_state.esn_msb = esn_msb; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { From 94af50c0a9bb961fe93cf0fdd14eb0883da86721 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 14:06:18 +0300 Subject: [PATCH 03/14] net/mlx5e: Unify esw and normal IPsec status table creation/destruction Change normal IPsec flow to use the same creation/destruction functions for status flow table as that of ESW, which first of all refines the code to have less code duplication. And more importantly, the ESW status table handles IPsec syndrome checks at steering by HW, which is more efficient than the previous behaviour we had where it was copied to WQE meta data and checked by the driver. Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 191 +++++++++++++----- .../mellanox/mlx5/core/esw/ipsec_fs.c | 152 -------------- .../mellanox/mlx5/core/esw/ipsec_fs.h | 15 -- 3 files changed, 143 insertions(+), 215 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index f41c976dc33f..85ed5171e835 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -128,14 +128,104 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, return mlx5_create_auto_grouped_flow_table(ns, &ft_attr); } -static int ipsec_status_rule(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) +static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) { - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + mlx5_del_flow_rules(rx->status_drop.rule); + mlx5_destroy_flow_group(rx->status_drop.group); + mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); +} + +static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status.rule); + + if (rx != ipsec->rx_esw) + return; + +#ifdef CONFIG_MLX5_ESWITCH + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); +#endif +} + +static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_modify_hdr *modify_hdr; - struct mlx5_flow_handle *fte; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto err_out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop flow group, err=%d\n", err); + goto err_out; + } + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + + rx->status_drop.group = g; + rx->status_drop.rule = rule; + rx->status_drop_cnt = flow_counter; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + mlx5_destroy_flow_group(g); +err_out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err; @@ -143,48 +233,61 @@ static int ipsec_status_rule(struct mlx5_core_dev *mdev, if (!spec) return -ENOMEM; - /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */ - MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); - MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME); - MLX5_SET(copy_action_in, action, src_offset, 0); - MLX5_SET(copy_action_in, action, length, 7); - MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(copy_action_in, action, dst_offset, 24); - - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, - 1, action); - - if (IS_ERR(modify_hdr)) { - err = PTR_ERR(modify_hdr); - mlx5_core_err(mdev, - "fail to alloc ipsec copy modify_header_id err=%d\n", err); - goto out_spec; - } - - /* create fte */ - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - flow_act.modify_hdr = modify_hdr; - fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); - if (IS_ERR(fte)) { - err = PTR_ERR(fte); - mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err); - goto out; + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(ipsec->mdev, + "Failed to add ipsec rx status pass rule, err=%d\n", err); + goto err_rule; } + rx->status.rule = rule; kvfree(spec); - rx->status.rule = fte; - rx->status.modify_hdr = modify_hdr; return 0; -out: - mlx5_modify_header_dealloc(mdev, modify_hdr); -out_spec: +err_rule: kvfree(spec); return err; } +static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + ipsec_rx_status_pass_destroy(ipsec, rx); + ipsec_rx_status_drop_destroy(ipsec, rx); +} + +static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + int err; + + err = ipsec_rx_status_drop_create(ipsec, rx); + if (err) + return err; + + err = ipsec_rx_status_pass_create(ipsec, rx, dest); + if (err) + goto err_pass_create; + + return 0; + +err_pass_create: + ipsec_rx_status_drop_destroy(ipsec, rx); + return err; +} + static int ipsec_miss_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *ft, struct mlx5e_ipsec_miss *miss, @@ -333,12 +436,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_destroy_flow_table(rx->ft.sa); if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); - if (rx == ipsec->rx_esw) { - mlx5_esw_ipsec_rx_status_destroy(ipsec, rx); - } else { - mlx5_del_flow_rules(rx->status.rule); - mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); - } + mlx5_ipsec_rx_status_destroy(ipsec, rx); mlx5_destroy_flow_table(rx->ft.status); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); @@ -428,10 +526,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); - if (rx == ipsec->rx_esw) - err = mlx5_esw_ipsec_rx_status_create(ipsec, rx, dest); - else - err = ipsec_status_rule(mdev, rx, dest); + err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); if (err) goto err_add; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 095f31f380fa..13b5916b64e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -21,158 +21,6 @@ enum { MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL, }; -static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - mlx5_del_flow_rules(rx->status_drop.rule); - mlx5_destroy_flow_group(rx->status_drop.group); - mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); -} - -static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - mlx5_del_flow_rules(rx->status.rule); - mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); -} - -static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table *ft = rx->ft.status; - struct mlx5_core_dev *mdev = ipsec->mdev; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_fc *flow_counter; - struct mlx5_flow_spec *spec; - struct mlx5_flow_group *g; - u32 *flow_group_in; - int err = 0; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!flow_group_in || !spec) { - err = -ENOMEM; - goto err_out; - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); - g = mlx5_create_flow_group(ft, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop flow group, err=%d\n", err); - goto err_out; - } - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop rule counter, err=%d\n", err); - goto err_cnt; - } - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop rule, err=%d\n", err); - goto err_rule; - } - - rx->status_drop.group = g; - rx->status_drop.rule = rule; - rx->status_drop_cnt = flow_counter; - - kvfree(flow_group_in); - kvfree(spec); - return 0; - -err_rule: - mlx5_fc_destroy(mdev, flow_counter); -err_cnt: - mlx5_destroy_flow_group(g); -err_out: - kvfree(flow_group_in); - kvfree(spec); - return err; -} - -static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - misc_parameters_2.ipsec_syndrome); - MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.ipsec_syndrome, 0); - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - flow_act.flags = FLOW_ACT_NO_APPEND; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_warn(ipsec->mdev, - "Failed to add ipsec rx status pass rule, err=%d\n", err); - goto err_rule; - } - - rx->status.rule = rule; - kvfree(spec); - return 0; - -err_rule: - kvfree(spec); - return err; -} - -void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - esw_ipsec_rx_status_pass_destroy(ipsec, rx); - esw_ipsec_rx_status_drop_destroy(ipsec, rx); -} - -int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - int err; - - err = esw_ipsec_rx_status_drop_create(ipsec, rx); - if (err) - return err; - - err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest); - if (err) - goto err_pass_create; - - return 0; - -err_pass_create: - esw_ipsec_rx_status_drop_destroy(ipsec, rx); - return err; -} - void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h index 0c90f7a8b0d3..ac9c65b89166 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -8,11 +8,6 @@ struct mlx5e_ipsec; struct mlx5e_ipsec_sa_entry; #ifdef CONFIG_MLX5_ESWITCH -void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx); -int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest); void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr); int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, @@ -26,16 +21,6 @@ void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr); void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); #else -static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) {} - -static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - return -EINVAL; -} - static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) {} From 5ad00dee43b98129b86930d457b983d4d04dc553 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Oct 2023 16:44:19 +0300 Subject: [PATCH 04/14] net/mlx5e: Remove exposure of IPsec RX flow steering struct After previous commit, which unified various IPsec creation modes, there is no need to have struct mlx5e_ipsec_rx exposed in global IPsec header. Move it to ipsec_fs.c to be placed together with already existing struct mlx5e_ipsec_tx. Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 14 +------------- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 16 ++++++++++++++-- .../ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 8f4a37bceaf4..c3a40bf11952 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -201,19 +201,6 @@ struct mlx5e_ipsec_miss { struct mlx5_flow_handle *rule; }; -struct mlx5e_ipsec_rx { - struct mlx5e_ipsec_ft ft; - struct mlx5e_ipsec_miss pol; - struct mlx5e_ipsec_miss sa; - struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_miss status_drop; - struct mlx5_fc *status_drop_cnt; - struct mlx5e_ipsec_fc *fc; - struct mlx5_fs_chains *chains; - u8 allow_tunnel_mode : 1; - struct xarray ipsec_obj_id_map; -}; - struct mlx5e_ipsec_tx_create_attr { int prio; int pol_level; @@ -248,6 +235,7 @@ struct mlx5e_ipsec { struct mlx5_ipsec_fs *roce; u8 is_uplink_rep: 1; struct mlx5e_ipsec_mpv_work mpv_work; + struct xarray ipsec_obj_id_map; }; struct mlx5e_ipsec_esn_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 85ed5171e835..aa74a2422869 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -32,6 +32,18 @@ struct mlx5e_ipsec_tx { u8 allow_tunnel_mode : 1; }; +struct mlx5e_ipsec_rx { + struct mlx5e_ipsec_ft ft; + struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_miss sa; + struct mlx5e_ipsec_rule status; + struct mlx5e_ipsec_miss status_drop; + struct mlx5_fc *status_drop_cnt; + struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; +}; + /* IPsec RX flow steering */ static enum mlx5_traffic_types family2tt(u32 family) { @@ -2052,7 +2064,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) kfree(ipsec->rx_ipv6); if (ipsec->is_uplink_rep) { - xa_destroy(&ipsec->rx_esw->ipsec_obj_id_map); + xa_destroy(&ipsec->ipsec_obj_id_map); mutex_destroy(&ipsec->tx_esw->ft.mutex); WARN_ON(ipsec->tx_esw->ft.refcnt); @@ -2115,7 +2127,7 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec, mutex_init(&ipsec->tx_esw->ft.mutex); mutex_init(&ipsec->rx_esw->ft.mutex); ipsec->tx_esw->ns = ns_esw; - xa_init_flags(&ipsec->rx_esw->ipsec_obj_id_map, XA_FLAGS_ALLOC1); + xa_init_flags(&ipsec->ipsec_obj_id_map, XA_FLAGS_ALLOC1); } else if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) { ipsec->roce = mlx5_ipsec_fs_roce_init(mdev, devcom); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 13b5916b64e2..5a0047bdcb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -50,7 +50,7 @@ int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, u32 mapped_id; int err; - err = xa_alloc_bh(&ipsec->rx_esw->ipsec_obj_id_map, &mapped_id, + err = xa_alloc_bh(&ipsec->ipsec_obj_id_map, &mapped_id, xa_mk_value(sa_entry->ipsec_obj_id), XA_LIMIT(1, ESW_IPSEC_RX_MAPPED_ID_MASK), 0); if (err) @@ -81,7 +81,7 @@ int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, return 0; err_header_alloc: - xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, mapped_id); + xa_erase_bh(&ipsec->ipsec_obj_id_map, mapped_id); return err; } @@ -90,7 +90,7 @@ void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5e_ipsec *ipsec = sa_entry->ipsec; if (sa_entry->rx_mapped_id) - xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, + xa_erase_bh(&ipsec->ipsec_obj_id_map, sa_entry->rx_mapped_id); } @@ -100,7 +100,7 @@ int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, struct mlx5e_ipsec *ipsec = priv->ipsec; void *val; - val = xa_load(&ipsec->rx_esw->ipsec_obj_id_map, id); + val = xa_load(&ipsec->ipsec_obj_id_map, id); if (!val) return -ENOENT; From dddb49b63d8683be81cff220b94a0196c1367b74 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Sun, 24 Sep 2023 11:50:31 +0300 Subject: [PATCH 05/14] net/mlx5e: Add IPsec and ASO syndromes check in HW After IPsec decryption it isn't enough to only check the IPsec syndrome but need to also check the ASO syndrome in order to verify that the operation was actually successful. Verify that both syndromes are actually zero and in case not drop the packet and increment the appropriate flow counter for the drop reason. Fixes: 6b5c45e16e43 ("net/mlx5e: Configure IPsec packet offload flow steering") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.h | 8 + .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 235 ++++++++++++++++-- 2 files changed, 223 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index c3a40bf11952..adaea3493193 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -189,11 +189,19 @@ struct mlx5e_ipsec_ft { u32 refcnt; }; +struct mlx5e_ipsec_drop { + struct mlx5_flow_handle *rule; + struct mlx5_fc *fc; +}; + struct mlx5e_ipsec_rule { struct mlx5_flow_handle *rule; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_fc *fc; + struct mlx5e_ipsec_drop replay; + struct mlx5e_ipsec_drop auth; + struct mlx5e_ipsec_drop trailer; }; struct mlx5e_ipsec_miss { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index aa74a2422869..aeb399d8dae5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -32,13 +32,17 @@ struct mlx5e_ipsec_tx { u8 allow_tunnel_mode : 1; }; +struct mlx5e_ipsec_status_checks { + struct mlx5_flow_group *drop_all_group; + struct mlx5e_ipsec_drop all; +}; + struct mlx5e_ipsec_rx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; struct mlx5e_ipsec_miss sa; struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_miss status_drop; - struct mlx5_fc *status_drop_cnt; + struct mlx5e_ipsec_status_checks status_drops; struct mlx5e_ipsec_fc *fc; struct mlx5_fs_chains *chains; u8 allow_tunnel_mode : 1; @@ -143,9 +147,9 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { - mlx5_del_flow_rules(rx->status_drop.rule); - mlx5_destroy_flow_group(rx->status_drop.group); - mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); + mlx5_del_flow_rules(rx->status_drops.all.rule); + mlx5_fc_destroy(ipsec->mdev, rx->status_drops.all.fc); + mlx5_destroy_flow_group(rx->status_drops.drop_all_group); } static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, @@ -161,8 +165,149 @@ static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, #endif } -static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) +static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5e_ipsec_rx *rx) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, true); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + sa_entry->ipsec_rule.auth.fc = flow_counter; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags = FLOW_ACT_NO_APPEND; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_2, + sa_entry->ipsec_obj_id | BIT(31)); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + sa_entry->ipsec_rule.auth.rule = rule; + + flow_counter = mlx5_fc_create(mdev, true); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt_2; + } + sa_entry->ipsec_rule.trailer.fc = flow_counter; + + dest.counter_id = mlx5_fc_id(flow_counter); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 2); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule_2; + } + sa_entry->ipsec_rule.trailer.rule = rule; + + kvfree(spec); + return 0; + +err_rule_2: + mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.trailer.fc); +err_cnt_2: + mlx5_del_flow_rules(sa_entry->ipsec_rule.auth.rule); +err_rule: + mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.auth.fc); +err_cnt: + kvfree(spec); + return err; +} + +static int rx_add_rule_drop_replay(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5e_ipsec_rx *rx) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, true); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags = FLOW_ACT_NO_APPEND; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_2, + sa_entry->ipsec_obj_id | BIT(31)); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + + sa_entry->ipsec_rule.replay.rule = rule; + sa_entry->ipsec_rule.replay.fc = flow_counter; + + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + kvfree(spec); + return err; +} + +static int ipsec_rx_status_drop_all_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table *ft = rx->ft.status; @@ -214,9 +359,9 @@ static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, goto err_rule; } - rx->status_drop.group = g; - rx->status_drop.rule = rule; - rx->status_drop_cnt = flow_counter; + rx->status_drops.drop_all_group = g; + rx->status_drops.all.rule = rule; + rx->status_drops.all.fc = flow_counter; kvfree(flow_group_in); kvfree(spec); @@ -247,8 +392,12 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 0); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_4, 0); if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; @@ -285,7 +434,7 @@ static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, { int err; - err = ipsec_rx_status_drop_create(ipsec, rx); + err = ipsec_rx_status_drop_all_create(ipsec, rx); if (err) return err; @@ -529,7 +678,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) return err; - ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 1, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; @@ -1159,29 +1308,48 @@ static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 struct mlx5_flow_act *flow_act) { enum mlx5_flow_namespace_type ns_type = ipsec_fs_get_ns(ipsec, type, dir); - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + u8 action[3][MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_modify_hdr *modify_hdr; + u8 num_of_actions = 1; - MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action[0], action_type, MLX5_ACTION_TYPE_SET); switch (dir) { case XFRM_DEV_OFFLOAD_IN: - MLX5_SET(set_action_in, action, field, + MLX5_SET(set_action_in, action[0], field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + + num_of_actions++; + MLX5_SET(set_action_in, action[1], action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action[1], field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_2); + MLX5_SET(set_action_in, action[1], data, val); + MLX5_SET(set_action_in, action[1], offset, 0); + MLX5_SET(set_action_in, action[1], length, 32); + + if (type == XFRM_DEV_OFFLOAD_CRYPTO) { + num_of_actions++; + MLX5_SET(set_action_in, action[2], action_type, + MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action[2], field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); + MLX5_SET(set_action_in, action[2], data, 0); + MLX5_SET(set_action_in, action[2], offset, 0); + MLX5_SET(set_action_in, action[2], length, 32); + } break; case XFRM_DEV_OFFLOAD_OUT: - MLX5_SET(set_action_in, action, field, + MLX5_SET(set_action_in, action[0], field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); break; default: return -EINVAL; } - MLX5_SET(set_action_in, action, data, val); - MLX5_SET(set_action_in, action, offset, 0); - MLX5_SET(set_action_in, action, length, 32); + MLX5_SET(set_action_in, action[0], data, val); + MLX5_SET(set_action_in, action[0], offset, 0); + MLX5_SET(set_action_in, action[0], length, 32); - modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, 1, action); + modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, num_of_actions, action); if (IS_ERR(modify_hdr)) { mlx5_core_err(mdev, "Failed to allocate modify_header %ld\n", PTR_ERR(modify_hdr)); @@ -1479,6 +1647,15 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); goto err_add_flow; } + if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) + err = rx_add_rule_drop_replay(sa_entry, rx); + if (err) + goto err_add_replay; + + err = rx_add_rule_drop_auth_trailer(sa_entry, rx); + if (err) + goto err_drop_reason; + kvfree(spec); sa_entry->ipsec_rule.rule = rule; @@ -1487,6 +1664,13 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat; return 0; +err_drop_reason: + if (sa_entry->ipsec_rule.replay.rule) { + mlx5_del_flow_rules(sa_entry->ipsec_rule.replay.rule); + mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.replay.fc); + } +err_add_replay: + mlx5_del_flow_rules(rule); err_add_flow: mlx5_fc_destroy(mdev, counter); err_add_cnt: @@ -1994,6 +2178,17 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) if (ipsec_rule->modify_hdr) mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); + + mlx5_del_flow_rules(ipsec_rule->trailer.rule); + mlx5_fc_destroy(mdev, ipsec_rule->trailer.fc); + + mlx5_del_flow_rules(ipsec_rule->auth.rule); + mlx5_fc_destroy(mdev, ipsec_rule->auth.fc); + + if (ipsec_rule->replay.rule) { + mlx5_del_flow_rules(ipsec_rule->replay.rule); + mlx5_fc_destroy(mdev, ipsec_rule->replay.fc); + } mlx5_esw_ipsec_rx_id_mapping_remove(sa_entry); rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family, sa_entry->attrs.type); } From c2bf84f1d1a1595dcc45fe867f0e02b331993fee Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 12 Nov 2023 13:50:00 +0200 Subject: [PATCH 06/14] net/mlx5e: Tidy up IPsec NAT-T SA discovery IPsec NAT-T packets are UDP encapsulated packets over ESP normal ones. In case they arrive to RX, the SPI and ESP are located in inner header, while the check was performed on outer header instead. That wrong check caused to the situation where received rekeying request was missed and caused to rekey timeout, which "compensated" this failure by completing rekeying. Fixes: d65954934937 ("net/mlx5e: Support IPsec NAT-T functionality") Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 22 ++++++++++++++----- include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index aeb399d8dae5..7a789061c998 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1212,13 +1212,22 @@ static void setup_fte_esp(struct mlx5_flow_spec *spec) MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP); } -static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi) +static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap) { /* SPI number */ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi); + if (encap) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.inner_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.inner_esp_spi, spi); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.outer_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.outer_esp_spi, spi); + } } static void setup_fte_no_frags(struct mlx5_flow_spec *spec) @@ -1596,8 +1605,9 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) else setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - setup_fte_spi(spec, attrs->spi); - setup_fte_esp(spec); + setup_fte_spi(spec, attrs->spi, attrs->encap); + if (!attrs->encap) + setup_fte_esp(spec); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -1719,7 +1729,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: - setup_fte_spi(spec, attrs->spi); + setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); break; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 90ca63f4bf63..3f7b664d625b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -621,7 +621,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0x20]; + u8 inner_esp_spi[0x20]; u8 outer_esp_spi[0x20]; u8 reserved_at_1a0[0x60]; }; From baac8351f74c543896b8fd40138b7ad9365587a3 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 11 Oct 2023 03:38:29 +0000 Subject: [PATCH 07/14] net/mlx5e: Reduce eswitch mode_lock protection context Currently eswitch mode_lock is so heavy, for example, it's locked during the whole process of the mode change, which may need to hold other locks. As the mode_lock is also used by IPSec to block mode and encap change now, it is easy to cause lock dependency. Since some of protections are also done by devlink lock, the eswitch mode_lock is not needed at those places, and thus the possibility of lockdep issue is reduced. Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev") Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 9 +++-- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++------ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + .../mellanox/mlx5/core/eswitch_offloads.c | 40 +++++++++++-------- 4 files changed, 53 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 7a789061c998..c1e89dc77db9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2110,8 +2110,11 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) struct mlx5_eswitch *esw = mdev->priv.eswitch; int err = 0; - if (esw) - down_write(&esw->mode_lock); + if (esw) { + err = mlx5_esw_lock(esw); + if (err) + return err; + } if (mdev->num_block_ipsec) { err = -EBUSY; @@ -2122,7 +2125,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) unlock: if (esw) - up_write(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8d0b915a3121..3047d7015c52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1463,7 +1463,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) { int err; - lockdep_assert_held(&esw->mode_lock); + devl_assert_locked(priv_to_devlink(esw->dev)); if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); @@ -1531,7 +1531,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (toggle_lag) mlx5_lag_disable_change(esw->dev); - down_write(&esw->mode_lock); if (!mlx5_esw_is_fdb_created(esw)) { ret = mlx5_eswitch_enable_locked(esw, num_vfs); } else { @@ -1554,8 +1553,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) } } - up_write(&esw->mode_lock); - if (toggle_lag) mlx5_lag_enable_change(esw->dev); @@ -1569,12 +1566,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) return; devl_assert_locked(priv_to_devlink(esw->dev)); - down_write(&esw->mode_lock); /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) - goto unlock; + return; esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1603,9 +1599,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) esw->esw_funcs.num_vfs = 0; else esw->esw_funcs.num_ec_vfs = 0; - -unlock: - up_write(&esw->mode_lock); } /* Free resources for corresponding eswitch mode. It is called by devlink @@ -1647,10 +1640,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) devl_assert_locked(priv_to_devlink(esw->dev)); mlx5_lag_disable_change(esw->dev); - down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw); esw->mode = MLX5_ESWITCH_LEGACY; - up_write(&esw->mode_lock); mlx5_lag_enable_change(esw->dev); } @@ -2254,8 +2245,13 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev) if (!mlx5_esw_allowed(esw)) return true; - if (down_read_trylock(&esw->mode_lock) != 0) + if (down_read_trylock(&esw->mode_lock) != 0) { + if (esw->eswitch_operation_in_progress) { + up_read(&esw->mode_lock); + return false; + } return true; + } return false; } @@ -2312,7 +2308,8 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) if (down_write_trylock(&esw->mode_lock) == 0) return -EINVAL; - if (atomic64_read(&esw->user_count) > 0) { + if (esw->eswitch_operation_in_progress || + atomic64_read(&esw->user_count) > 0) { up_write(&esw->mode_lock); return -EBUSY; } @@ -2320,6 +2317,18 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) return esw->mode; } +int mlx5_esw_lock(struct mlx5_eswitch *esw) +{ + down_write(&esw->mode_lock); + + if (esw->eswitch_operation_in_progress) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return 0; +} + /** * mlx5_esw_unlock() - Release write lock on esw mode lock * @esw: eswitch device. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 37ab66e7b403..b674b57d05aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -383,6 +383,7 @@ struct mlx5_eswitch { struct xarray paired; struct mlx5_devcom_comp_dev *devcom; u16 enabled_ipsec_vf_count; + bool eswitch_operation_in_progress; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -827,6 +828,7 @@ void mlx5_esw_release(struct mlx5_core_dev *dev); void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); +int mlx5_esw_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 88236e75fd90..bf78eeca401b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3733,13 +3733,16 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, goto unlock; } + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't change mode while devlink traps are active"); err = -EOPNOTSUPP; - goto unlock; + goto skip; } err = esw_offloads_start(esw, extack); } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { @@ -3749,6 +3752,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = -EINVAL; } +skip: + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; unlock: mlx5_esw_unlock(esw); enable_lag: @@ -3759,16 +3765,12 @@ enable_lag: int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); - err = esw_mode_to_devlink(esw->mode, mode); - up_read(&esw->mode_lock); - return err; + return esw_mode_to_devlink(esw->mode, mode); } static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, @@ -3862,11 +3864,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; - err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); - if (err) - goto out; + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); - esw->offloads.inline_mode = mlx5_mode; + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); + if (!err) + esw->offloads.inline_mode = mlx5_mode; + + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; up_write(&esw->mode_lock); return 0; @@ -3878,16 +3884,12 @@ out: int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); - err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); - up_read(&esw->mode_lock); - return err; + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) @@ -3969,6 +3971,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, goto unlock; } + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; @@ -3982,6 +3987,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw); } + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; + unlock: up_write(&esw->mode_lock); return err; @@ -3996,9 +4004,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); *encap = esw->offloads.encap; - up_read(&esw->mode_lock); return 0; } From 4e25b661f484df54b6751b65f9ea2434a3b67539 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 12 Oct 2023 02:00:44 +0000 Subject: [PATCH 08/14] net/mlx5e: Check the number of elements before walk TC rhashtable After IPSec TX tables are destroyed, the flow rules in TC rhashtable, which have the destination to IPSec, are restored to the original one, the uplink. However, when the device is in switchdev mode and unload driver with IPSec rules configured, TC rhashtable cleanup is done before IPSec cleanup, which means tc_ht->tbl is already freed when walking TC rhashtable, in order to restore the destination. So add the checking before walking to avoid unexpected behavior. Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules") Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 5a0047bdcb51..190f10aba170 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev) + if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); From 762a55a54eec4217e4cec9265ab6e5d4c11b61bd Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 30 Oct 2023 15:44:47 +0200 Subject: [PATCH 09/14] net/mlx5e: Disable IPsec offload support if not FW steering IPsec FDB offload can only work with FW steering as of now, disable the cap upon non FW steering. And since the IPSec cap is dynamic now based on steering mode. Cleanup the resources if they exist instead of checking the IPsec cap again. Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'") Signed-off-by: Chris Mi Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 26 ++++++++----------- .../mlx5/core/en_accel/ipsec_offload.c | 8 +++++- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 914b9e6eb7db..161c5190c236 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -935,9 +935,11 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) return; mlx5e_accel_ipsec_fs_cleanup(ipsec); - if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + if (ipsec->netevent_nb.notifier_call) { unregister_netevent_notifier(&ipsec->netevent_nb); - if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) + ipsec->netevent_nb.notifier_call = NULL; + } + if (ipsec->aso) mlx5e_ipsec_aso_cleanup(ipsec); destroy_workqueue(ipsec->wq); kfree(ipsec); @@ -1046,6 +1048,12 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, } } + if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); + return -EINVAL; + } + return 0; } @@ -1141,14 +1149,6 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { .xdo_dev_state_free = mlx5e_xfrm_free_state, .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, -}; - -static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { - .xdo_dev_state_add = mlx5e_xfrm_add_state, - .xdo_dev_state_delete = mlx5e_xfrm_del_state, - .xdo_dev_state_free = mlx5e_xfrm_free_state, - .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, - .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, .xdo_dev_policy_add = mlx5e_xfrm_add_policy, @@ -1166,11 +1166,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); - if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) - netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops; - else - netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; - + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; netdev->features |= NETIF_F_HW_ESP; netdev->hw_enc_features |= NETIF_F_HW_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 4e018fba2d5f..6e00afe4671b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -6,6 +6,8 @@ #include "ipsec.h" #include "lib/crypto.h" #include "lib/ipsec_fs_roce.h" +#include "fs_core.h" +#include "eswitch.h" enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, @@ -38,7 +40,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) caps |= MLX5_IPSEC_CAP_CRYPTO; - if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) { + if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && + is_mdev_legacy_mode(mdev)))) { if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) && MLX5_CAP_FLOWTABLE_NIC_RX(mdev, @@ -559,6 +564,7 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec) dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx), DMA_BIDIRECTIONAL); kfree(aso); + ipsec->aso = NULL; } static void mlx5e_ipsec_aso_copy(struct mlx5_wqe_aso_ctrl_seg *ctrl, From eab0da38912ebdad922ed0388209f7eb0a5163cd Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Sep 2022 18:45:11 +0300 Subject: [PATCH 10/14] net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work Due to the cited patch, devlink health commands take devlink lock and this may result in deadlock for mlx5e_tx_reporter as it takes local state_lock before calling devlink health report and on the other hand devlink health commands such as diagnose for same reporter take local state_lock after taking devlink lock (see kernel log below). To fix it, remove local state_lock from mlx5e_tx_timeout_work() before calling devlink_health_report() and take care to cancel the work before any call to close channels, which may free the SQs that should be handled by the work. Before cancel_work_sync(), use current_work() to check we are not calling it from within the work, as mlx5e_tx_timeout_work() itself may close the channels and reopen as part of recovery flow. While removing state_lock from mlx5e_tx_timeout_work() keep rtnl_lock to ensure no change in netdev->real_num_tx_queues, but use rtnl_trylock() and a flag to avoid deadlock by calling cancel_work_sync() before closing the channels while holding rtnl_lock too. Kernel log: ====================================================== WARNING: possible circular locking dependency detected 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Not tainted ------------------------------------------------------ kworker/u16:2/65 is trying to acquire lock: ffff888122f6c2f8 (&devlink->lock_key#2){+.+.}-{3:3}, at: devlink_health_report+0x2f1/0x7e0 but task is already holding lock: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&priv->state_lock){+.+.}-{3:3}: __mutex_lock+0x12c/0x14b0 mlx5e_rx_reporter_diagnose+0x71/0x700 [mlx5_core] devlink_nl_cmd_health_reporter_diagnose_doit+0x212/0xa50 genl_family_rcv_msg_doit+0x1e9/0x2f0 genl_rcv_msg+0x2e9/0x530 netlink_rcv_skb+0x11d/0x340 genl_rcv+0x24/0x40 netlink_unicast+0x438/0x710 netlink_sendmsg+0x788/0xc40 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x1c1/0x290 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #0 (&devlink->lock_key#2){+.+.}-{3:3}: __lock_acquire+0x2c8a/0x6200 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&priv->state_lock); lock(&devlink->lock_key#2); lock(&priv->state_lock); lock(&devlink->lock_key#2); *** DEADLOCK *** 4 locks held by kworker/u16:2/65: #0: ffff88811a55b138 ((wq_completion)mlx5e#2){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888101de7db8 ((work_completion)(&priv->tx_timeout_work)){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffffffff84ce8328 (rtnl_mutex){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x53/0x280 [mlx5_core] #3: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] stack backtrace: CPU: 1 PID: 65 Comm: kworker/u16:2 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? find_held_lock+0x2d/0x110 ? __stack_depot_save+0x24c/0x520 ? alloc_chain_hlocks+0x228/0x700 __lock_acquire+0x2c8a/0x6200 ? register_lock_class+0x1860/0x1860 ? kasan_save_stack+0x1e/0x40 ? kasan_set_free_info+0x20/0x30 ? ____kasan_slab_free+0x11d/0x1b0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? devlink_health_report+0x3d5/0x7e0 lock_acquire+0x1c1/0x550 ? devlink_health_report+0x2f1/0x7e0 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? find_held_lock+0x2d/0x110 __mutex_lock+0x12c/0x14b0 ? devlink_health_report+0x2f1/0x7e0 ? devlink_health_report+0x2f1/0x7e0 ? mutex_lock_io_nested+0x1320/0x1320 ? trace_hardirqs_on+0x2d/0x100 ? bit_wait_io_timeout+0x170/0x170 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? mlx5e_reporter_tx_err_cqe+0x1b0/0x1b0 [mlx5_core] ? mlx5e_tx_reporter_timeout_dump+0x70/0x70 [mlx5_core] ? mlx5e_tx_reporter_dump_sq+0x320/0x320 [mlx5_core] ? mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] ? mutex_lock_io_nested+0x1320/0x1320 ? process_one_work+0x70f/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? lock_downgrade+0x6e0/0x6e0 mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: c90005b5f75c ("devlink: Hold the instance lock in health callbacks") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b2a5da9739d2..729a11b5fb25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -826,6 +826,7 @@ enum { MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, MLX5E_STATE_XDP_ACTIVE, + MLX5E_STATE_CHANNELS_ACTIVE, }; struct mlx5e_modify_sq_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ea58c6917433..0c87ddb8a7a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2731,6 +2731,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) { int i; + ASSERT_RTNL(); if (chs->ptp) { mlx5e_ptp_close(chs->ptp); chs->ptp = NULL; @@ -3012,17 +3013,29 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) if (mlx5e_is_vport_rep(priv)) mlx5e_rep_activate_channels(priv); + set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_wait_channels_min_rx_wqes(&priv->channels); if (priv->rx_res) mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); } +static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) +{ + WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); + if (current_work() != &priv->tx_timeout_work) + cancel_work_sync(&priv->tx_timeout_work); +} + void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { if (priv->rx_res) mlx5e_rx_res_channels_deactivate(priv->rx_res); + clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_cancel_tx_timeout_work(priv); + if (mlx5e_is_vport_rep(priv)) mlx5e_rep_deactivate_channels(priv); @@ -4801,8 +4814,17 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) struct net_device *netdev = priv->netdev; int i; - rtnl_lock(); - mutex_lock(&priv->state_lock); + /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues + * through this flow. However, channel closing flows have to wait for + * this work to finish while holding rtnl lock too. So either get the + * lock or find that channels are being closed for other reason and + * this work is not relevant anymore. + */ + while (!rtnl_trylock()) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) + return; + msleep(20); + } if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; @@ -4821,7 +4843,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) } unlock: - mutex_unlock(&priv->state_lock); rtnl_unlock(); } From ccbe33003b109f14c4dde2a4fca9c2a50c423601 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 11 Sep 2023 13:28:10 +0300 Subject: [PATCH 11/14] net/mlx5e: TC, Don't offload post action rule if not supported If post action is not supported, eg. ignore_flow_level is not supported, don't offload post action rule. Otherwise, will hit panic [1]. Fix it by checking if post action table is valid or not. [1] [445537.863880] BUG: unable to handle page fault for address: ffffffffffffffb1 [445537.864617] #PF: supervisor read access in kernel mode [445537.865244] #PF: error_code(0x0000) - not-present page [445537.865860] PGD 70683a067 P4D 70683a067 PUD 70683c067 PMD 0 [445537.866497] Oops: 0000 [#1] PREEMPT SMP NOPTI [445537.867077] CPU: 19 PID: 248742 Comm: tc Kdump: loaded Tainted: G O 6.5.0+ #1 [445537.867888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [445537.868834] RIP: 0010:mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] [445537.869635] Code: c0 0d 00 00 e8 20 96 c6 d3 48 85 c0 0f 84 e5 00 00 00 c7 83 b0 01 00 00 00 00 00 00 49 89 c5 31 c0 31 d2 66 89 83 b4 01 00 00 <49> 8b 44 24 10 83 23 df 83 8b d8 01 00 00 04 48 89 83 c0 01 00 00 [445537.871318] RSP: 0018:ffffb98741cef428 EFLAGS: 00010246 [445537.871962] RAX: 0000000000000000 RBX: ffff8df341167000 RCX: 0000000000000001 [445537.872704] RDX: 0000000000000000 RSI: ffffffff954844e1 RDI: ffffffff9546e9cb [445537.873430] RBP: ffffb98741cef448 R08: 0000000000000020 R09: 0000000000000246 [445537.874160] R10: 0000000000000000 R11: ffffffff943f73ff R12: ffffffffffffffa1 [445537.874893] R13: ffff8df36d336c20 R14: ffffffffffffffa1 R15: ffff8df341167000 [445537.875628] FS: 00007fcd6564f800(0000) GS:ffff8dfa9ea00000(0000) knlGS:0000000000000000 [445537.876425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [445537.877090] CR2: ffffffffffffffb1 CR3: 00000003b5884001 CR4: 0000000000770ee0 [445537.877832] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [445537.878564] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [445537.879300] PKRU: 55555554 [445537.879797] Call Trace: [445537.880263] [445537.880713] ? show_regs+0x6e/0x80 [445537.881232] ? __die+0x29/0x70 [445537.881731] ? page_fault_oops+0x85/0x160 [445537.882276] ? search_exception_tables+0x65/0x70 [445537.882852] ? kernelmode_fixup_or_oops+0xa2/0x120 [445537.883432] ? __bad_area_nosemaphore+0x18b/0x250 [445537.884019] ? bad_area_nosemaphore+0x16/0x20 [445537.884566] ? do_kern_addr_fault+0x8b/0xa0 [445537.885105] ? exc_page_fault+0xf5/0x1c0 [445537.885623] ? asm_exc_page_fault+0x2b/0x30 [445537.886149] ? __kmem_cache_alloc_node+0x1df/0x2a0 [445537.886717] ? mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] [445537.887431] ? mlx5e_tc_post_act_add+0x30/0x130 [mlx5_core] [445537.888172] alloc_flow_post_acts+0xfb/0x1c0 [mlx5_core] [445537.888849] parse_tc_actions+0x582/0x5c0 [mlx5_core] [445537.889505] parse_tc_fdb_actions+0xd7/0x1f0 [mlx5_core] [445537.890175] __mlx5e_add_fdb_flow+0x1ab/0x2b0 [mlx5_core] [445537.890843] mlx5e_add_fdb_flow+0x56/0x120 [mlx5_core] [445537.891491] ? debug_smp_processor_id+0x1b/0x30 [445537.892037] mlx5e_tc_add_flow+0x79/0x90 [mlx5_core] [445537.892676] mlx5e_configure_flower+0x305/0x450 [mlx5_core] [445537.893341] mlx5e_rep_setup_tc_cls_flower+0x3d/0x80 [mlx5_core] [445537.894037] mlx5e_rep_setup_tc_cb+0x5c/0xa0 [mlx5_core] [445537.894693] tc_setup_cb_add+0xdc/0x220 [445537.895177] fl_hw_replace_filter+0x15f/0x220 [cls_flower] [445537.895767] fl_change+0xe87/0x1190 [cls_flower] [445537.896302] tc_new_tfilter+0x484/0xa50 Fixes: f0da4daa3413 ("net/mlx5e: Refactor ct to use post action infrastructure") Signed-off-by: Chris Mi Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed Reviewed-by: Automatic Verification Reviewed-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Shachar Kagan Reviewed-by: Tariq Toukan --- .../mellanox/mlx5/core/en/tc/post_act.c | 6 +++++ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 25 ++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 4e923a2874ae..86bf007fd05b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -83,6 +83,9 @@ mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, struct mlx5_flow_spec *spec; int err; + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -111,6 +114,9 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po struct mlx5e_post_act_handle *handle; int err; + if (IS_ERR(post_act)) + return ERR_CAST(post_act); + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7ca9e5b86778..4809a66f3491 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -444,6 +444,9 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, struct mlx5e_flow_meter_handle *meter; enum mlx5e_post_meter_type type; + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); if (IS_ERR(meter)) { mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); @@ -3738,6 +3741,20 @@ out_free: return err; } +static int +set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); + + return 0; +} + static int alloc_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5e_tc_act_branch_ctrl *cond, @@ -3761,8 +3778,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, break; case FLOW_ACTION_ACCEPT: case FLOW_ACTION_PIPE: - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + if (set_branch_dest_ft(flow->priv, attr)) + goto out_err; break; case FLOW_ACTION_JUMP: if (*jump_count) { @@ -3771,8 +3788,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, goto out_err; } *jump_count = cond->extval; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + if (set_branch_dest_ft(flow->priv, attr)) + goto out_err; break; default: err = -EOPNOTSUPP; From 3d7a3f2612d75de5f371a681038b089ded6667eb Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 7 Aug 2023 13:11:32 +0300 Subject: [PATCH 12/14] net/mlx5: Nack sync reset request when HotPlug is enabled Current sync reset flow is not supported when PCIe bridge connected directly to mlx5 device has HotPlug interrupt enabled and can be triggered on link state change event. Return nack on reset request in such case. Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index b568988e92e3..c4e19d627da2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -325,6 +325,29 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) +static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev) +{ + struct pci_dev *bridge = dev->pdev->bus->self; + u16 reg16; + int err; + + if (!bridge) + return -EOPNOTSUPP; + + err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, ®16); + if (err) + return err; + + if ((reg16 & PCI_EXP_SLTCTL_HPIE) && (reg16 & PCI_EXP_SLTCTL_DLLSCE)) { + mlx5_core_warn(dev, "FW reset is not supported as HotPlug is enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -357,6 +380,12 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) return false; } +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) + err = mlx5_check_hotplug_interrupt(dev); + if (err) + return false; +#endif + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); if (err) return false; From 7aaf975238c47b710fcc4eca0da1e7902a53abe2 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 31 Aug 2023 05:47:09 +0300 Subject: [PATCH 13/14] net/mlx5e: Check netdev pointer before checking its net ns Previously, when comparing the net namespaces, the case where the netdev doesn't exist wasn't taken into account, and therefore can cause a crash. In such a case, the comparing function should return false, as there is no netdev->net to compare the devlink->net to. Furthermore, this will result in an attempt to enter switchdev mode without a netdev to fail, and which is the desired result as there is no meaning in switchdev mode without a net device. Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bf78eeca401b..bb8bcb448ae9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3653,14 +3653,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); struct net *devl_net, *netdev_net; - struct mlx5_eswitch *esw; + bool ret = false; - esw = mlx5_devlink_eswitch_nocheck_get(devlink); - netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); - - return net_eq(devl_net, netdev_net); + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + if (dev->mlx5e_res.uplink_netdev) { + netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + ret = net_eq(devl_net, netdev_net); + } + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); + return ret; } int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) From ca4ef28d0ad831d2521fa2b16952f37fd9324ca3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 09:36:20 +0300 Subject: [PATCH 14/14] net/mlx5: Fix a NULL vs IS_ERR() check The mlx5_esw_offloads_devlink_port() function returns error pointers, not NULL. Fixes: 7bef147a6ab6 ("net/mlx5: Don't skip vport check") Signed-off-by: Dan Carpenter Reviewed-by: Wojciech Drewek Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3ab682bbcf86..1bf7540a65ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1497,7 +1497,7 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); - if (dl_port) { + if (!IS_ERR(dl_port)) { SET_NETDEV_DEVLINK_PORT(netdev, dl_port); mlx5e_rep_vnic_reporter_create(priv, dl_port); }